From f0f2ad55d220e16d489746abaf1ab3df1e0ee8b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fahrican=20Ko=C5=9Far?= Date: Thu, 5 Oct 2023 21:10:08 -0400 Subject: [PATCH] reorganize router code, add NetlistRouter also: clip high-fanout BBs by original BBs to avoid data races in the parallel case --- utils/route_diag/src/main.cpp | 5 +- vpr/src/base/SetupVPR.cpp | 1 + vpr/src/base/place_and_route.cpp | 54 +- vpr/src/base/read_options.cpp | 2 - vpr/src/base/vpr_api.cpp | 37 +- vpr/src/base/vpr_types.h | 2 + vpr/src/draw/draw.cpp | 2 +- vpr/src/draw/draw_basic.cpp | 2 +- vpr/src/draw/draw_rr.h | 2 +- vpr/src/draw/draw_rr_edges.h | 2 +- vpr/src/draw/draw_searchbar.h | 2 +- vpr/src/draw/draw_toggle_functions.h | 2 +- vpr/src/draw/draw_triangle.h | 2 +- vpr/src/draw/search_bar.cpp | 2 +- vpr/src/place/place_timing_update.h | 2 + vpr/src/place/timing_place_lookup.cpp | 2 +- vpr/src/route/ParallelNetlistRouter.h | 93 + vpr/src/route/ParallelNetlistRouter.tpp | 104 + vpr/src/route/SerialNetlistRouter.h | 71 + vpr/src/route/SerialNetlistRouter.tpp | 72 + vpr/src/route/channel_stats.cpp | 2 +- vpr/src/route/connection_based_routing.cpp | 1 - vpr/src/route/connection_router.cpp | 114 +- vpr/src/route/connection_router.h | 10 +- vpr/src/route/connection_router_interface.h | 12 +- vpr/src/route/netlist_routers.h | 168 ++ vpr/src/route/partition_tree.h | 18 - vpr/src/route/route.cpp | 625 +++++ vpr/src/route/route.h | 33 + vpr/src/route/route_budgets.cpp | 1 - vpr/src/route/route_common.cpp | 254 +- vpr/src/route/route_common.h | 15 +- vpr/src/route/route_debug.cpp | 30 + vpr/src/route/route_debug.h | 20 + vpr/src/route/route_export.h | 41 +- vpr/src/route/route_net.cpp | 338 +++ vpr/src/route/route_net.h | 119 + vpr/src/route/route_net.tpp | 478 ++++ vpr/src/route/route_parallel.cpp | 1059 -------- vpr/src/route/route_parallel.h | 33 - vpr/src/route/route_timing.cpp | 2263 ----------------- vpr/src/route/route_timing.h | 291 --- vpr/src/route/route_tree.cpp | 4 +- .../{route_util.cpp 
=> route_utilization.cpp} | 2 +- .../{route_util.h => route_utilization.h} | 0 vpr/src/route/route_utils.cpp | 536 ++++ vpr/src/route/route_utils.h | 148 ++ vpr/src/route/router_delay_profiling.cpp | 5 +- vpr/src/route/router_delay_profiling.h | 1 - vpr/src/route/router_lookahead.cpp | 2 +- .../route/router_lookahead_extended_map.cpp | 3 +- vpr/src/route/router_lookahead_map.cpp | 2 +- vpr/src/route/router_lookahead_map_utils.cpp | 8 +- vpr/src/route/router_stats.h | 24 + vpr/test/test_connection_router.cpp | 4 +- vtr_flow/scripts/python_libs/vtr/task.py | 8 +- vtr_flow/scripts/python_libs/vtr/util.py | 9 +- 57 files changed, 3028 insertions(+), 4114 deletions(-) create mode 100644 vpr/src/route/ParallelNetlistRouter.h create mode 100644 vpr/src/route/ParallelNetlistRouter.tpp create mode 100644 vpr/src/route/SerialNetlistRouter.h create mode 100644 vpr/src/route/SerialNetlistRouter.tpp create mode 100644 vpr/src/route/netlist_routers.h create mode 100644 vpr/src/route/route.cpp create mode 100644 vpr/src/route/route.h create mode 100644 vpr/src/route/route_debug.cpp create mode 100644 vpr/src/route/route_debug.h create mode 100644 vpr/src/route/route_net.cpp create mode 100644 vpr/src/route/route_net.h create mode 100644 vpr/src/route/route_net.tpp delete mode 100644 vpr/src/route/route_parallel.cpp delete mode 100644 vpr/src/route/route_parallel.h delete mode 100644 vpr/src/route/route_timing.cpp delete mode 100644 vpr/src/route/route_timing.h rename vpr/src/route/{route_util.cpp => route_utilization.cpp} (99%) rename vpr/src/route/{route_util.h => route_utilization.h} (100%) create mode 100644 vpr/src/route/route_utils.cpp create mode 100644 vpr/src/route/route_utils.h diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp index 0cf1c901d23..d322890e8ec 100644 --- a/utils/route_diag/src/main.cpp +++ b/utils/route_diag/src/main.cpp @@ -33,7 +33,7 @@ #include "router_delay_profiling.h" #include "route_tree.h" #include "route_common.h" -#include 
"route_timing.h" +#include "route_net.h" #include "route_export.h" #include "rr_graph.h" #include "rr_graph2.h" @@ -124,8 +124,7 @@ static void do_one_route(const Netlist<>& net_list, cost_params, bounding_box, router_stats, - conn_params, - true); + conn_params); if (found_path) { VTR_ASSERT(cheapest.index == sink_node); diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index e596bd51c43..eecec4d39ce 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -481,6 +481,7 @@ static void SetupRouterOpts(const t_options& Options, t_router_opts* RouterOpts) RouterOpts->generate_rr_node_overuse_report = Options.generate_rr_node_overuse_report; RouterOpts->flat_routing = Options.flat_routing; RouterOpts->has_choking_spot = Options.has_choking_spot; + RouterOpts->with_timing_analysis = Options.timing_analysis; } static void SetupAnnealSched(const t_options& Options, diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp index c34cd9cfbae..b1916852a34 100644 --- a/vpr/src/base/place_and_route.cpp +++ b/vpr/src/base/place_and_route.cpp @@ -20,6 +20,7 @@ #include "place.h" #include "read_place.h" #include "read_route.h" +#include "route.h" #include "route_export.h" #include "draw.h" #include "stats.h" @@ -191,19 +192,19 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, arch->num_directs, false); } - success = try_route(router_net_list, - current, - router_opts, - analysis_opts, - det_routing_arch, segment_inf, - net_delay, - timing_info, - delay_calc, - arch->Chans, - arch->Directs, - arch->num_directs, - (attempt_count == 0) ? ScreenUpdatePriority::MAJOR : ScreenUpdatePriority::MINOR, - is_flat); + success = route(router_net_list, + current, + router_opts, + analysis_opts, + det_routing_arch, segment_inf, + net_delay, + timing_info, + delay_calc, + arch->Chans, + arch->Directs, + arch->num_directs, + (attempt_count == 0) ? 
ScreenUpdatePriority::MAJOR : ScreenUpdatePriority::MINOR, + is_flat); attempt_count++; fflush(stdout); @@ -331,19 +332,20 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, false); } - success = try_route(router_net_list, - current, - router_opts, - analysis_opts, - det_routing_arch, segment_inf, - net_delay, - timing_info, - delay_calc, - arch->Chans, - arch->Directs, - arch->num_directs, - ScreenUpdatePriority::MINOR, - is_flat); + success = route(router_net_list, + current, + router_opts, + analysis_opts, + det_routing_arch, + segment_inf, + net_delay, + timing_info, + delay_calc, + arch->Chans, + arch->Directs, + arch->num_directs, + ScreenUpdatePriority::MINOR, + is_flat); if (success && Fc_clipped == false) { final = current; diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 32929c4fc9a..0764b76f947 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2986,8 +2986,6 @@ void set_conditional_defaults(t_options& args) { */ //Base cost type if (args.base_cost_type.provenance() != Provenance::SPECIFIED) { - VTR_ASSERT(args.RouterAlgorithm == TIMING_DRIVEN || args.RouterAlgorithm == PARALLEL); - if (args.RouteType == DETAILED) { if (args.timing_analysis) { args.base_cost_type.set(DELAY_NORMALIZED_LENGTH, Provenance::INFERRED); diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 65519d5775f..1e4684ae683 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -53,6 +53,7 @@ #include "pb_type_graph.h" #include "route_common.h" #include "timing_place_lookup.h" +#include "route.h" #include "route_export.h" #include "vpr_api.h" #include "read_sdc.h" @@ -61,9 +62,9 @@ #include "lb_type_rr_graph.h" #include "read_activity.h" #include "net_delay.h" -#include "AnalysisDelayCalculator.h" #include "concrete_timing_info.h" #include "netlist_writer.h" +#include "AnalysisDelayCalculator.h" #include "RoutingDelayCalculator.h" #include "check_route.h" #include 
"constant_nets.h" @@ -367,7 +368,6 @@ bool vpr_flow(t_vpr_setup& vpr_setup, t_arch& arch) { } #ifdef VPR_USE_TBB - /* Set this here, because tbb::global_control doesn't control anything once it's out of scope * (contrary to the name). */ tbb::global_control c(tbb::global_control::max_allowed_parallelism, vpr_setup.num_workers); @@ -805,10 +805,11 @@ RouteStatus vpr_route_flow(const Netlist<>& net_list, std::shared_ptr routing_delay_calc = nullptr; if (vpr_setup.Timing.timing_analysis_enabled) { auto& atom_ctx = g_vpr_ctx.atom(); - routing_delay_calc = std::make_shared(atom_ctx.nlist, atom_ctx.lookup, net_delay, is_flat); - timing_info = make_setup_hold_timing_info(routing_delay_calc, router_opts.timing_update_type); + } else { + /* No delay calculator (segfault if the code calls into it) and wirelength driven routing */ + timing_info = make_constant_timing_info(0); } if (router_opts.doRouting == STAGE_DO) { @@ -922,20 +923,20 @@ RouteStatus vpr_route_fixed_W(const Netlist<>& net_list, VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Fixed channel width must be specified when routing at fixed channel width (was %d)", fixed_channel_width); } bool status = false; - status = try_route(net_list, - fixed_channel_width, - vpr_setup.RouterOpts, - vpr_setup.AnalysisOpts, - &vpr_setup.RoutingArch, - vpr_setup.Segments, - net_delay, - timing_info, - delay_calc, - arch.Chans, - arch.Directs, - arch.num_directs, - ScreenUpdatePriority::MAJOR, - is_flat); + status = route(net_list, + fixed_channel_width, + vpr_setup.RouterOpts, + vpr_setup.AnalysisOpts, + &vpr_setup.RoutingArch, + vpr_setup.Segments, + net_delay, + timing_info, + delay_calc, + arch.Chans, + arch.Directs, + arch.num_directs, + ScreenUpdatePriority::MAJOR, + is_flat); return RouteStatus(status, fixed_channel_width); } diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 2784c5e63da..438b81086cc 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1449,6 +1449,8 @@ struct t_router_opts { bool 
flat_routing; bool has_choking_spot; + bool with_timing_analysis; + // Options related to rr_node reordering, for testing and possible cache optimization e_rr_node_reorder_algorithm reorder_rr_graph_nodes_algorithm = DONT_REORDER; int reorder_rr_graph_nodes_threshold = 0; diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp index d4e22cd3c95..7f66de9a951 100644 --- a/vpr/src/draw/draw.cpp +++ b/vpr/src/draw/draw.cpp @@ -86,7 +86,7 @@ # endif # include "rr_graph.h" -# include "route_util.h" +# include "route_utilization.h" # include "place_macro.h" # include "buttons.h" # include "draw_rr.h" diff --git a/vpr/src/draw/draw_basic.cpp b/vpr/src/draw/draw_basic.cpp index e35ebcef0a6..e3f8c2adcab 100644 --- a/vpr/src/draw/draw_basic.cpp +++ b/vpr/src/draw/draw_basic.cpp @@ -60,7 +60,7 @@ # endif # include "rr_graph.h" -# include "route_util.h" +# include "route_utilization.h" # include "place_macro.h" # include "buttons.h" diff --git a/vpr/src/draw/draw_rr.h b/vpr/src/draw/draw_rr.h index 49e0949271b..7ed40ffd61b 100644 --- a/vpr/src/draw/draw_rr.h +++ b/vpr/src/draw/draw_rr.h @@ -43,7 +43,7 @@ # include "manual_moves.h" # include "rr_graph.h" -# include "route_util.h" +# include "route_utilization.h" # include "place_macro.h" # include "buttons.h" diff --git a/vpr/src/draw/draw_rr_edges.h b/vpr/src/draw/draw_rr_edges.h index 81077e22d01..9068b31b4fe 100644 --- a/vpr/src/draw/draw_rr_edges.h +++ b/vpr/src/draw/draw_rr_edges.h @@ -43,7 +43,7 @@ # include "manual_moves.h" # include "rr_graph.h" -# include "route_util.h" +# include "route_utilization.h" # include "place_macro.h" # include "buttons.h" diff --git a/vpr/src/draw/draw_searchbar.h b/vpr/src/draw/draw_searchbar.h index 6fc1092afd3..e4dade9bb58 100644 --- a/vpr/src/draw/draw_searchbar.h +++ b/vpr/src/draw/draw_searchbar.h @@ -44,7 +44,7 @@ # include "manual_moves.h" # include "rr_graph.h" -# include "route_util.h" +# include "route_utilization.h" # include "place_macro.h" # include "buttons.h" diff --git 
a/vpr/src/draw/draw_toggle_functions.h b/vpr/src/draw/draw_toggle_functions.h index 6c256cc2310..7b8330396b7 100644 --- a/vpr/src/draw/draw_toggle_functions.h +++ b/vpr/src/draw/draw_toggle_functions.h @@ -49,7 +49,7 @@ # include "manual_moves.h" # include "rr_graph.h" -# include "route_util.h" +# include "route_utilization.h" # include "place_macro.h" # include "buttons.h" diff --git a/vpr/src/draw/draw_triangle.h b/vpr/src/draw/draw_triangle.h index 51eab896244..0ddf12218ec 100644 --- a/vpr/src/draw/draw_triangle.h +++ b/vpr/src/draw/draw_triangle.h @@ -44,7 +44,7 @@ # include "manual_moves.h" # include "rr_graph.h" -# include "route_util.h" +# include "route_utilization.h" # include "place_macro.h" # include "buttons.h" diff --git a/vpr/src/draw/search_bar.cpp b/vpr/src/draw/search_bar.cpp index 590e4981d61..bd7e160d4ad 100644 --- a/vpr/src/draw/search_bar.cpp +++ b/vpr/src/draw/search_bar.cpp @@ -59,7 +59,7 @@ # endif # include "rr_graph.h" -# include "route_util.h" +# include "route_utilization.h" # include "place_macro.h" extern std::string rr_highlight_message; diff --git a/vpr/src/place/place_timing_update.h b/vpr/src/place/place_timing_update.h index 4ff180002bf..67fca81b3ee 100644 --- a/vpr/src/place/place_timing_update.h +++ b/vpr/src/place/place_timing_update.h @@ -7,6 +7,8 @@ #include "timing_place.h" #include "place_util.h" +#include "NetPinTimingInvalidator.h" + ///@brief Initialize the timing information and structures in the placer. 
void initialize_timing_info(const PlaceCritParams& crit_params, const PlaceDelayModel* delay_model, diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index fba8b1e9c46..543376201be 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -20,7 +20,7 @@ #include "globals.h" #include "place_and_route.h" #include "route_common.h" -#include "route_timing.h" +#include "route_net.h" #include "route_export.h" #include "rr_graph.h" #include "timing_place_lookup.h" diff --git a/vpr/src/route/ParallelNetlistRouter.h b/vpr/src/route/ParallelNetlistRouter.h new file mode 100644 index 00000000000..e562da15627 --- /dev/null +++ b/vpr/src/route/ParallelNetlistRouter.h @@ -0,0 +1,93 @@ +#pragma once + +/** @file Parallel case for NetlistRouter. Builds a \ref PartitionTree from the + * netlist according to net bounding boxes. Tree nodes are then routed in parallel + * using tbb::task_group. Each task routes the nets inside a node serially and then adds + * its child nodes to the task queue. This approach is serially equivalent & deterministic, + * but it can reduce QoR in congested cases [0]. + * + * Note that the parallel router does not support graphical router breakpoints. + * + * [0]: F. Koşar, "A net-decomposing parallel FPGA router", MS thesis, UofT ECE, 2023 */ +#include "netlist_routers.h" + +#include + +/** Parallel impl for NetlistRouter. + * Holds enough context members to glue together ConnectionRouter and net routing functions, + * such as \ref route_net. Keeps the members in thread-local storage where needed, + * i.e. ConnectionRouters and RouteIterResults-es. + * See \ref route_net. 
*/ +template +class ParallelNetlistRouter : public NetlistRouter { + public: + ParallelNetlistRouter( + const Netlist<>& net_list, + const RouterLookahead* router_lookahead, + const t_router_opts& router_opts, + CBRR& connections_inf, + NetPinsMatrix& net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + route_budgets& budgeting_inf, + const RoutingPredictor& routing_predictor, + const vtr::vector>>& choking_spots, + bool is_flat) + : _routers_th(_make_router(router_lookahead, is_flat)) + , _net_list(net_list) + , _router_opts(router_opts) + , _connections_inf(connections_inf) + , _net_delay(net_delay) + , _netlist_pin_lookup(netlist_pin_lookup) + , _timing_info(timing_info) + , _pin_timing_invalidator(pin_timing_invalidator) + , _budgeting_inf(budgeting_inf) + , _routing_predictor(routing_predictor) + , _choking_spots(choking_spots) + , _is_flat(is_flat) {} + ~ParallelNetlistRouter() {} + + /** Run a single iteration of netlist routing for this->_net_list. This usually means calling + * \ref route_net for each net, which will handle other global updates. + * \return RouteIterResults for this iteration. */ + RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack); + void set_rcv_enabled(bool x); + void set_timing_info(std::shared_ptr timing_info); + + private: + /** A single task to route nets inside a PartitionTree node and add tasks for its child nodes to task group \p g. 
*/ + void route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node, int itry, float pres_fac, float worst_neg_slack); + + ConnectionRouter _make_router(const RouterLookahead* router_lookahead, bool is_flat) { + auto& device_ctx = g_vpr_ctx.device(); + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + return ConnectionRouter( + device_ctx.grid, + *router_lookahead, + device_ctx.rr_graph.rr_nodes(), + &device_ctx.rr_graph, + device_ctx.rr_rc_data, + device_ctx.rr_graph.rr_switch(), + route_ctx.rr_node_route_inf, + is_flat); + } + + /* Context fields */ + tbb::enumerable_thread_specific> _routers_th; + const Netlist<>& _net_list; + const t_router_opts& _router_opts; + CBRR& _connections_inf; + tbb::enumerable_thread_specific _results_th; + NetPinsMatrix& _net_delay; + const ClusteredPinAtomPinsLookup& _netlist_pin_lookup; + std::shared_ptr _timing_info; + NetPinTimingInvalidator* _pin_timing_invalidator; + route_budgets& _budgeting_inf; + const RoutingPredictor& _routing_predictor; + const vtr::vector>>& _choking_spots; + bool _is_flat; +}; + +#include "ParallelNetlistRouter.tpp" diff --git a/vpr/src/route/ParallelNetlistRouter.tpp b/vpr/src/route/ParallelNetlistRouter.tpp new file mode 100644 index 00000000000..3c73b784b6e --- /dev/null +++ b/vpr/src/route/ParallelNetlistRouter.tpp @@ -0,0 +1,104 @@ +#pragma once + +/** @file Impls for ParallelNetlistRouter */ + +#include "netlist_routers.h" +#include "route_net.h" +#include "vtr_time.h" + +template +inline RouteIterResults ParallelNetlistRouter::route_netlist(int itry, float pres_fac, float worst_neg_slack) { + /* Reset results for each thread */ + for (auto& results : _results_th) { + results = RouteIterResults(); + } + + /* Organize netlist into a PartitionTree. + * Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. 
*/ + PartitionTree tree(_net_list); + + /* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */ + tbb::task_group g; + route_partition_tree_node(g, tree.root(), itry, pres_fac, worst_neg_slack); + g.wait(); + + /* Combine results from threads */ + RouteIterResults out; + for (auto& results : _results_th) { + out.stats.combine(results.stats); + out.rerouted_nets.insert(out.rerouted_nets.end(), results.rerouted_nets.begin(), results.rerouted_nets.end()); + out.is_routable &= results.is_routable; + } + return out; +} + +template +void ParallelNetlistRouter::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node, int itry, float pres_fac, float worst_neg_slack) { + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + /* Sort so net with most sinks is routed first. */ + std::sort(node.nets.begin(), node.nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool { + return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size(); + }); + + vtr::Timer t; + for (auto net_id : node.nets) { + auto flags = route_net( + _routers_th.local(), + _net_list, + net_id, + itry, + pres_fac, + _router_opts, + _connections_inf, + _results_th.local().stats, + _net_delay, + _netlist_pin_lookup, + _timing_info.get(), + _pin_timing_invalidator, + _budgeting_inf, + worst_neg_slack, + _routing_predictor, + _choking_spots[net_id], + _is_flat); + + if (!flags.success && !flags.retry_with_full_bb) { + /* Disconnected RRG and ConnectionRouter doesn't think growing the BB will work */ + _results_th.local().is_routable = false; + return; + } + if (flags.retry_with_full_bb) { + /* ConnectionRouter thinks we should grow the BB. 
Do that and leave this net unrouted for now */ + route_ctx.route_bb[net_id] = full_device_bb(); + continue; + } + if (flags.was_rerouted) { + _results_th.local().rerouted_nets.push_back(net_id); + } + } + PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets routed in " + std::to_string(t.elapsed_sec()) + " s"); + + /* This node is finished: add left & right branches to the task queue. The scalar arguments are captured by value because the tasks may run after this call has returned; g and node refer to objects that outlive it. */ + if (node.left && node.right) { + g.run([this, &g, &node, itry, pres_fac, worst_neg_slack]() { + route_partition_tree_node(g, *node.left, itry, pres_fac, worst_neg_slack); + }); + g.run([this, &g, &node, itry, pres_fac, worst_neg_slack]() { + route_partition_tree_node(g, *node.right, itry, pres_fac, worst_neg_slack); + }); + } else { + VTR_ASSERT(!node.left && !node.right); // there shouldn't be a node with a single branch + } +} + +template +void ParallelNetlistRouter::set_rcv_enabled(bool x) { + for (auto& router : _routers_th) { + router.set_rcv_enabled(x); + } +} + +template +void ParallelNetlistRouter::set_timing_info(std::shared_ptr timing_info) { + _timing_info = timing_info; +} diff --git a/vpr/src/route/SerialNetlistRouter.h b/vpr/src/route/SerialNetlistRouter.h new file mode 100644 index 00000000000..5bb59df1998 --- /dev/null +++ b/vpr/src/route/SerialNetlistRouter.h @@ -0,0 +1,71 @@ +#pragma once + +/** @file Serial case for \ref NetlistRouter: just loop through nets */ + +#include "netlist_routers.h" + +template +class SerialNetlistRouter : public NetlistRouter { + public: + SerialNetlistRouter( + const Netlist<>& net_list, + const RouterLookahead* router_lookahead, + const t_router_opts& router_opts, + CBRR& connections_inf, + NetPinsMatrix& net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + route_budgets& budgeting_inf, + const RoutingPredictor& routing_predictor, + const vtr::vector>>& choking_spots, + bool is_flat) + : _router(_make_router(router_lookahead, is_flat)) + , _net_list(net_list) + , _router_opts(router_opts) + , 
_connections_inf(connections_inf) + , _net_delay(net_delay) + , _netlist_pin_lookup(netlist_pin_lookup) + , _timing_info(timing_info) + , _pin_timing_invalidator(pin_timing_invalidator) + , _budgeting_inf(budgeting_inf) + , _routing_predictor(routing_predictor) + , _choking_spots(choking_spots) + , _is_flat(is_flat) {} + ~SerialNetlistRouter() {} + + RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack); + void set_rcv_enabled(bool x); + void set_timing_info(std::shared_ptr timing_info); + + private: + ConnectionRouter _make_router(const RouterLookahead* router_lookahead, bool is_flat) { + auto& device_ctx = g_vpr_ctx.device(); + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + return ConnectionRouter( + device_ctx.grid, + *router_lookahead, + device_ctx.rr_graph.rr_nodes(), + &device_ctx.rr_graph, + device_ctx.rr_rc_data, + device_ctx.rr_graph.rr_switch(), + route_ctx.rr_node_route_inf, + is_flat); + } + /* Context fields */ + ConnectionRouter _router; + const Netlist<>& _net_list; + const t_router_opts& _router_opts; + CBRR& _connections_inf; + NetPinsMatrix& _net_delay; + const ClusteredPinAtomPinsLookup& _netlist_pin_lookup; + std::shared_ptr _timing_info; + NetPinTimingInvalidator* _pin_timing_invalidator; + route_budgets& _budgeting_inf; + const RoutingPredictor& _routing_predictor; + const vtr::vector>>& _choking_spots; + bool _is_flat; +}; + +#include "SerialNetlistRouter.tpp" diff --git a/vpr/src/route/SerialNetlistRouter.tpp b/vpr/src/route/SerialNetlistRouter.tpp new file mode 100644 index 00000000000..714426a1920 --- /dev/null +++ b/vpr/src/route/SerialNetlistRouter.tpp @@ -0,0 +1,72 @@ +#pragma once + +/** @file Templated implementations for SerialNetlistRouter */ + +#include "SerialNetlistRouter.h" +#include "route_net.h" + +template +inline RouteIterResults SerialNetlistRouter::route_netlist(int itry, float pres_fac, float worst_neg_slack) { + auto& route_ctx = g_vpr_ctx.mutable_routing(); + RouteIterResults out; + + /* 
Sort so net with most sinks is routed first */ + auto sorted_nets = std::vector(_net_list.nets().begin(), _net_list.nets().end()); + std::sort(sorted_nets.begin(), sorted_nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool { + return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size(); + }); + + for (size_t inet = 0; inet < sorted_nets.size(); inet++) { + ParentNetId net_id = sorted_nets[inet]; + NetResultFlags flags = route_net( + _router, + _net_list, + net_id, + itry, + pres_fac, + _router_opts, + _connections_inf, + out.stats, + _net_delay, + _netlist_pin_lookup, + _timing_info.get(), + _pin_timing_invalidator, + _budgeting_inf, + worst_neg_slack, + _routing_predictor, + _choking_spots[net_id], + _is_flat); + + if (!flags.success && !flags.retry_with_full_bb) { + /* Disconnected RRG and ConnectionRouter doesn't think growing the BB will work */ + out.is_routable = false; + return out; + } + + if (flags.retry_with_full_bb) { + /* Grow the BB and retry this net right away. 
*/ + route_ctx.route_bb[net_id] = full_device_bb(); + inet--; + continue; + } + + if (flags.was_rerouted) { + out.rerouted_nets.push_back(net_id); +#ifndef NO_GRAPHICS + update_router_info_and_check_bp(BP_NET_ID, size_t(net_id)); +#endif + } + } + + return out; +} + +template +void SerialNetlistRouter::set_rcv_enabled(bool x) { + _router.set_rcv_enabled(x); +} + +template +void SerialNetlistRouter::set_timing_info(std::shared_ptr timing_info) { + _timing_info = timing_info; +} diff --git a/vpr/src/route/channel_stats.cpp b/vpr/src/route/channel_stats.cpp index e5a2f1703e2..065a6e5b7a0 100644 --- a/vpr/src/route/channel_stats.cpp +++ b/vpr/src/route/channel_stats.cpp @@ -1,5 +1,5 @@ #include "channel_stats.h" -#include "route_util.h" +#include "route_utilization.h" #include "histogram.h" #include "globals.h" diff --git a/vpr/src/route/connection_based_routing.cpp b/vpr/src/route/connection_based_routing.cpp index eaa4c85ff26..a8f3d1b0a86 100644 --- a/vpr/src/route/connection_based_routing.cpp +++ b/vpr/src/route/connection_based_routing.cpp @@ -1,6 +1,5 @@ #include "connection_based_routing.h" -#include "route_timing.h" #include "route_profiling.h" // incremental rerouting resources class definitions diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp index b99fb60b650..a883d611493 100644 --- a/vpr/src/route/connection_router.cpp +++ b/vpr/src/route/connection_router.cpp @@ -39,14 +39,13 @@ std::tuple ConnectionRouter::timing_driven_route_conne const t_conn_cost_params cost_params, t_bb bounding_box, RouterStats& router_stats, - const ConnectionParameters& conn_params, - bool can_grow_bb) { + const ConnectionParameters& conn_params) { router_stats_ = &router_stats; conn_params_ = &conn_params; bool retry = false; t_heap* cheapest; - std::tie(retry, cheapest) = timing_driven_route_connection_common_setup(rt_root, sink_node, cost_params, bounding_box, can_grow_bb); + std::tie(retry, cheapest) = 
timing_driven_route_connection_common_setup(rt_root, sink_node, cost_params, bounding_box); if (cheapest != nullptr) { rcv_path_manager.update_route_tree_set(cheapest->path_data); @@ -70,8 +69,7 @@ std::tuple ConnectionRouter::timing_driven_route_connection const RouteTreeNode& rt_root, RRNodeId sink_node, const t_conn_cost_params cost_params, - t_bb bounding_box, - bool can_grow_bb) { + t_bb bounding_box) { //Re-add route nodes from the existing route tree to the heap. //They need to be repushed onto the heap since each node's cost is target specific. @@ -100,60 +98,16 @@ std::tuple ConnectionRouter::timing_driven_route_connection if (bounding_box.xmin == 0 && bounding_box.ymin == 0 && bounding_box.xmax == (int)(grid_.width() - 1) - && bounding_box.ymax == (int)(grid_.height() - 1)) { + && bounding_box.ymax == (int)(grid_.height() - 1) + && bounding_box.layer_min == 0 + && bounding_box.layer_max == (int)(grid_.get_num_layers() - 1)) { VTR_LOG("%s\n", describe_unrouteable_connection(source_node, sink_node, is_flat_).c_str()); return std::make_tuple(false, nullptr); } - // If we cannot grow the bounding box, leave unrouted and bubble up a signal - // to retry this net with a full-device bounding box. If we are already at full device extents, - // just fail - if (!can_grow_bb) { - VTR_LOG_WARN("No routing path for connection to sink_rr %d, leaving unrouted to retry on next iteration\n", sink_node); - return std::make_tuple(true, nullptr); - } - - // Otherwise, try again with full-device bounding box. - // - // Note that the additional run-time overhead of re-trying only occurs - // when we were otherwise going to give up -- the typical case (route - // found with the bounding box) remains fast and never re-tries . 
- VTR_LOG_WARN("No routing path for connection to sink_rr %d, retrying with full device bounding box\n", sink_node); - - t_bb full_device_bounding_box; - full_device_bounding_box.xmin = 0; - full_device_bounding_box.ymin = 0; - full_device_bounding_box.xmax = grid_.width() - 1; - full_device_bounding_box.ymax = grid_.height() - 1; - full_device_bounding_box.layer_min = 0; - full_device_bounding_box.layer_max = grid_.get_num_layers() - 1; - - // - //TODO: potential future optimization - // We have already explored the RR nodes accessible within the regular - // BB (which are stored in modified_rr_node_inf), and so already know - // their cost from the source. Instead of re-starting the path search - // from scratch (i.e. from the previous route tree as we do below), we - // could just re-add all the explored nodes to the heap and continue - // expanding. - // - - //Reset any previously recorded node costs so that when we call - //add_route_tree_to_heap() the nodes in the route tree actually - //make it back into the heap. 
- reset_path_costs(); - modified_rr_node_inf_.clear(); - heap_.empty_heap(); - - //Re-initialize the heap since it was emptied by the previous call to - //timing_driven_route_connection_from_heap() - add_route_tree_to_heap(rt_root, sink_node, cost_params, false); - heap_.build_heap(); // via sifting down everything - - //Try finding the path again with the relaxed bounding box - cheapest = timing_driven_route_connection_from_heap(sink_node, - cost_params, - full_device_bounding_box); + // Otherwise, leave unrouted and bubble up a signal to retry this net with a full-device bounding box + VTR_LOG_WARN("No routing path for connection to sink_rr %d, leaving unrouted to retry later\n", sink_node); + return std::make_tuple(true, nullptr); } if (cheapest == nullptr) { @@ -177,8 +131,7 @@ std::tuple ConnectionRouter::timing_driven_route_conne t_bb net_bounding_box, const SpatialRouteTreeLookup& spatial_rt_lookup, RouterStats& router_stats, - const ConnectionParameters& conn_params, - bool can_grow_bb) { + const ConnectionParameters& conn_params) { router_stats_ = &router_stats; conn_params_ = &conn_params; @@ -218,8 +171,7 @@ std::tuple ConnectionRouter::timing_driven_route_conne std::tie(retry_with_full_bb, cheapest) = timing_driven_route_connection_common_setup(rt_root, sink_node, cost_params, - net_bounding_box, - can_grow_bb); + net_bounding_box); } if (cheapest == nullptr) { @@ -1003,19 +955,26 @@ void ConnectionRouter::add_route_tree_node_to_heap( } } -static t_bb adjust_highfanout_bounding_box(t_bb highfanout_bb) { - t_bb bb = highfanout_bb; +/* Expand bb by inode's extents and clip against net_bb */ +inline void expand_highfanout_bounding_box(t_bb& bb, const t_bb& net_bb, RRNodeId inode, const RRGraphView* rr_graph) { + bb.xmin = std::max(net_bb.xmin, std::min(bb.xmin, rr_graph->node_xlow(inode))); + bb.ymin = std::max(net_bb.ymin, std::min(bb.ymin, rr_graph->node_ylow(inode))); + bb.xmax = std::min(net_bb.xmax, std::max(bb.xmax, rr_graph->node_xhigh(inode))); + 
bb.ymax = std::min(net_bb.ymax, std::max(bb.ymax, rr_graph->node_yhigh(inode))); + bb.layer_min = std::min(bb.layer_min, rr_graph->node_layer(inode)); + bb.layer_max = std::max(bb.layer_max, rr_graph->node_layer(inode)); +} +/* Expand bb by HIGH_FANOUT_BB_FAC in x/y and clip against net_bb; the layer range is widened so that it spans at least net_bb's layers */ +inline void adjust_highfanout_bounding_box(t_bb& bb, const t_bb& net_bb) { constexpr int HIGH_FANOUT_BB_FAC = 3; - bb.xmin -= HIGH_FANOUT_BB_FAC; - bb.ymin -= HIGH_FANOUT_BB_FAC; - bb.xmax += HIGH_FANOUT_BB_FAC; - bb.ymax += HIGH_FANOUT_BB_FAC; - - bb.layer_min = highfanout_bb.layer_min; - bb.layer_max = highfanout_bb.layer_max; - return bb; + bb.xmin = std::max(net_bb.xmin, bb.xmin - HIGH_FANOUT_BB_FAC); + bb.ymin = std::max(net_bb.ymin, bb.ymin - HIGH_FANOUT_BB_FAC); + bb.xmax = std::min(net_bb.xmax, bb.xmax + HIGH_FANOUT_BB_FAC); + bb.ymax = std::min(net_bb.ymax, bb.ymax + HIGH_FANOUT_BB_FAC); + bb.layer_min = std::min(net_bb.layer_min, bb.layer_min); + bb.layer_max = std::max(net_bb.layer_max, bb.layer_max); } template @@ -1079,13 +1038,9 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( // Put the node onto the heap add_route_tree_node_to_heap(rt_node, target_node, cost_params, true); - // Update Bounding Box - highfanout_bb.xmin = std::min(highfanout_bb.xmin, rr_graph_->node_xlow(rr_node_to_add)); - highfanout_bb.ymin = std::min(highfanout_bb.ymin, rr_graph_->node_ylow(rr_node_to_add)); - highfanout_bb.xmax = std::max(highfanout_bb.xmax, rr_graph_->node_xhigh(rr_node_to_add)); - highfanout_bb.ymax = std::max(highfanout_bb.ymax, rr_graph_->node_yhigh(rr_node_to_add)); - highfanout_bb.layer_min = std::min(highfanout_bb.layer_min, rr_graph_->node_layer(rr_node_to_add)); - highfanout_bb.layer_max = std::max(highfanout_bb.layer_max, rr_graph_->node_layer(rr_node_to_add)); + // Expand HF BB to include the node (clip by original BB) + expand_highfanout_bounding_box(highfanout_bb, net_bounding_box, rr_node_to_add, rr_graph_); + if (is_flat_) { if 
(rr_graph_->node_type(rr_node_to_add) == CHANY || rr_graph_->node_type(rr_node_to_add) == CHANX) { chan_nodes_added++; @@ -1111,15 +1066,14 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( if (done) break; } - t_bb bounding_box = net_bounding_box; if (nodes_added == 0) { //If the target bin, and it's surrounding bins were empty, just add the full route tree add_route_tree_to_heap(rt_root, target_node, cost_params, true); + return net_bounding_box; } else { //We found nearby routing, replace original bounding box to be localized around that routing - bounding_box = adjust_highfanout_bounding_box(highfanout_bb); + adjust_highfanout_bounding_box(highfanout_bb, net_bounding_box); + return highfanout_bb; } - - return bounding_box; } static inline bool has_path_to_sink(const t_rr_graph_view& rr_nodes, diff --git a/vpr/src/route/connection_router.h b/vpr/src/route/connection_router.h index 093ab8fed83..b2806f41775 100644 --- a/vpr/src/route/connection_router.h +++ b/vpr/src/route/connection_router.h @@ -75,8 +75,7 @@ class ConnectionRouter : public ConnectionRouterInterface { const t_conn_cost_params cost_params, t_bb bounding_box, RouterStats& router_stats, - const ConnectionParameters& conn_params, - bool can_grow_bb) final; + const ConnectionParameters& conn_params) final; /** Finds a path from the route tree rooted at rt_root to sink_node for a * high fanout net. @@ -95,8 +94,7 @@ class ConnectionRouter : public ConnectionRouterInterface { t_bb net_bounding_box, const SpatialRouteTreeLookup& spatial_rt_lookup, RouterStats& router_stats, - const ConnectionParameters& conn_params, - bool can_grow_bb) final; + const ConnectionParameters& conn_params) final; // Finds a path from the route tree rooted at rt_root to all sinks // available. 
@@ -160,15 +158,13 @@ class ConnectionRouter : public ConnectionRouterInterface { * @param[in] sink_node Sink node ID to route to * @param[in] cost_params * @param[in] bounding_box Keep search confined to this bounding box - * @param[in] can_grow_bb Can this fn grow the given bounding box? * @return bool Signal to retry this connection with a full-device bounding box, * @return t_heap* Heap element describing the path found. */ std::tuple timing_driven_route_connection_common_setup( const RouteTreeNode& rt_root, RRNodeId sink_node, const t_conn_cost_params cost_params, - t_bb bounding_box, - bool can_grow_bb); + t_bb bounding_box); // Finds a path to sink_node, starting from the elements currently in the // heap. diff --git a/vpr/src/route/connection_router_interface.h b/vpr/src/route/connection_router_interface.h index 2180dbe76f3..d6b0baafab5 100644 --- a/vpr/src/route/connection_router_interface.h +++ b/vpr/src/route/connection_router_interface.h @@ -37,7 +37,7 @@ class ConnectionRouterInterface { public: virtual ~ConnectionRouterInterface() {} - // Clear's the modified list. Should be called after reset_path_costs + // Clears the modified list. Should be called after reset_path_costs // have been called. virtual void clear_modified_rr_node_info() = 0; @@ -50,7 +50,7 @@ class ConnectionRouterInterface { * * Returns a tuple of: * bool: path exists? (hard failure, rr graph disconnected) - * bool: should retry with full bounding box? (only used in parallel routing) + * bool: should retry with full bounding box? 
* t_heap: heap element of cheapest path */ virtual std::tuple timing_driven_route_connection_from_route_tree( const RouteTreeNode& rt_root, @@ -58,8 +58,7 @@ class ConnectionRouterInterface { const t_conn_cost_params cost_params, t_bb bounding_box, RouterStats& router_stats, - const ConnectionParameters& conn_params, - bool can_grow_bb) + const ConnectionParameters& conn_params) = 0; /** Finds a path from the route tree rooted at rt_root to sink_node for a @@ -70,7 +69,7 @@ class ConnectionRouterInterface { * * Returns a tuple of: * bool: path exists? (hard failure, rr graph disconnected) - * bool: should retry with full bounding box? (only used in parallel routing) + * bool: should retry with full bounding box? * t_heap: heap element of cheapest path */ virtual std::tuple timing_driven_route_connection_from_route_tree_high_fanout( const RouteTreeNode& rt_root, @@ -79,8 +78,7 @@ class ConnectionRouterInterface { t_bb bounding_box, const SpatialRouteTreeLookup& spatial_rt_lookup, RouterStats& router_stats, - const ConnectionParameters& conn_params, - bool can_grow_bb) + const ConnectionParameters& conn_params) = 0; // Finds a path from the route tree rooted at rt_root to all sinks diff --git a/vpr/src/route/netlist_routers.h b/vpr/src/route/netlist_routers.h new file mode 100644 index 00000000000..db51ff01b23 --- /dev/null +++ b/vpr/src/route/netlist_routers.h @@ -0,0 +1,168 @@ +#pragma once + +/** @file Interface for a netlist router. + * + * A NetlistRouter manages the required bits of state to complete the netlist routing process, + * which requires finding a path for every connection in the netlist using a ConnectionRouter. + * This needs to be an interface because there may be different netlist routing schedules, + * i.e. parallel or net-decomposing routers. + * + * Includes derived classes of NetlistRouter and a fn to provide the correct NetlistRouter + * for given router options. 
+ * + * NetlistRouter impls are typically templated by HeapType, since the single implementation + * of ConnectionRouterInterface is templated by a heap type at the moment. Any templated + * NetlistRouter-derived class is still a NetlistRouter, so that is transparent to the user + * of this interface. */ + +#include "NetPinTimingInvalidator.h" +#include "binary_heap.h" +#include "bucket.h" +#include "clustered_netlist_utils.h" +#include "connection_based_routing_fwd.h" +#include "connection_router.h" +#include "globals.h" +#include "heap_type.h" +#include "netlist_fwd.h" +#include "partition_tree.h" +#include "routing_predictor.h" +#include "route_budgets.h" +#include "route_utils.h" +#include "router_stats.h" +#include "timing_info.h" +#include "vpr_net_pins_matrix.h" +#include "vpr_types.h" + +/** Results for a single netlist routing run inside a routing iteration. */ +struct RouteIterResults { + /** Are there any connections impossible to route due to a disconnected rr_graph? */ + bool is_routable = true; + /** Net IDs with changed routing */ + std::vector rerouted_nets; + /** RouterStats for this iteration */ + RouterStats stats; +}; + +/** Route a given netlist. Takes a big context and passes it around to net & sink routing fns. + * route_netlist only needs to call the functions in route_net.h/tpp: they handle the global + * bookkeeping. */ +class NetlistRouter { + public: + virtual ~NetlistRouter() {} + + /** Run a single iteration of netlist routing for this->_net_list. This usually means calling + * route_net for each net, which will handle other global updates. + * \return RouteIterResults for this iteration. */ + virtual RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack) = 0; + + /** Enable RCV for each of the ConnectionRouters this NetlistRouter manages.*/ + virtual void set_rcv_enabled(bool x) = 0; + + /** Set this NetlistRouter's timing_info ptr. 
We sometimes change timing_info + * throughout iterations, but not frequently enough to make it a public member. */ + virtual void set_timing_info(std::shared_ptr timing_info) = 0; +}; + +/* Include the derived classes here to get the HeapType-templated impls */ +#include "SerialNetlistRouter.h" +#ifdef VPR_USE_TBB +# include "ParallelNetlistRouter.h" +#endif + +template +inline std::unique_ptr make_netlist_router_with_heap( + const Netlist<>& net_list, + const RouterLookahead* router_lookahead, + const t_router_opts& router_opts, + CBRR& connections_inf, + NetPinsMatrix& net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + route_budgets& budgeting_inf, + const RoutingPredictor& routing_predictor, + const vtr::vector>>& choking_spots, + bool is_flat) { + if (router_opts.router_algorithm == e_router_algorithm::TIMING_DRIVEN) { + return std::make_unique>( + net_list, + router_lookahead, + router_opts, + connections_inf, + net_delay, + netlist_pin_lookup, + timing_info, + pin_timing_invalidator, + budgeting_inf, + routing_predictor, + choking_spots, + is_flat); + } else if (router_opts.router_algorithm == e_router_algorithm::PARALLEL) { +#ifdef VPR_USE_TBB + return std::make_unique>( + net_list, + router_lookahead, + router_opts, + connections_inf, + net_delay, + netlist_pin_lookup, + timing_info, + pin_timing_invalidator, + budgeting_inf, + routing_predictor, + choking_spots, + is_flat); +#else + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "VPR isn't compiled with TBB support required for parallel routing"); +#endif + } else { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown router algorithm %d", router_opts.router_algorithm); + } +} + +/** Make a NetlistRouter depending on router_algorithm and router_heap in \p router_opts. 
*/ +inline std::unique_ptr make_netlist_router( + const Netlist<>& net_list, + const RouterLookahead* router_lookahead, + const t_router_opts& router_opts, + CBRR& connections_inf, + NetPinsMatrix& net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + route_budgets& budgeting_inf, + const RoutingPredictor& routing_predictor, + const vtr::vector>>& choking_spots, + bool is_flat) { + if (router_opts.router_heap == e_heap_type::BINARY_HEAP) { + return make_netlist_router_with_heap( + net_list, + router_lookahead, + router_opts, + connections_inf, + net_delay, + netlist_pin_lookup, + timing_info, + pin_timing_invalidator, + budgeting_inf, + routing_predictor, + choking_spots, + is_flat); + } else if (router_opts.router_heap == e_heap_type::BUCKET_HEAP_APPROXIMATION) { + return make_netlist_router_with_heap( + net_list, + router_lookahead, + router_opts, + connections_inf, + net_delay, + netlist_pin_lookup, + timing_info, + pin_timing_invalidator, + budgeting_inf, + routing_predictor, + choking_spots, + is_flat); + } else { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown heap type %d", router_opts.router_heap); + } +} diff --git a/vpr/src/route/partition_tree.h b/vpr/src/route/partition_tree.h index 08eb668a88f..aef794f0f08 100644 --- a/vpr/src/route/partition_tree.h +++ b/vpr/src/route/partition_tree.h @@ -25,16 +25,6 @@ inline Side operator!(const Side& rhs) { return Side(!size_t(rhs)); } -/** Routing iteration results per thread. (for a subset of the input netlist) */ -struct RouteIterResults { - /** Are there any connections impossible to route due to a disconnected rr_graph? */ - bool is_routable = true; - /** Net IDs for which timing_driven_route_net() actually got called */ - std::vector rerouted_nets; - /** RouterStats collected from my subset of nets */ - RouterStats stats; -}; - /** Spatial partition tree for routing. 
* * This divides the netlist into a tree of regions, so that nets with non-overlapping @@ -56,10 +46,6 @@ class PartitionTreeNode { std::unique_ptr left = nullptr; /** Right subtree. */ std::unique_ptr right = nullptr; - /** Are there any connections impossible to route due to a disconnected rr_graph? */ - bool is_routable = false; - /** Net IDs for which timing_driven_route_net() actually got called */ - std::vector rerouted_nets; /* Axis of the cutline. */ Axis cutline_axis = Axis::X; /* Position of the cutline. It's a float, because cutlines are considered to be "between" integral coordinates. */ @@ -92,11 +78,7 @@ class PartitionTree { /** Log PartitionTree-related messages. Can handle multiple threads. */ class PartitionTreeDebug { public: -# ifdef VPR_USE_TBB static inline tbb::concurrent_vector lines; -# else - static inline std::vector lines; -# endif /** Add msg to the log buffer (with a thread ID header) */ static inline void log(std::string msg) { auto thread_id = std::hash()(std::this_thread::get_id()); diff --git a/vpr/src/route/route.cpp b/vpr/src/route/route.cpp new file mode 100644 index 00000000000..469a0455006 --- /dev/null +++ b/vpr/src/route/route.cpp @@ -0,0 +1,625 @@ +#include "concrete_timing_info.h" +#include "connection_based_routing.h" +#include "draw.h" +#include "netlist_routers.h" +#include "place_and_route.h" +#include "read_route.h" +#include "route.h" +#include "route_common.h" +#include "route_debug.h" +#include "route_export.h" +#include "route_profiling.h" +#include "route_utils.h" +#include "vtr_time.h" + +bool route(const Netlist<>& net_list, + int width_fac, + const t_router_opts& router_opts, + const t_analysis_opts& analysis_opts, + t_det_routing_arch* det_routing_arch, + std::vector& segment_inf, + NetPinsMatrix& net_delay, + std::shared_ptr timing_info, + std::shared_ptr delay_calc, + t_chan_width_dist chan_width_dist, + t_direct_inf* directs, + int num_directs, + ScreenUpdatePriority first_iteration_priority, + bool 
is_flat) { + auto& device_ctx = g_vpr_ctx.mutable_device(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& atom_ctx = g_vpr_ctx.atom(); + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + if (net_list.nets().empty()) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "No nets to route\n"); + } + + t_graph_type graph_type; + t_graph_type graph_directionality; + if (router_opts.route_type == GLOBAL) { + graph_type = GRAPH_GLOBAL; + graph_directionality = GRAPH_BIDIR; + } else { + graph_type = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR); + graph_directionality = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR); + } + + /* Set the channel widths */ + t_chan_width chan_width = init_chan(width_fac, chan_width_dist, graph_directionality); + + /* Set up the routing resource graph defined by this FPGA architecture. */ + int warning_count; + + create_rr_graph(graph_type, + device_ctx.physical_tile_types, + device_ctx.grid, + chan_width, + det_routing_arch, + segment_inf, + router_opts, + directs, + num_directs, + &warning_count, + is_flat); + + //Initialize drawing, now that we have an RR graph + init_draw_coords(width_fac); + + /* Allocate and load additional rr_graph information needed only by the router. */ + alloc_and_load_rr_node_route_structs(); + + init_route_structs(net_list, + router_opts.bb_factor, + router_opts.has_choking_spot, + is_flat); + + IntraLbPbPinLookup intra_lb_pb_pin_lookup(device_ctx.logical_block_types); + ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, intra_lb_pb_pin_lookup); + + auto choking_spots = set_nets_choking_spots(net_list, + route_ctx.net_terminal_groups, + route_ctx.net_terminal_group_num, + router_opts.has_choking_spot, + is_flat); + + //Initially, the router runs normally trying to reduce congestion while + //balancing other metrics (timing, wirelength, run-time etc.) 
+ RouterCongestionMode router_congestion_mode = RouterCongestionMode::NORMAL; + + //Initialize and properly size the lookups for profiling + profiling::profiling_initialization(get_max_pins_per_net(net_list)); + + /* + * Configure the routing predictor + */ + RoutingPredictor routing_predictor; + float abort_iteration_threshold = std::numeric_limits<float>::infinity(); //Default no early abort + if (router_opts.routing_failure_predictor == SAFE) { + abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_SAFE * router_opts.max_router_iterations; + } else if (router_opts.routing_failure_predictor == AGGRESSIVE) { + abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_AGGRESSIVE * router_opts.max_router_iterations; + } else { + VTR_ASSERT_MSG(router_opts.routing_failure_predictor == OFF, "Unrecognized routing failure predictor setting"); + } + + float high_effort_congestion_mode_iteration_threshold = router_opts.congested_routing_iteration_threshold_frac * router_opts.max_router_iterations; + + /* Set delay of ignored signals to zero. Non-ignored net delays are set by + * update_net_delays_from_route_tree() inside timing_driven_route_net(), + * which is only called for non-ignored nets. */ + for (auto net_id : net_list.nets()) { + if (net_list.net_is_ignored(net_id)) { + for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) { + net_delay[net_id][ipin] = 0.; + } + } + } + + CBRR connections_inf{net_list, route_ctx.net_rr_terminals, is_flat}; + + route_budgets budgeting_inf(net_list, is_flat); + + // This needs to be called before filling intra-cluster lookahead maps to ensure that the intra-cluster lookahead maps are initialized.
+ const RouterLookahead* router_lookahead = get_cached_router_lookahead(*det_routing_arch, + router_opts.lookahead_type, + router_opts.write_router_lookahead, + router_opts.read_router_lookahead, + segment_inf, + is_flat); + + if (is_flat) { + // If is_flat is true, the router lookahead maps related to intra-cluster resources should be initialized since + // they haven't been initialized when the map related to global resources was initialized. + auto cache_key = route_ctx.router_lookahead_cache_key_; + std::unique_ptr mut_router_lookahead(route_ctx.cached_router_lookahead_.release()); + VTR_ASSERT(mut_router_lookahead); + route_ctx.cached_router_lookahead_.clear(); + if (!router_opts.read_intra_cluster_router_lookahead.empty()) { + mut_router_lookahead->read_intra_cluster(router_opts.read_intra_cluster_router_lookahead); + } else { + mut_router_lookahead->compute_intra_tile(); + } + route_ctx.cached_router_lookahead_.set(cache_key, std::move(mut_router_lookahead)); + router_lookahead = get_cached_router_lookahead(*det_routing_arch, + router_opts.lookahead_type, + router_opts.write_router_lookahead, + router_opts.read_router_lookahead, + segment_inf, + is_flat); + if (!router_opts.write_intra_cluster_router_lookahead.empty()) { + router_lookahead->write_intra_cluster(router_opts.write_intra_cluster_router_lookahead); + } + } + + VTR_ASSERT(router_lookahead != nullptr); + + /* Routing parameters */ + float pres_fac = update_draw_pres_fac(router_opts.first_iter_pres_fac); /* Typically 0 -> ignore cong. 
*/ + int bb_fac = router_opts.bb_factor; + + /* When routing conflicts are detected the bounding boxes are scaled + * by BB_SCALE_FACTOR every BB_SCALE_ITER_COUNT iterations */ + constexpr float BB_SCALE_FACTOR = 2; + constexpr int BB_SCALE_ITER_COUNT = 5; + + size_t available_wirelength = calculate_wirelength_available(); + + /* Routing status and metrics */ + bool success = false; + WirelengthInfo wirelength_info; + OveruseInfo overuse_info(device_ctx.rr_graph.num_nodes()); + tatum::TimingPathInfo critical_path; + int itry; //Routing iteration number + int itry_conflicted_mode = 0; + + /* Best result so far */ + vtr::vector> best_routing; + t_clb_opins_used best_clb_opins_used_locally; + RoutingMetrics best_routing_metrics; + int legal_convergence_count = 0; + + /* Get initial criticalities from the lookahead */ + if (router_opts.with_timing_analysis && router_opts.initial_timing == e_router_initial_timing::LOOKAHEAD) { + vtr::ScopedStartFinishTimer init_timing_timer("Initializing router criticalities"); + //Estimate initial connection delays from the router lookahead + init_net_delay_from_lookahead(*router_lookahead, + net_list, + route_ctx.net_rr_terminals, + net_delay, + device_ctx.rr_graph, + is_flat); + + //Run STA to get estimated criticalities + timing_info->update(); + VTR_LOG("Initial Net Connection Criticality Histogram:\n"); + print_router_criticality_histogram(net_list, *timing_info, netlist_pin_lookup, is_flat); + } + + std::unique_ptr pin_timing_invalidator; + pin_timing_invalidator = make_net_pin_timing_invalidator( + router_opts.timing_update_type, + net_list, + netlist_pin_lookup, + atom_ctx.nlist, + atom_ctx.lookup, + *timing_info->timing_graph(), + is_flat); + + std::unique_ptr netlist_router = make_netlist_router( + net_list, + router_lookahead, + router_opts, + connections_inf, + net_delay, + netlist_pin_lookup, + timing_info, + pin_timing_invalidator.get(), + budgeting_inf, + routing_predictor, + choking_spots, + is_flat); + + RouterStats 
router_stats; + float prev_iter_cumm_time = 0; + vtr::Timer iteration_timer; + int num_net_bounding_boxes_updated = 0; + int itry_since_last_convergence = -1; + + // This heap is used for reserve_locally_used_opins. + BinaryHeap small_heap; + small_heap.init_heap(device_ctx.grid); + + // When RCV is enabled the router will not stop unless negative hold slack is 0 + // In some cases this isn't doable, due to global nets or intracluster routing issues + // In these cases RCV will finish early if it goes RCV_FINISH_EARLY_COUNTDOWN iterations without detecting resolvable negative hold slack + // Increasing this will make the router fail occasionally, decreasing will sometimes not let all hold violations be resolved + constexpr int RCV_FINISH_EARLY_COUNTDOWN = 15; + + int rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN; + + print_route_status_header(); + for (itry = 1; itry <= router_opts.max_router_iterations; ++itry) { + /* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */ + for (auto net_id : net_list.nets()) { + route_ctx.net_status.set_is_routed(net_id, false); + route_ctx.net_status.set_is_fixed(net_id, false); + } + + if (itry_since_last_convergence >= 0) { + ++itry_since_last_convergence; + } + + // Calculate this once and pass it into net routing to check if should reroute for hold + float worst_negative_slack = 0; + if (budgeting_inf.if_set()) { + worst_negative_slack = timing_info->hold_total_negative_slack(); + } + + /* Initial criticalities: set to 1 on the first iter if the user asked for it */ + if (router_opts.initial_timing == e_router_initial_timing::ALL_CRITICAL && itry == 1) + netlist_router->set_timing_info(make_constant_timing_info(1)); + else + netlist_router->set_timing_info(timing_info); + + /* Route each net */ + RouteIterResults iter_results = netlist_router->route_netlist(itry, pres_fac, worst_negative_slack); + + if (!iter_results.is_routable) { /* Disconnected RRG */ + return false; + } + + // Make sure any CLB 
OPINs used up by subblocks being hooked directly to them are reserved for that purpose + bool rip_up_local_opins = (itry == 1 ? false : true); + if (!is_flat) { + reserve_locally_used_opins(&small_heap, pres_fac, + router_opts.acc_fac, rip_up_local_opins, is_flat); + } + + /* + * Calculate metrics for the current routing + */ + bool routing_is_feasible = feasible_routing(); + float est_success_iteration = routing_predictor.estimate_success_iteration(); + + //Update resource costs and overuse info + if (itry == 1) { + pathfinder_update_acc_cost_and_overuse_info(0., overuse_info); /* Acc_fac=0 for first iter. */ + } else { + pathfinder_update_acc_cost_and_overuse_info(router_opts.acc_fac, overuse_info); + } + + wirelength_info = calculate_wirelength_info(net_list, available_wirelength); + routing_predictor.add_iteration_overuse(itry, overuse_info.overused_nodes); + + //Update timing based on the new routing + //Note that the net delays have already been updated by timing_driven_route_net + timing_info->update(); + timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during routing + pin_timing_invalidator->reset(); + + critical_path = timing_info->least_slack_critical_path(); + + VTR_ASSERT_SAFE(!router_opts.with_timing_analysis || check_net_delays(net_list, net_delay)); + + if (itry == 1 && router_opts.with_timing_analysis) { + generate_route_timing_reports(router_opts, analysis_opts, *timing_info, *delay_calc, is_flat); + } + + float iter_cumm_time = iteration_timer.elapsed_sec(); + float iter_elapsed_time = iter_cumm_time - prev_iter_cumm_time; + + //Output progress + print_route_status(itry, iter_elapsed_time, pres_fac, num_net_bounding_boxes_updated, iter_results.stats, overuse_info, wirelength_info, timing_info, est_success_iteration); + + prev_iter_cumm_time = iter_cumm_time; + + //Update graphics + if (itry == 1) { + update_screen(first_iteration_priority, "Routing...", ROUTING, timing_info); + } else { + 
update_screen(ScreenUpdatePriority::MINOR, "Routing...", ROUTING, timing_info); + } + + if (router_opts.save_routing_per_iteration) { + std::string filename = vtr::string_fmt("iteration_%03d.route", itry); + print_route(net_list, nullptr, filename.c_str(), is_flat); + } + + //Update router stats (total) + router_stats.combine(iter_results.stats); + + /* + * Are we finished? + */ + if (is_iteration_complete(routing_is_feasible, router_opts, itry, timing_info, rcv_finished_count == 0)) { + auto& router_ctx = g_vpr_ctx.routing(); + + if (is_better_quality_routing(best_routing, best_routing_metrics, wirelength_info, timing_info)) { + //Save routing + best_routing = router_ctx.route_trees; + best_clb_opins_used_locally = router_ctx.clb_opins_used_locally; + + success = true; + + //Update best metrics + if (router_opts.with_timing_analysis) { + check_net_delays(net_list, net_delay); + best_routing_metrics.sTNS = timing_info->setup_total_negative_slack(); + best_routing_metrics.sWNS = timing_info->setup_worst_negative_slack(); + best_routing_metrics.hTNS = timing_info->hold_total_negative_slack(); + best_routing_metrics.hWNS = timing_info->hold_worst_negative_slack(); + best_routing_metrics.critical_path = critical_path; + } + best_routing_metrics.used_wirelength = wirelength_info.used_wirelength(); + } + + //Decrease pres_fac so that critical connections will take more direct routes + //Note that we use first_iter_pres_fac here (typically zero), and switch to + //use initial_pres_fac on the next iteration. 
+ pres_fac = update_draw_pres_fac(router_opts.first_iter_pres_fac); + + //Reduce timing tolerances to re-route more delay-suboptimal signals + connections_inf.set_connection_criticality_tolerance(0.7); + connections_inf.set_connection_delay_tolerance(1.01); + + ++legal_convergence_count; + itry_since_last_convergence = 0; + + VTR_ASSERT(success); + } + + if (itry_since_last_convergence == 1) { + //We used first_iter_pres_fac when we started routing again + //after the first routing convergence. Since that is often zero, + //we want to set pres_fac to a reasonable (i.e. typically non-zero) + //value afterwards -- so it grows when multiplied by pres_fac_mult + pres_fac = update_draw_pres_fac(router_opts.initial_pres_fac); + } + + //Have we converged the maximum number of times, did not make any changes, or does it seem + //unlikely additional convergences will improve QoR? + if (legal_convergence_count >= router_opts.max_convergence_count + || iter_results.stats.connections_routed == 0 + || early_reconvergence_exit_heuristic(router_opts, itry_since_last_convergence, timing_info, best_routing_metrics)) { +#ifndef NO_GRAPHICS + update_router_info_and_check_bp(BP_ROUTE_ITER, -1); +#endif + break; //Done routing + } + + /* + * Abort checks: Should we give-up because this routing problem is unlikely to converge to a legal routing? 
+ */ + if (itry == 1 && early_exit_heuristic(router_opts, wirelength_info)) { +#ifndef NO_GRAPHICS + update_router_info_and_check_bp(BP_ROUTE_ITER, -1); +#endif + //Abort + break; + } + + //Estimate at what iteration we will converge to a legal routing + if (overuse_info.overused_nodes > ROUTING_PREDICTOR_MIN_ABSOLUTE_OVERUSE_THRESHOLD) { + //Only consider aborting if we have a significant number of overused resources + + if (!std::isnan(est_success_iteration) && est_success_iteration > abort_iteration_threshold && router_opts.routing_budgets_algorithm != YOYO) { + VTR_LOG("Routing aborted, the predicted iteration for a successful route (%.1f) is too high.\n", est_success_iteration); +#ifndef NO_GRAPHICS + update_router_info_and_check_bp(BP_ROUTE_ITER, -1); +#endif + break; //Abort + } + } + + if (itry == 1 && router_opts.exit_after_first_routing_iteration) { + VTR_LOG("Exiting after first routing iteration as requested\n"); +#ifndef NO_GRAPHICS + update_router_info_and_check_bp(BP_ROUTE_ITER, -1); +#endif + break; + } + + /* + * Prepare for the next iteration + */ + + if (router_opts.route_bb_update == e_route_bb_update::DYNAMIC) { + num_net_bounding_boxes_updated = dynamic_update_bounding_boxes(iter_results.rerouted_nets); + } + + if (itry >= high_effort_congestion_mode_iteration_threshold) { + //We are approaching the maximum number of routing iterations, + //and still do not have a legal routing. Switch to a mode which + //focuses more on attempting to resolve routing conflicts. 
+ router_congestion_mode = RouterCongestionMode::CONFLICTED; + } + + //Update pres_fac + if (itry == 1) { + pres_fac = update_draw_pres_fac(router_opts.initial_pres_fac); + } else { + pres_fac *= router_opts.pres_fac_mult; + + /* Avoid overflow for high iteration counts, even if acc_cost is big */ + pres_fac = update_draw_pres_fac(std::min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5))); + + // Increase short path criticality if it's having a hard time resolving hold violations due to congestion + if (budgeting_inf.if_set()) { + bool rcv_finished = false; + + /* This constant represents how much extra delay the budget increaser adds to the minimum and maximum delay budgets + * Experimentally this value delivers fast hold slack resolution, while not overwhelming the router + * Increasing this will make it resolve hold faster, but could result in lower circuit quality */ + constexpr float budget_increase_factor = 300e-12; + + if (itry > 5 && worst_negative_slack != 0) + rcv_finished = budgeting_inf.increase_min_budgets_if_struggling(budget_increase_factor, timing_info, worst_negative_slack, netlist_pin_lookup); + if (rcv_finished) + rcv_finished_count--; + else + rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN; + } + } + + if (router_congestion_mode == RouterCongestionMode::CONFLICTED) { + //The design appears to have routing conflicts which are difficult to resolve: + // 1) Don't re-route legal connections due to delay. This allows + // the router to focus on the actual conflicts + // 2) Increase the net bounding boxes. This potentially allows + // the router to route around otherwise congested regions + // (at the cost of high run-time). + + //Increase the size of the net bounding boxes to give the router more + //freedom to find alternate paths. + // + //In the case of routing conflicts there are multiple connections competing + //for the same resources which can not resolve the congestion themselves.
+ //In normal routing mode we try to keep the bounding boxes small to minimize + //run-time, but this can limit how far signals can detour (i.e. they can't + //route outside the bounding box), which can cause conflicts to oscillate back + //and forth without resolving. + // + //By scaling the bounding boxes here, we slowly increase the router's search + //space in hopes of it allowing signals to move further out of the way to + //alleviate the conflicts. + if (itry_conflicted_mode % BB_SCALE_ITER_COUNT == 0) { + //We scale the bounding boxes by BB_SCALE_FACTOR, + //every BB_SCALE_ITER_COUNT iterations. This ensures + //that we give the router some time (BB_SCALE_ITER_COUNT) to try to + //resolve/negotiate congestion at the new BB factor. + // + //Note that we increase the BB factor slowly to try and minimize + //the bounding box size (since larger bounding boxes slow the router down). + auto& grid = g_vpr_ctx.device().grid; + int max_grid_dim = std::max(grid.width(), grid.height()); + + //Scale by BB_SCALE_FACTOR but clip to grid size to avoid overflow + //(explicit <int>: the scaled value is a float, the grid dimension an int) + bb_fac = std::min<int>(max_grid_dim, bb_fac * BB_SCALE_FACTOR); + + route_ctx.route_bb = load_route_bb(net_list, bb_fac); + } + + ++itry_conflicted_mode; + } + + if (router_opts.with_timing_analysis) { + if (itry == 1) { + // first iteration sets up the lower bound connection delays since only timing is optimized for + connections_inf.set_stable_critical_path_delay(critical_path.delay()); + connections_inf.set_lower_bound_connection_delays(net_delay); + + //load budgets using information from uncongested delay information + budgeting_inf.load_route_budgets(net_delay, timing_info, netlist_pin_lookup, router_opts); + + if (router_opts.routing_budgets_algorithm == YOYO) + netlist_router->set_rcv_enabled(true); + } else { + bool stable_routing_configuration = true; + + /* + * Determine if any connection needs to be forcibly re-routed due to timing + */ + + //Yes, if explicitly enabled + bool should_ripup_for_delay =
(router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::ON); + + //Or, if things are not too congested + should_ripup_for_delay |= (router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::AUTO + && router_congestion_mode == RouterCongestionMode::NORMAL); + + if (should_ripup_for_delay) { + if (connections_inf.critical_path_delay_grew_significantly(critical_path.delay())) { + // only need to forcibly reroute if critical path grew significantly + stable_routing_configuration = connections_inf.forcibly_reroute_connections(router_opts.max_criticality, + timing_info, + netlist_pin_lookup, + net_delay); + } + } + + // not stable if any connection needs to be forcibly rerouted + if (stable_routing_configuration) { + connections_inf.set_stable_critical_path_delay(critical_path.delay()); + } + } + } else { + /* If timing analysis is not enabled, make sure that the criticalities and the + * net_delays stay as 0 so that wirelength can be optimized. */ + + for (auto net_id : net_list.nets()) { + for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) { + net_delay[net_id][ipin] = 0.; + } + } + } + + if (router_opts.congestion_analysis) profiling::congestion_analysis(); + if (router_opts.fanout_analysis) profiling::time_on_fanout_analysis(); + // profiling::time_on_criticality_analysis(); + } + + if (success) { + VTR_LOG("Restoring best routing\n"); + + auto& router_ctx = g_vpr_ctx.mutable_routing(); + + /* Restore congestion from best route */ + for (auto net_id : net_list.nets()) { + if (route_ctx.route_trees[net_id]) + pathfinder_update_cost_from_route_tree(route_ctx.route_trees[net_id]->root(), -1); + if (best_routing[net_id]) + pathfinder_update_cost_from_route_tree(best_routing[net_id]->root(), 1); + } + router_ctx.route_trees = best_routing; + router_ctx.clb_opins_used_locally = best_clb_opins_used_locally; + + prune_unused_non_configurable_nets(connections_inf, net_list); + + if (router_opts.with_timing_analysis) { + 
VTR_LOG("Critical path: %g ns\n", 1e9 * best_routing_metrics.critical_path.delay()); + } + + VTR_LOG("Successfully routed after %d routing iterations.\n", itry); + } else { + VTR_LOG("Routing failed.\n"); + + //If the routing fails, print the overused info + print_overused_nodes_status(router_opts, overuse_info); + +#ifdef VTR_ENABLE_DEBUG_LOGGING + if (f_router_debug) + print_invalid_routing_info(net_list, is_flat); +#endif + } + + if (router_opts.with_timing_analysis) { + VTR_LOG("Final Net Connection Criticality Histogram:\n"); + print_router_criticality_histogram(net_list, *timing_info, netlist_pin_lookup, is_flat); + } + + VTR_ASSERT(router_stats.heap_pushes >= router_stats.intra_cluster_node_pushes); + VTR_ASSERT(router_stats.heap_pops >= router_stats.intra_cluster_node_pops); + VTR_LOG( + "Router Stats: total_nets_routed: %zu total_connections_routed: %zu total_heap_pushes: %zu total_heap_pops: %zu " + "total_internal_heap_pushes: %zu total_internal_heap_pops: %zu total_external_heap_pushes: %zu total_external_heap_pops: %zu ", + router_stats.nets_routed, router_stats.connections_routed, router_stats.heap_pushes, router_stats.heap_pops, + router_stats.intra_cluster_node_pushes, router_stats.intra_cluster_node_pops, + router_stats.inter_cluster_node_pushes, router_stats.inter_cluster_node_pops); + for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) { + VTR_LOG("total_external_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]); + VTR_LOG("total_external_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pops[node_type_idx]); + VTR_LOG("total_internal_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]); + VTR_LOG("total_internal_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pops[node_type_idx]); + 
VTR_LOG("rt_node_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_pushes[node_type_idx]); + VTR_LOG("rt_node_%s_high_fanout_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_high_fanout_pushes[node_type_idx]); + VTR_LOG("rt_node_%s_entire_tree_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_entire_tree_pushes[node_type_idx]); + } + + VTR_LOG("total_number_of_adding_all_rt: %zu ", router_stats.add_all_rt); + VTR_LOG("total_number_of_adding_high_fanout_rt: %zu ", router_stats.add_high_fanout_rt); + VTR_LOG("total_number_of_adding_all_rt_from_calling_high_fanout_rt: %zu ", router_stats.add_all_rt_from_high_fanout); + VTR_LOG("\n"); + + return success; +} diff --git a/vpr/src/route/route.h b/vpr/src/route/route.h new file mode 100644 index 00000000000..cf6efb26311 --- /dev/null +++ b/vpr/src/route/route.h @@ -0,0 +1,33 @@ +#pragma once + +#include "RoutingDelayCalculator.h" +#include "timing_info.h" +#include "vpr_net_pins_matrix.h" +#include "vpr_types.h" +#include "netlist.h" + +/** Attempts a routing via the AIR algorithm [0]. + * + * \p width_fac specifies the relative width of the channels, while the members of + * \p router_opts determine the value of the costs assigned to routing + * resource node, etc. \p det_routing_arch describes the detailed routing + * architecture (connection and switch boxes) of the FPGA; it is used + * only if a DETAILED routing has been selected. + * + * [0]: K. E. Murray, S. Zhong, and V. Betz, "AIR: A fast but lazy timing-driven FPGA router", in ASPDAC 2020 + * + * \return Success status. 
*/ +bool route(const Netlist<>& net_list, + int width_fac, + const t_router_opts& router_opts, + const t_analysis_opts& analysis_opts, + t_det_routing_arch* det_routing_arch, + std::vector& segment_inf, + NetPinsMatrix& net_delay, + std::shared_ptr timing_info, + std::shared_ptr delay_calc, + t_chan_width_dist chan_width_dist, + t_direct_inf* directs, + int num_directs, + ScreenUpdatePriority first_iteration_priority, + bool is_flat); diff --git a/vpr/src/route/route_budgets.cpp b/vpr/src/route/route_budgets.cpp index ff14ec752ac..00dd14ae2bc 100644 --- a/vpr/src/route/route_budgets.cpp +++ b/vpr/src/route/route_budgets.cpp @@ -44,7 +44,6 @@ #include "vtr_assert.h" #include "vtr_log.h" -#include "route_timing.h" #include "tatum/report/TimingPathFwd.hpp" #include "tatum/base/TimingType.hpp" #include "concrete_timing_info.h" diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index 2a2f9cb787e..ccc179320fb 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -1,44 +1,12 @@ -#include -#include -#include -#include -#include -#include - -#include "route_tree.h" -#include "vtr_assert.h" -#include "vtr_util.h" -#include "vtr_log.h" -#include "vtr_digest.h" -#include "vtr_memory.h" - -#include "vpr_types.h" -#include "vpr_error.h" -#include "vpr_utils.h" - -#include "stats.h" -#include "globals.h" -#include "route_export.h" -#include "route_common.h" -#include "route_parallel.h" -#include "route_timing.h" -#include "place_and_route.h" -#include "rr_graph.h" -#include "rr_graph2.h" -#include "read_xml_arch_file.h" -#include "draw.h" -#include "echo_files.h" -#include "atom_netlist_utils.h" - -#include "route_profiling.h" +/** @file Impls for more router utils */ -#include "timing_util.h" -#include "RoutingDelayCalculator.h" -#include "timing_info.h" -#include "tatum/echo_writer.hpp" -#include "binary_heap.h" -#include "bucket.h" +#include "atom_netlist_utils.h" +#include "connection_router_interface.h" #include 
"draw_global.h" +#include "place_and_route.h" +#include "route_common.h" +#include "route_export.h" +#include "rr_graph.h" /* The numbering relation between the channels and clbs is: * * * @@ -69,7 +37,7 @@ * chan_width_y[0] chan_width_y[1] * * */ -/******************** Subroutines local to route_common.c *******************/ +/******************** Subroutines local to route_common.cpp *******************/ static vtr::vector> load_net_rr_terminals(const RRGraphView& rr_graph, const Netlist<>& net_list, bool is_flat); @@ -107,7 +75,7 @@ void save_routing(vtr::vector>& best_routi saved_clb_opins_used_locally = clb_opins_used_locally; } -/* Empties route_ctx.current_rt and copies over best_routing onto it. +/* Empties route_ctx.route_trees and copies over best_routing onto it. * Also restores the locally used opin data. */ void restore_routing(vtr::vector>& best_routing, t_clb_opins_used& clb_opins_used_locally, @@ -150,170 +118,7 @@ void get_serial_num(const Netlist<>& net_list) { VTR_LOG("Serial number (magic cookie) for the routing is: %d\n", serial_num); } -void try_graph(int width_fac, - const t_router_opts& router_opts, - t_det_routing_arch* det_routing_arch, - std::vector& segment_inf, - t_chan_width_dist chan_width_dist, - t_direct_inf* directs, - int num_directs, - bool is_flat) { - auto& device_ctx = g_vpr_ctx.mutable_device(); - - t_graph_type graph_type; - t_graph_type graph_directionality; - if (router_opts.route_type == GLOBAL) { - graph_type = GRAPH_GLOBAL; - graph_directionality = GRAPH_BIDIR; - } else { - graph_type = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR); - graph_directionality = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR); - } - - /* Set the channel widths */ - t_chan_width chan_width = init_chan(width_fac, chan_width_dist, graph_directionality); - - /* Free any old routing graph, if one exists. 
*/ - free_rr_graph(); - - /* Set up the routing resource graph defined by this FPGA architecture. */ - int warning_count; - create_rr_graph(graph_type, - device_ctx.physical_tile_types, - device_ctx.grid, - chan_width, - det_routing_arch, - segment_inf, - router_opts, - directs, num_directs, - &warning_count, - is_flat); -} - -bool try_route(const Netlist<>& net_list, - int width_fac, - const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - t_det_routing_arch* det_routing_arch, - std::vector& segment_inf, - NetPinsMatrix& net_delay, - std::shared_ptr timing_info, - std::shared_ptr delay_calc, - t_chan_width_dist chan_width_dist, - t_direct_inf* directs, - int num_directs, - ScreenUpdatePriority first_iteration_priority, - bool is_flat) { - /* Attempts a routing via an iterated maze router algorithm. Width_fac * - * specifies the relative width of the channels, while the members of * - * router_opts determine the value of the costs assigned to routing * - * resource node, etc. det_routing_arch describes the detailed routing * - * architecture (connection and switch boxes) of the FPGA; it is used * - * only if a DETAILED routing has been selected. */ - - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& cluster_ctx = g_vpr_ctx.clustering(); - - t_graph_type graph_type; - t_graph_type graph_directionality; - if (router_opts.route_type == GLOBAL) { - graph_type = GRAPH_GLOBAL; - graph_directionality = GRAPH_BIDIR; - } else { - graph_type = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR); - graph_directionality = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR); - } - - /* Set the channel widths */ - t_chan_width chan_width = init_chan(width_fac, chan_width_dist, graph_directionality); - - /* Set up the routing resource graph defined by this FPGA architecture. 
*/ - int warning_count; - - create_rr_graph(graph_type, - device_ctx.physical_tile_types, - device_ctx.grid, - chan_width, - det_routing_arch, - segment_inf, - router_opts, - directs, - num_directs, - &warning_count, - is_flat); - - //Initialize drawing, now that we have an RR graph - init_draw_coords(width_fac); - - bool success = true; - - /* Allocate and load additional rr_graph information needed only by the router. */ - alloc_and_load_rr_node_route_structs(); - - init_route_structs(net_list, - router_opts.bb_factor, - router_opts.has_choking_spot, - is_flat); - - if (net_list.nets().empty()) { - VTR_LOG_WARN("No nets to route\n"); - } - - if (router_opts.router_algorithm == PARALLEL) { - VTR_LOG("Confirming router algorithm: PARALLEL.\n"); - -#ifdef VPR_USE_TBB - auto& atom_ctx = g_vpr_ctx.atom(); - - IntraLbPbPinLookup intra_lb_pb_pin_lookup(device_ctx.logical_block_types); - ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, intra_lb_pb_pin_lookup); - - success = try_parallel_route(net_list, - *det_routing_arch, - router_opts, - analysis_opts, - segment_inf, - net_delay, - netlist_pin_lookup, - timing_info, - delay_calc, - first_iteration_priority, - is_flat); - - profiling::time_on_fanout_analysis(); -#else - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "VPR was not compiled with TBB support required for parallel routing\n"); -#endif - - } else { /* TIMING_DRIVEN route */ - VTR_LOG("Confirming router algorithm: TIMING_DRIVEN.\n"); - auto& atom_ctx = g_vpr_ctx.atom(); - - IntraLbPbPinLookup intra_lb_pb_pin_lookup(device_ctx.logical_block_types); - ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, intra_lb_pb_pin_lookup); - success = try_timing_driven_route(net_list, - *det_routing_arch, - router_opts, - analysis_opts, - segment_inf, - net_delay, - netlist_pin_lookup, - timing_info, - delay_calc, - first_iteration_priority, - is_flat); - - profiling::time_on_fanout_analysis(); - } - - return (success); 
-} - bool feasible_routing() { - /* This routine checks to see if this is a resource-feasible routing. * - * That is, are all rr_node capacity limitations respected? It assumes * - * that the occupancy arrays are up to date when it is called. */ - auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; auto& route_ctx = g_vpr_ctx.routing(); @@ -327,7 +132,7 @@ bool feasible_routing() { return (true); } -//Returns all RR nodes in the current routing which are congested +/** Returns all RR nodes in the current routing which are congested */ std::vector collect_congested_rr_nodes() { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; @@ -364,10 +169,9 @@ vtr::vector> collect_rr_node_nets() { return rr_node_nets; } +/** Updates pathfinder's occupancy by either adding or removing the + * usage of a resource node. */ void pathfinder_update_single_node_occupancy(RRNodeId inode, int add_or_sub) { - /* Updates pathfinder's occupancy by either adding or removing the - * usage of a resource node. */ - auto& route_ctx = g_vpr_ctx.mutable_routing(); int occ = route_ctx.rr_node_route_inf[inode].occ() + add_or_sub; @@ -376,14 +180,13 @@ void pathfinder_update_single_node_occupancy(RRNodeId inode, int add_or_sub) { VTR_ASSERT(occ >= 0); } +/** This routine recomputes the acc_cost (accumulated congestion cost) of each + * routing resource for the pathfinder algorithm after all nets have been routed. + * It updates the accumulated cost by adding in the number of extra signals + * sharing a resource right now (i.e. after each complete iteration) times acc_fac. + * THIS ROUTINE ASSUMES THE OCCUPANCY VALUES IN RR_NODE ARE UP TO DATE. + * This routine also creates a new overuse info for the current routing iteration. 
*/ void pathfinder_update_acc_cost_and_overuse_info(float acc_fac, OveruseInfo& overuse_info) { - /* This routine recomputes the acc_cost (accumulated congestion cost) of each * - * routing resource for the pathfinder algorithm after all nets have been routed. * - * It updates the accumulated cost to by adding in the number of extra signals * - * sharing a resource right now (i.e. after each complete iteration) times acc_fac. * - * THIS ROUTINE ASSUMES THE OCCUPANCY VALUES IN RR_NODE ARE UP TO DATE. * - * This routine also creates a new overuse info for the current routing iteration. */ - auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; auto& route_ctx = g_vpr_ctx.mutable_routing(); @@ -417,20 +220,6 @@ void pathfinder_update_cost_from_route_tree(const RouteTreeNode& root, int add_o } } -float update_pres_fac(float new_pres_fac) { - /* This routine should take the new value of the present congestion factor * - * and propagate it to all the relevant data fields in the vpr flow. * - * Currently, it only updates the pres_fac used by the drawing functions */ -#ifndef NO_GRAPHICS - - // Only updates the drawing pres_fac if graphics is enabled - get_draw_state_vars()->pres_fac = new_pres_fac; - -#endif // NO_GRAPHICS - - return new_pres_fac; -} - /* Call this before you route any nets. It frees any old route trees and * sets the list of rr_nodes touched to empty. */ void init_route_structs(const Netlist<>& net_list, @@ -595,12 +384,9 @@ static t_clb_opins_used alloc_and_load_clb_opins_used_locally() { return (clb_opins_used_locally); } -/*the trace lists are only freed after use by the timing-driven placer */ -/*Do not free them after use by the router, since stats, and draw */ -/*routines use the trace values */ +/* Frees the temporary storage needed only during the routing. The + * final routing result is not freed. */ void free_route_structs() { - /* Frees the temporary storage needed only during the routing. 
The * - * final routing result is not freed. */ auto& route_ctx = g_vpr_ctx.mutable_routing(); if (route_ctx.route_bb.size() != 0) { diff --git a/vpr/src/route/route_common.h b/vpr/src/route/route_common.h index 68e525e10b0..203e2880059 100644 --- a/vpr/src/route/route_common.h +++ b/vpr/src/route/route_common.h @@ -1,14 +1,19 @@ -/************ Defines and types shared by all route files ********************/ #pragma once + +/** @file Misc. router utils: some used by the connection router, some by other + * router files and some used globally. */ + #include #include "clustered_netlist.h" -#include "vtr_vector.h" -#include "heap_type.h" #include "rr_node_fwd.h" #include "router_stats.h" #include "globals.h" -/******* Subroutines in route_common used only by other router modules ******/ +/** This routine checks to see if this is a resource-feasible routing. + * That is, are all rr_node capacity limitations respected? It assumes + * that the occupancy arrays are up to date when it is called. 
*/ +bool feasible_routing(); + vtr::vector load_route_bb(const Netlist<>& net_list, int bb_factor); @@ -23,8 +28,6 @@ void pathfinder_update_acc_cost_and_overuse_info(float acc_fac, OveruseInfo& ove /** Update pathfinder cost of all nodes under root (including root) */ void pathfinder_update_cost_from_route_tree(const RouteTreeNode& root, int add_or_sub); -float update_pres_fac(float new_pres_fac); - void reset_path_costs(const std::vector& visited_rr_nodes); float get_rr_cong_cost(RRNodeId inode, float pres_fac); diff --git a/vpr/src/route/route_debug.cpp b/vpr/src/route/route_debug.cpp new file mode 100644 index 00000000000..c0c8d3dd24e --- /dev/null +++ b/vpr/src/route/route_debug.cpp @@ -0,0 +1,30 @@ +#include "route_debug.h" + +std::atomic_bool f_router_debug = false; + +void enable_router_debug( + const t_router_opts& router_opts, + ParentNetId net, + RRNodeId sink_rr, + int router_iteration, + ConnectionRouterInterface* router) { + bool active_net_debug = (router_opts.router_debug_net >= -1); + bool active_sink_debug = (router_opts.router_debug_sink_rr >= 0); + bool active_iteration_debug = (router_opts.router_debug_iteration >= 0); + + bool match_net = (ParentNetId(router_opts.router_debug_net) == net || router_opts.router_debug_net == -1); + bool match_sink = (router_opts.router_debug_sink_rr == int(size_t((sink_rr))) || router_opts.router_debug_sink_rr < 0); + bool match_iteration = (router_opts.router_debug_iteration == router_iteration || router_opts.router_debug_iteration < 0); + + f_router_debug = active_net_debug || active_sink_debug || active_iteration_debug; + + if (active_net_debug) f_router_debug = f_router_debug && match_net; + if (active_sink_debug) f_router_debug = f_router_debug && match_sink; + if (active_iteration_debug) f_router_debug = f_router_debug && match_iteration; + + router->set_router_debug(f_router_debug); + +#ifndef VTR_ENABLE_DEBUG_LOGGING + VTR_LOGV_WARN(f_router_debug, "Limited router debug output provided since compiled 
without VTR_ENABLE_DEBUG_LOGGING defined\n"); +#endif +} diff --git a/vpr/src/route/route_debug.h b/vpr/src/route/route_debug.h new file mode 100644 index 00000000000..94c874da706 --- /dev/null +++ b/vpr/src/route/route_debug.h @@ -0,0 +1,20 @@ +#pragma once + +/** @file Utils for debugging the router */ + +#include +#include "connection_router_interface.h" +#include "vpr_types.h" + +/** @brief Run-time flag to control when router debug information is printed + * Note only enables debug output if compiled with VTR_ENABLE_DEBUG_LOGGING defined + * f_router_debug is used to stop the router when a breakpoint is reached. When a breakpoint is reached, this flag is set to true. + * + * In addition f_router_debug is used to print additional debug information during routing, for instance lookahead expected costs + * information. + * + * d2: Made atomic as an attempt to make it work with parallel routing, but don't expect reliable results. */ +extern std::atomic_bool f_router_debug; + +/** Enable f_router_debug if specific sink/net debugging is set in \p router_opts */ +void enable_router_debug(const t_router_opts& router_opts, ParentNetId net, RRNodeId sink_rr, int router_iteration, ConnectionRouterInterface* router); diff --git a/vpr/src/route/route_export.h b/vpr/src/route/route_export.h index 3aa1703647e..971aeba966b 100644 --- a/vpr/src/route/route_export.h +++ b/vpr/src/route/route_export.h @@ -1,43 +1,20 @@ -/******** Function prototypes for functions in route_common.cpp that *********** - ******** are used outside the router modules. ***********/ -#include "vpr_types.h" +#pragma once + +/** @file Function prototypes for functions in route_common.cpp that + * are used outside the router modules. 
*/ + #include -#include "timing_info_fwd.h" + #include "route_common.h" -#include "RoutingDelayCalculator.h" +#include "timing_info_fwd.h" +#include "vpr_types.h" -void try_graph(int width_fac, - const t_router_opts& router_opts, - t_det_routing_arch* det_routing_arch, - std::vector& segment_inf, - t_chan_width_dist chan_width_dist, - t_direct_inf* directs, - int num_directs, - bool is_flat); - -bool try_route(const Netlist<>& net_list, - int width_fac, - const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - t_det_routing_arch* det_routing_arch, - std::vector& segment_inf, - NetPinsMatrix& net_delay, - std::shared_ptr timing_info, - std::shared_ptr delay_calc, - t_chan_width_dist chan_width_dist, - t_direct_inf* directs, - int num_directs, - ScreenUpdatePriority first_iteration_priority, - bool is_flat); - -bool feasible_routing(); +#include "RoutingDelayCalculator.h" std::vector collect_congested_rr_nodes(); vtr::vector> collect_rr_node_nets(); -t_clb_opins_used alloc_route_structs(); - void free_route_structs(); void save_routing(vtr::vector>& best_routing, diff --git a/vpr/src/route/route_net.cpp b/vpr/src/route/route_net.cpp new file mode 100644 index 00000000000..8f36e68836b --- /dev/null +++ b/vpr/src/route/route_net.cpp @@ -0,0 +1,338 @@ +/** @file Impls for non-templated net routing fns & utils */ + +#include "route_net.h" +#include "stats.h" + +bool check_hold(const t_router_opts& router_opts, float worst_neg_slack) { + if (router_opts.routing_budgets_algorithm != YOYO) { + return false; + } else if (worst_neg_slack != 0) { + return true; + } + return false; +} + +void setup_routing_resources(int itry, + ParentNetId net_id, + const Netlist<>& net_list, + unsigned num_sinks, + int min_incremental_reroute_fanout, + CBRR& connections_inf, + const t_router_opts& router_opts, + bool ripup_high_fanout_nets) { + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + /* "tree" points to this net's spot in the global context here, so 
re-initializing it etc. changes the global state */ + vtr::optional& tree = route_ctx.route_trees[net_id]; + + // for nets below a certain size (min_incremental_reroute_fanout), rip up any old routing + // otherwise, we incrementally reroute by reusing legal parts of the previous iteration + if ((int)num_sinks < min_incremental_reroute_fanout || itry == 1 || ripup_high_fanout_nets) { + profiling::net_rerouted(); + + /* rip up the whole net */ + if (tree) + pathfinder_update_cost_from_route_tree(tree.value().root(), -1); + tree = vtr::nullopt; + + /* re-initialize net */ + tree = RouteTree(net_id); + pathfinder_update_cost_from_route_tree(tree.value().root(), 1); + + // since all connections will be rerouted for this net, clear all of net's forced reroute flags + connections_inf.clear_force_reroute_for_net(net_id); + + // when we don't prune the tree, we also don't know the sink node indices + // thus we'll use functions that act on pin indices like mark_ends instead + // of their versions that act on node indices directly like mark_remaining_ends + mark_ends(net_list, net_id); + } else { + profiling::net_rebuild_start(); + + if (!tree) { + tree = RouteTree(net_id); + pathfinder_update_cost_from_route_tree(tree.value().root(), 1); + } + + /* copy the existing routing + * prune() depends on global occ, so we can't subtract before pruning + * OPT: to skip this copy, return a "diff" from RouteTree::prune */ + RouteTree tree2 = tree.value(); + + // Skip this check if RCV is enabled, as RCV can use another method to cause reroutes + VTR_ASSERT_SAFE(should_route_net(net_id, connections_inf, true) || router_opts.routing_budgets_algorithm == YOYO); + + // Prune the copy (using congestion data before subtraction) + vtr::optional pruned_tree2 = tree2.prune(connections_inf); + + // Subtract congestion using the non-pruned original + pathfinder_update_cost_from_route_tree(tree->root(), -1); + + if (pruned_tree2) { //Partially pruned + profiling::route_tree_preserved(); + + // 
Add back congestion for the pruned route tree + pathfinder_update_cost_from_route_tree(pruned_tree2->root(), 1); + // pruned_tree2 is no longer required -> we can move rather than copy + tree = std::move(pruned_tree2.value()); + } else { // Fully destroyed + profiling::route_tree_pruned(); + + // Initialize only to source + tree = RouteTree(net_id); + pathfinder_update_cost_from_route_tree(tree->root(), 1); + } + + profiling::net_rebuild_end(num_sinks, tree->get_remaining_isinks().size()); + + // still need to calculate the tree's time delay + tree->reload_timing(); + + // check for R_upstream C_downstream and edge correctness + VTR_ASSERT_SAFE(tree->is_valid()); + + // congestion should've been pruned away + VTR_ASSERT_SAFE(tree->is_uncongested()); + + // mark remaining ends + mark_remaining_ends(net_id); + + // mark the lookup (rr_node_route_inf) for existing tree elements as NO_PREVIOUS so add_to_path stops when it reaches one of them + update_rr_route_inf_from_tree(tree->root()); + } + + // completed constructing the partial route tree and updated all other data structures to match +} + +void update_rr_base_costs(int fanout) { + auto& device_ctx = g_vpr_ctx.mutable_device(); + + float factor; + size_t index; + + /* Other reasonable values for factor include fanout and 1 */ + factor = sqrt(fanout); + + for (index = CHANX_COST_INDEX_START; index < device_ctx.rr_indexed_data.size(); index++) { + if (device_ctx.rr_indexed_data[RRIndexedDataId(index)].T_quadratic > 0.) 
{ /* pass transistor */ + device_ctx.rr_indexed_data[RRIndexedDataId(index)].base_cost = device_ctx.rr_indexed_data[RRIndexedDataId(index)].saved_base_cost * factor; + } else { + device_ctx.rr_indexed_data[RRIndexedDataId(index)].base_cost = device_ctx.rr_indexed_data[RRIndexedDataId(index)].saved_base_cost; + } + } +} + +void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node) { + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + for (auto& node : rt_node.all_nodes()) { + RRNodeId inode = node.inode; + route_ctx.rr_node_route_inf[inode].prev_node = RRNodeId::INVALID(); + route_ctx.rr_node_route_inf[inode].prev_edge = RREdgeId::INVALID(); + + // path cost should be unset + VTR_ASSERT(std::isinf(route_ctx.rr_node_route_inf[inode].path_cost)); + VTR_ASSERT(std::isinf(route_ctx.rr_node_route_inf[inode].backward_path_cost)); + } +} + +bool should_route_net(ParentNetId net_id, + CBRR& connections_inf, + bool if_force_reroute) { + auto& route_ctx = g_vpr_ctx.routing(); + auto& device_ctx = g_vpr_ctx.device(); + const auto& rr_graph = device_ctx.rr_graph; + + if (!route_ctx.route_trees[net_id]) { + /* No routing yet. 
*/ + return true; + } + + const RouteTree& tree = route_ctx.route_trees[net_id].value(); + + /* Walk over all rt_nodes in the net */ + for (auto& rt_node : tree.all_nodes()) { + RRNodeId inode = rt_node.inode; + int occ = route_ctx.rr_node_route_inf[inode].occ(); + int capacity = rr_graph.node_capacity(inode); + + if (occ > capacity) { + return true; /* overuse detected */ + } + + if (rt_node.is_leaf()) { //End of a branch + // even if net is fully routed, not complete if parts of it should get ripped up (EXPERIMENTAL) + if (if_force_reroute) { + if (connections_inf.should_force_reroute_connection(net_id, inode)) { + return true; + } + } + } + } + + /* If all sinks have been routed to without overuse, no need to route this */ + if (tree.get_remaining_isinks().empty()) + return false; + + return true; +} + +bool early_exit_heuristic(const t_router_opts& router_opts, const WirelengthInfo& wirelength_info) { + if (wirelength_info.used_wirelength_ratio() > router_opts.init_wirelength_abort_threshold) { + VTR_LOG("Wire length usage ratio %g exceeds limit of %g, fail routing.\n", + wirelength_info.used_wirelength_ratio(), + router_opts.init_wirelength_abort_threshold); + return true; + } + return false; +} + +float get_net_pin_criticality(const SetupHoldTimingInfo* timing_info, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + float max_criticality, + float criticality_exp, + ParentNetId net_id, + ParentPinId pin_id, + bool is_flat) { + float pin_criticality = 0.0; + const auto& route_ctx = g_vpr_ctx.routing(); + + if (route_ctx.is_clock_net[net_id]) { + pin_criticality = max_criticality; + } else { + pin_criticality = calculate_clb_net_pin_criticality(*timing_info, + netlist_pin_lookup, + pin_id, + is_flat); + } + + /* Pin criticality is between 0 and 1. + * Shift it downwards by 1 - max_criticality (max_criticality is 0.99 by default, + * so shift down by 0.01) and cut off at 0. 
This means that all pins with small + * criticalities (<0.01) get criticality 0 and are ignored entirely, and everything + * else becomes a bit less critical. This effect becomes more pronounced if + * max_criticality is set lower. */ + // VTR_ASSERT(pin_criticality[ipin] > -0.01 && pin_criticality[ipin] < 1.01); + pin_criticality = std::max(pin_criticality - (1.0 - max_criticality), 0.0); + + /* Take pin criticality to some power (1 by default). */ + pin_criticality = std::pow(pin_criticality, criticality_exp); + + /* Cut off pin criticality at max_criticality. */ + pin_criticality = std::min(pin_criticality, max_criticality); + + return pin_criticality; +} + +size_t calculate_wirelength_available() { + auto& device_ctx = g_vpr_ctx.device(); + const auto& rr_graph = device_ctx.rr_graph; + + size_t available_wirelength = 0; + // But really what's happening is that this for loop iterates over every node and determines the available wirelength + for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { + const t_rr_type channel_type = rr_graph.node_type(rr_id); + if (channel_type == CHANX || channel_type == CHANY) { + available_wirelength += rr_graph.node_capacity(rr_id) * rr_graph.node_length(rr_id); + } + } + return available_wirelength; +} + +WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t available_wirelength) { + size_t used_wirelength = 0; + VTR_ASSERT(available_wirelength > 0); + + auto& route_ctx = g_vpr_ctx.routing(); + + for (auto net_id : net_list.nets()) { + if (!net_list.net_is_ignored(net_id) + && net_list.net_sinks(net_id).size() != 0 /* Globals don't count. 
*/ + && route_ctx.route_trees[net_id]) { + int bends = 0, wirelength = 0, segments = 0; + bool is_absorbed; + get_num_bends_and_length(net_id, &bends, &wirelength, &segments, &is_absorbed); + + used_wirelength += wirelength; + } + } + + return WirelengthInfo(available_wirelength, used_wirelength); +} + +t_bb calc_current_bb(const RouteTree& tree) { + auto& device_ctx = g_vpr_ctx.device(); + const auto& rr_graph = device_ctx.rr_graph; + auto& grid = device_ctx.grid; + + t_bb bb; + bb.xmin = grid.width() - 1; + bb.ymin = grid.height() - 1; + bb.layer_min = grid.get_num_layers() - 1; + bb.xmax = 0; + bb.ymax = 0; + bb.layer_max = 0; + + for (auto& rt_node : tree.all_nodes()) { + //The router interprets RR nodes which cross the boundary as being + //'within' the BB. Only those which are *strictly* outside the + //box are excluded, hence we use the node's xhigh/yhigh for xmin/ymin, + //and xlow/ylow for xmax/ymax calculations + bb.xmin = std::min(bb.xmin, rr_graph.node_xhigh(rt_node.inode)); + bb.ymin = std::min(bb.ymin, rr_graph.node_yhigh(rt_node.inode)); + bb.layer_min = std::min(bb.layer_min, rr_graph.node_layer(rt_node.inode)); + bb.xmax = std::max(bb.xmax, rr_graph.node_xlow(rt_node.inode)); + bb.ymax = std::max(bb.ymax, rr_graph.node_ylow(rt_node.inode)); + bb.layer_max = std::max(bb.layer_max, rr_graph.node_layer(rt_node.inode)); + } + + VTR_ASSERT(bb.xmin <= bb.xmax); + VTR_ASSERT(bb.ymin <= bb.ymax); + + return bb; +} + +// Initializes net_delay based on best-case delay estimates from the router lookahead +void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead, + const Netlist<>& net_list, + const vtr::vector>& net_rr_terminals, + NetPinsMatrix& net_delay, + const RRGraphView& rr_graph, + bool is_flat) { + t_conn_cost_params cost_params; + cost_params.criticality = 1.; // Ensures lookahead returns delay value + + for (auto net_id : net_list.nets()) { + if (net_list.net_is_ignored(net_id)) continue; + + RRNodeId source_rr = 
net_rr_terminals[net_id][0]; + + for (size_t ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) { + RRNodeId sink_rr = net_rr_terminals[net_id][ipin]; + + float est_delay = get_cost_from_lookahead(router_lookahead, + rr_graph, + source_rr, + sink_rr, + 0., + cost_params, + is_flat); + VTR_ASSERT(std::isfinite(est_delay) && est_delay < std::numeric_limits::max()); + + net_delay[net_id][ipin] = est_delay; + } + } +} + +void update_net_delays_from_route_tree(float* net_delay, + const Netlist<>& net_list, + ParentNetId inet, + TimingInfo* timing_info, + NetPinTimingInvalidator* pin_timing_invalidator) { + auto& route_ctx = g_vpr_ctx.routing(); + const RouteTree& tree = route_ctx.route_trees[inet].value(); + + for (unsigned int isink = 1; isink < net_list.net_pins(inet).size(); isink++) { + update_net_delay_from_isink(net_delay, tree, isink, net_list, inet, timing_info, pin_timing_invalidator); + } +} diff --git a/vpr/src/route/route_net.h b/vpr/src/route/route_net.h new file mode 100644 index 00000000000..fc08a0ddb19 --- /dev/null +++ b/vpr/src/route/route_net.h @@ -0,0 +1,119 @@ +#pragma once + +/** @file Net and sink routing functions, and other utils used by them. */ + +#include +#include + +#include "connection_based_routing.h" +#include "connection_router_interface.h" +#include "heap_type.h" +#include "netlist.h" +#include "route_budgets.h" +#include "route_utils.h" +#include "router_stats.h" +#include "router_lookahead.h" +#include "routing_predictor.h" +#include "rr_graph_type.h" +#include "spatial_route_tree_lookup.h" +#include "timing_info_fwd.h" +#include "vpr_types.h" +#include "vpr_utils.h" + +#include "NetPinTimingInvalidator.h" + +/** Results from attempting to route a net. + * success: Could we route it? + * was_rerouted: Is the routing different from the last one? (set by try_* functions) + * retry_with_full_bb: Should we retry this net with a full-device bounding box? 
/** Classify a net as high fanout.
 *
 * @param fanout Number of sinks of the net
 * @param fanout_threshold Minimum fanout that counts as "high"; a negative
 *                         value disables the classification entirely
 * @return true iff the threshold is non-negative and \p fanout reaches it */
constexpr bool is_high_fanout(int fanout, int fanout_threshold) {
    return fanout_threshold >= 0 && fanout >= fanout_threshold;
}
*/ +void setup_routing_resources(int itry, + ParentNetId net_id, + const Netlist<>& net_list, + unsigned num_sinks, + int min_incremental_reroute_fanout, + CBRR& connections_inf, + const t_router_opts& router_opts, + bool ripup_high_fanout_nets); + +/** Detect if net should be routed or not */ +bool should_route_net(ParentNetId net_id, + CBRR& connections_inf, + bool if_force_reroute); + +/** Update net_delay value for a single sink in a RouteTree. */ +inline void update_net_delay_from_isink(float* net_delay, + const RouteTree& tree, + int isink, + const Netlist<>& net_list, + ParentNetId inet, + TimingInfo* timing_info, + NetPinTimingInvalidator* pin_timing_invalidator) { + float new_delay = tree.find_by_isink(isink)->Tdel; + + if (pin_timing_invalidator && new_delay != net_delay[isink]) { + //Delay changed, invalidate for incremental timing update + VTR_ASSERT_SAFE(timing_info); + ParentPinId pin = net_list.net_pin(inet, isink); + pin_timing_invalidator->invalidate_connection(pin, timing_info); + } + + net_delay[isink] = new_delay; +} + +/** Goes through all the sinks of this net and copies their delay values from + * the route_tree to the net_delay array. */ +void update_net_delays_from_route_tree(float* net_delay, + const Netlist<>& net_list, + ParentNetId inet, + TimingInfo* timing_info, + NetPinTimingInvalidator* pin_timing_invalidator); + +/** Change the base costs of rr_nodes globally according to # of fanouts + * TODO: is this even thread safe? 
*/ +void update_rr_base_costs(int fanout); + +/** Traverses down a route tree and updates rr_node_inf for all nodes + * to reflect that these nodes have already been routed to */ +void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node); + +#include "route_net.tpp" diff --git a/vpr/src/route/route_net.tpp b/vpr/src/route/route_net.tpp new file mode 100644 index 00000000000..8542d8f306d --- /dev/null +++ b/vpr/src/route/route_net.tpp @@ -0,0 +1,478 @@ +#pragma once + +/** @file Header implementations for templated net routing fns. */ + +#include +#include "route_net.h" + +#include "connection_router_interface.h" +#include "describe_rr_node.h" +#include "draw.h" +#include "route_common.h" +#include "route_debug.h" +#include "route_profiling.h" +#include "rr_graph_fwd.h" + +/** Attempt to route a single net. + * + * @param router The ConnectionRouter instance + * @param net_list Input netlist + * @param net_id + * @param itry # of iteration + * @param pres_fac + * @param router_opts + * @param connections_inf + * @param router_stats + * @param pin_criticality + * @param rt_node_of_sink Lookup from target_pin-like indices (indicating SINK nodes) to RouteTreeNodes + * @param net_delay + * @param netlist_pin_lookup + * @param timing_info + * @param pin_timing_invalidator + * @param budgeting_inf + * @param worst_neg_slack + * @param routing_predictor + * @param choking_spots + * @param is_flat + * @return NetResultFlags for this net */ +template +inline NetResultFlags route_net(ConnectionRouter& router, + const Netlist<>& net_list, + const ParentNetId& net_id, + int itry, + float pres_fac, + const t_router_opts& router_opts, + CBRR& connections_inf, + RouterStats& router_stats, + NetPinsMatrix& net_delays, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + SetupHoldTimingInfo* timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + route_budgets& budgeting_inf, + float worst_negative_slack, + const RoutingPredictor& routing_predictor, + const 
std::vector>& choking_spots, + bool is_flat) { + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + NetResultFlags flags; + + bool reroute_for_hold = false; + if (budgeting_inf.if_set()) { + reroute_for_hold = (budgeting_inf.get_should_reroute(net_id)); + reroute_for_hold &= worst_negative_slack != 0; + } + + flags.success = true; + + if (route_ctx.net_status.is_fixed(net_id)) { /* Skip pre-routed nets. */ + return flags; + } else if (net_list.net_is_ignored(net_id)) { /* Skip ignored nets. */ + return flags; + } else if (!(reroute_for_hold) && !should_route_net(net_id, connections_inf, true)) { + return flags; + } + + // track time spent vs fanout + profiling::net_fanout_start(); + + flags.was_rerouted = true; // Flag to record whether routing was actually changed + + auto& device_ctx = g_vpr_ctx.device(); + const auto& rr_graph = device_ctx.rr_graph; + + unsigned int num_sinks = net_list.net_sinks(net_id).size(); + + VTR_LOGV_DEBUG(f_router_debug, "Routing Net %zu (%zu sinks)\n", size_t(net_id), num_sinks); + + setup_routing_resources( + itry, + net_id, + net_list, + num_sinks, + router_opts.min_incremental_reroute_fanout, + connections_inf, + router_opts, + check_hold(router_opts, worst_negative_slack)); + + VTR_ASSERT(route_ctx.route_trees[net_id]); + RouteTree& tree = route_ctx.route_trees[net_id].value(); + + bool high_fanout = is_high_fanout(num_sinks, router_opts.high_fanout_threshold); + + SpatialRouteTreeLookup spatial_route_tree_lookup; + if (high_fanout) { + spatial_route_tree_lookup = build_route_tree_spatial_lookup(net_list, + route_ctx.route_bb, + net_id, + tree.root()); + } + + /* 1-indexed! 
*/ + std::vector pin_criticality(tree.num_sinks() + 1, 0); + + // after this point the route tree is correct + // remaining_targets from this point on are the **pin indices** that have yet to be routed + std::vector remaining_targets(tree.get_remaining_isinks().begin(), tree.get_remaining_isinks().end()); + + // calculate criticality of remaining target pins + for (int ipin : remaining_targets) { + auto pin = net_list.net_pin(net_id, ipin); + pin_criticality[ipin] = get_net_pin_criticality(timing_info, + netlist_pin_lookup, + router_opts.max_criticality, + router_opts.criticality_exp, + net_id, + pin, + is_flat); + } + + // compare the criticality of different sink nodes + sort(begin(remaining_targets), end(remaining_targets), [&](int a, int b) { + return pin_criticality[a] > pin_criticality[b]; + }); + + /* Update base costs according to fanout and criticality rules */ + update_rr_base_costs(num_sinks); + + t_conn_delay_budget conn_delay_budget; + t_conn_cost_params cost_params; + cost_params.astar_fac = router_opts.astar_fac; + cost_params.bend_cost = router_opts.bend_cost; + cost_params.pres_fac = pres_fac; + cost_params.delay_budget = ((budgeting_inf.if_set()) ? 
&conn_delay_budget : nullptr); + + // Pre-route to clock source for clock nets (marked as global nets) + if (net_list.net_is_global(net_id) && router_opts.two_stage_clock_routing) { + //VTR_ASSERT(router_opts.clock_modeling == DEDICATED_NETWORK); + RRNodeId sink_node(device_ctx.virtual_clock_network_root_idx); + + enable_router_debug(router_opts, net_id, sink_node, itry, &router); + + VTR_LOGV_DEBUG(f_router_debug, "Pre-routing global net %zu\n", size_t(net_id)); + + // Set to the max timing criticality which should intern minimize clock insertion + // delay by selecting a direct route from the clock source to the virtual sink + cost_params.criticality = router_opts.max_criticality; + + return pre_route_to_clock_root(router, + net_id, + net_list, + sink_node, + cost_params, + router_opts.high_fanout_threshold, + tree, + spatial_route_tree_lookup, + router_stats, + is_flat); + } + + if (budgeting_inf.if_set()) { + budgeting_inf.set_should_reroute(net_id, false); + } + + // explore in order of decreasing criticality (no longer need sink_order array) + for (unsigned itarget = 0; itarget < remaining_targets.size(); ++itarget) { + int target_pin = remaining_targets[itarget]; + + RRNodeId sink_rr = route_ctx.net_rr_terminals[net_id][target_pin]; + + enable_router_debug(router_opts, net_id, sink_rr, itry, &router); + + cost_params.criticality = pin_criticality[target_pin]; + + if (budgeting_inf.if_set()) { + conn_delay_budget.max_delay = budgeting_inf.get_max_delay_budget(net_id, target_pin); + conn_delay_budget.target_delay = budgeting_inf.get_delay_target(net_id, target_pin); + conn_delay_budget.min_delay = budgeting_inf.get_min_delay_budget(net_id, target_pin); + conn_delay_budget.short_path_criticality = budgeting_inf.get_crit_short_path(net_id, target_pin); + conn_delay_budget.routing_budgets_algorithm = router_opts.routing_budgets_algorithm; + } + + profiling::conn_start(); + + // build a branch in the route tree to the target + auto sink_flags = route_sink(router, 
+ net_list, + net_id, + itarget, + target_pin, + cost_params, + router_opts, + tree, + spatial_route_tree_lookup, + router_stats, + budgeting_inf, + routing_predictor, + choking_spots, + is_flat); + + flags.retry_with_full_bb |= sink_flags.retry_with_full_bb; + + if (!sink_flags.success) { + flags.success = false; + VTR_LOG("Routing failed for net %d\n", net_id); + return flags; + } + + profiling::conn_finish(size_t(route_ctx.net_rr_terminals[net_id][0]), + size_t(sink_rr), + pin_criticality[target_pin]); + + ++router_stats.connections_routed; + } // finished all sinks + + ++router_stats.nets_routed; + profiling::net_finish(); + + /* For later timing analysis. */ + + float* net_delay = net_delays[net_id].data(); + + // may have to update timing delay of the previously legally reached sinks since downstream capacitance could be changed + update_net_delays_from_route_tree(net_delay, + net_list, + net_id, + timing_info, + pin_timing_invalidator); + + if (router_opts.update_lower_bound_delays) { + for (int ipin : remaining_targets) { + connections_inf.update_lower_bound_connection_delay(net_id, ipin, net_delay[ipin]); + } + } + + VTR_ASSERT_MSG(g_vpr_ctx.routing().rr_node_route_inf[tree.root().inode].occ() <= rr_graph.node_capacity(tree.root().inode), "SOURCE should never be congested"); + VTR_LOGV_DEBUG(f_router_debug, "Routed Net %zu (%zu sinks)\n", size_t(net_id), num_sinks); + + router.empty_rcv_route_tree_set(); // ? + + profiling::net_fanout_end(net_list.net_sinks(net_id).size()); + + route_ctx.net_status.set_is_routed(net_id, true); + return flags; +} + +/** Route to a "virtual sink" in the netlist which corresponds to the start point + * of the global clock network. 
*/ +template +inline NetResultFlags pre_route_to_clock_root(ConnectionRouter& router, + ParentNetId net_id, + const Netlist<>& net_list, + RRNodeId sink_node, + const t_conn_cost_params cost_params, + int high_fanout_threshold, + RouteTree& tree, + SpatialRouteTreeLookup& spatial_rt_lookup, + RouterStats& router_stats, + bool is_flat) { + const auto& device_ctx = g_vpr_ctx.device(); + auto& route_ctx = g_vpr_ctx.mutable_routing(); + auto& m_route_ctx = g_vpr_ctx.mutable_routing(); + + NetResultFlags out; + + bool high_fanout = is_high_fanout(net_list.net_sinks(net_id).size(), high_fanout_threshold); + + VTR_LOGV_DEBUG(f_router_debug, "Net %zu pre-route to (%s)\n", size_t(net_id), describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, sink_node, is_flat).c_str()); + profiling::sink_criticality_start(); + + t_bb bounding_box = route_ctx.route_bb[net_id]; + + router.clear_modified_rr_node_info(); + + bool found_path, retry_with_full_bb; + t_heap cheapest; + ConnectionParameters conn_params(net_id, + -1, + false, + std::unordered_map()); + + std::tie(found_path, retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree( + tree.root(), + sink_node, + cost_params, + bounding_box, + router_stats, + conn_params); + + // TODO: Parts of the rest of this function are repetitive to code in route_sink. Should refactor. 
+ if (!found_path) { + ParentBlockId src_block = net_list.net_driver_block(net_id); + VTR_LOG("Failed to route connection from '%s' to '%s' for net '%s' (#%zu)\n", + net_list.block_name(src_block).c_str(), + describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, sink_node, is_flat).c_str(), + net_list.net_name(net_id).c_str(), + size_t(net_id)); + if (f_router_debug) { + update_screen(ScreenUpdatePriority::MAJOR, "Unable to route connection.", ROUTING, nullptr); + } + router.reset_path_costs(); + out.success = false; + out.retry_with_full_bb = retry_with_full_bb; + return out; + } + + profiling::sink_criticality_end(cost_params.criticality); + + /* This is a special pre-route to a sink that does not correspond to any * + * netlist pin, but which can be reached from the global clock root drive * + * points. Therefore, we can set the net pin index of the sink node to * + * OPEN (meaning illegal) as it is not meaningful for this sink. */ + vtr::optional new_branch, new_sink; + std::tie(new_branch, new_sink) = tree.update_from_heap(&cheapest, OPEN, ((high_fanout) ? 
&spatial_rt_lookup : nullptr), is_flat); + + VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(tree.root(), spatial_rt_lookup)); + + if (f_router_debug) { + std::string msg = vtr::string_fmt("Routed Net %zu connection to RR node %d successfully", size_t(net_id), sink_node); + update_screen(ScreenUpdatePriority::MAJOR, msg.c_str(), ROUTING, nullptr); + } + + if (new_branch) + pathfinder_update_cost_from_route_tree(new_branch.value(), 1); + + // need to guarantee ALL nodes' path costs are HUGE_POSITIVE_FLOAT at the start of routing to a sink + // do this by resetting all the path_costs that have been touched while routing to the current sink + router.reset_path_costs(); + + // Post route cleanup: + // - remove sink from route tree and fix routing for all nodes leading to the sink ("freeze") + // - free up virtual sink occupancy + tree.freeze(); + m_route_ctx.rr_node_route_inf[sink_node].set_occ(0); + + // routed to a sink successfully + out.success = true; + return out; +} + +/** Attempt to route a single sink (target_pin) in a net. + * In the process, update global pathfinder costs, rr_node_route_inf and extend the global RouteTree + * for this net. + * + * @param router The ConnectionRouter instance + * @param net_list Input netlist + * @param net_id + * @param itarget # of this connection in the net (only used for debug output) + * @param target_pin # of this sink in the net (TODO: is it the same thing as itarget?) 
+ * @param cost_params + * @param router_opts + * @param[in, out] tree RouteTree describing the current routing state + * @param rt_node_of_sink Lookup from target_pin-like indices (indicating SINK nodes) to RouteTreeNodes + * @param spatial_rt_lookup + * @param router_stats + * @param budgeting_inf + * @param routing_predictor + * @param choking_spots + * @param is_flat + * @return NetResultFlags for this sink to be bubbled up through route_net */ +template +inline NetResultFlags route_sink(ConnectionRouter& router, + const Netlist<>& net_list, + ParentNetId net_id, + unsigned itarget, + int target_pin, + const t_conn_cost_params cost_params, + const t_router_opts& router_opts, + RouteTree& tree, + SpatialRouteTreeLookup& spatial_rt_lookup, + RouterStats& router_stats, + route_budgets& budgeting_inf, + const RoutingPredictor& routing_predictor, + const std::vector>& choking_spots, + bool is_flat) { + const auto& device_ctx = g_vpr_ctx.device(); + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + NetResultFlags flags; + + profiling::sink_criticality_start(); + + RRNodeId sink_node = route_ctx.net_rr_terminals[net_id][target_pin]; + VTR_LOGV_DEBUG(f_router_debug, "Net %zu Target %d (%s)\n", size_t(net_id), itarget, describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, sink_node, is_flat).c_str()); + + router.clear_modified_rr_node_info(); + + bool found_path; + t_heap cheapest; + t_bb bounding_box = route_ctx.route_bb[net_id]; + + bool net_is_global = net_list.net_is_global(net_id); + bool high_fanout = is_high_fanout(net_list.net_sinks(net_id).size(), router_opts.high_fanout_threshold); + constexpr float HIGH_FANOUT_CRITICALITY_THRESHOLD = 0.9; + bool sink_critical = (cost_params.criticality > HIGH_FANOUT_CRITICALITY_THRESHOLD); + bool net_is_clock = route_ctx.is_clock_net[net_id] != 0; + + bool has_choking_spot = ((int)choking_spots[target_pin].size() != 0) && router_opts.has_choking_spot; + ConnectionParameters conn_params(net_id, 
target_pin, has_choking_spot, choking_spots[target_pin]); + + //We normally route high fanout nets by only adding spatially close-by routing to the heap (reduces run-time). + //However, if the current sink is 'critical' from a timing perspective, we put the entire route tree back onto + //the heap to ensure it has more flexibility to find the best path. + if (high_fanout && !sink_critical && !net_is_global && !net_is_clock && -routing_predictor.get_slope() > router_opts.high_fanout_max_slope) { + std::tie(found_path, flags.retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree_high_fanout(tree.root(), + sink_node, + cost_params, + bounding_box, + spatial_rt_lookup, + router_stats, + conn_params); + } else { + std::tie(found_path, flags.retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree(tree.root(), + sink_node, + cost_params, + bounding_box, + router_stats, + conn_params); + } + + if (!found_path) { + ParentBlockId src_block = net_list.net_driver_block(net_id); + ParentBlockId sink_block = net_list.pin_block(*(net_list.net_pins(net_id).begin() + target_pin)); + VTR_LOG("Failed to route connection from '%s' to '%s' for net '%s' (#%zu)\n", + net_list.block_name(src_block).c_str(), + net_list.block_name(sink_block).c_str(), + net_list.net_name(net_id).c_str(), + size_t(net_id)); + if (f_router_debug) { + update_screen(ScreenUpdatePriority::MAJOR, "Unable to route connection.", ROUTING, nullptr); + } + flags.success = false; + router.reset_path_costs(); + return flags; + } + + profiling::sink_criticality_end(cost_params.criticality); + + RRNodeId inode(cheapest.index); + route_ctx.rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */ + + vtr::optional new_branch, new_sink; + std::tie(new_branch, new_sink) = tree.update_from_heap(&cheapest, target_pin, ((high_fanout) ? 
&spatial_rt_lookup : nullptr), is_flat); + + VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(tree.root(), spatial_rt_lookup)); + + if (f_router_debug) { + std::string msg = vtr::string_fmt("Routed Net %zu connection %d to RR node %d successfully", size_t(net_id), itarget, sink_node); + update_screen(ScreenUpdatePriority::MAJOR, msg.c_str(), ROUTING, nullptr); + } + + if (budgeting_inf.if_set() && cheapest.path_data != nullptr && cost_params.delay_budget) { + if (cheapest.path_data->backward_delay < cost_params.delay_budget->min_delay) { + budgeting_inf.set_should_reroute(net_id, true); + } + } + + /* update global occupancy from the new branch */ + if (new_branch) + pathfinder_update_cost_from_route_tree(new_branch.value(), 1); + + // need to guarantee ALL nodes' path costs are HUGE_POSITIVE_FLOAT at the start of routing to a sink + // do this by resetting all the path_costs that have been touched while routing to the current sink + router.reset_path_costs(); + + // routed to a sink successfully + flags.success = true; + return flags; +} diff --git a/vpr/src/route/route_parallel.cpp b/vpr/src/route/route_parallel.cpp deleted file mode 100644 index b3a6dda3b72..00000000000 --- a/vpr/src/route/route_parallel.cpp +++ /dev/null @@ -1,1059 +0,0 @@ -/** @file Functions specific to parallel routing. - * Reuse code from route_timing.cpp where possible. 
*/ - -#include -#include -#include -#include -#include -#include -#include - -#include "binary_heap.h" -#include "bucket.h" -#include "concrete_timing_info.h" -#include "connection_router.h" -#include "draw.h" -#include "globals.h" -#include "netlist_fwd.h" -#include "partition_tree.h" -#include "read_route.h" -#include "route_export.h" -#include "route_common.h" -#include "route_timing.h" -#include "route_parallel.h" -// all functions in profiling:: namespace, which are only activated if PROFILE is defined -#include "route_profiling.h" -#include "timing_util.h" -#include "vtr_time.h" - -#include "NetPinTimingInvalidator.h" - -#ifdef VPR_USE_TBB - -# include "tbb/enumerable_thread_specific.h" -# include "tbb/task_group.h" - -/** route_net and similar functions need many bits of state collected from various - * parts of VPR, collect them here for ease of use */ -template -class RouteIterCtx { - public: - tbb::enumerable_thread_specific routers; - const Netlist<>& net_list; - int itry; - float pres_fac; - const t_router_opts& router_opts; - CBRR& connections_inf; - tbb::enumerable_thread_specific router_stats; - tbb::enumerable_thread_specific route_structs; - NetPinsMatrix& net_delay; - const ClusteredPinAtomPinsLookup& netlist_pin_lookup; - std::shared_ptr timing_info; - NetPinTimingInvalidator* pin_timing_invalidator; - route_budgets& budgeting_inf; - float worst_negative_slack; - const RoutingPredictor& routing_predictor; - const vtr::vector>>& choking_spots; - bool is_flat; -}; - -/** Helper for reduce_partition_tree. Traverse \p node's subtree and collect results into \p results */ -static void reduce_partition_tree_helper(const PartitionTreeNode& node, RouteIterResults& results); - -/** - * Try to route in parallel with the given ConnectionRouter. - * ConnectionRouter is typically templated with a heap type, so this lets us - * route with different heap implementations. 
- * - * This fn is very similar to try_timing_driven_route_tmpl, but it has enough small changes to - * warrant a copy. (TODO: refactor this to reuse more of the serial code) - * - * @param netlist Input netlist - * @param det_routing_arch Routing architecture. See definition of t_det_routing_arch for more details. - * @param router_opts Command line options for the router. - * @param analysis_opts Command line options for timing analysis (used in generate_route_timing_reports()) - * @param segment_inf - * @param[in, out] net_delay - * @param netlist_pin_lookup - * @param[in, out] timing_info Interface to the timing analyzer - * @param delay_calc - * @param first_iteration_priority - * @param is_flat - * @return Success status - * - * The reason that try_parallel_route_tmpl (and descendents) are being - * templated over is because using a virtual interface instead fully templating - * the router results in a 5% runtime increase. - * - * The reason to template over the router in general is to enable runtime - * selection of core router algorithm's, specifically the router heap. 
*/ -template -static bool try_parallel_route_tmpl(const Netlist<>& netlist, - const t_det_routing_arch& det_routing_arch, - const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - const std::vector& segment_inf, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - std::shared_ptr delay_calc, - ScreenUpdatePriority first_iteration_priority, - bool is_flat); - -template -static RouteIterResults route_with_partition_tree(tbb::task_group& g, RouteIterCtx& ctx); - -template -static RouteIterResults route_without_partition_tree(std::vector& nets_to_route, RouteIterCtx& ctx); - -/************************ Subroutine definitions *****************************/ - -bool try_parallel_route(const Netlist<>& net_list, - const t_det_routing_arch& det_routing_arch, - const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - const std::vector& segment_inf, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - std::shared_ptr delay_calc, - ScreenUpdatePriority first_iteration_priority, - bool is_flat) { - switch (router_opts.router_heap) { - case e_heap_type::BINARY_HEAP: - return try_parallel_route_tmpl>(net_list, - det_routing_arch, - router_opts, - analysis_opts, - segment_inf, - net_delay, - netlist_pin_lookup, - timing_info, - delay_calc, - first_iteration_priority, - is_flat); - break; - case e_heap_type::BUCKET_HEAP_APPROXIMATION: - return try_parallel_route_tmpl>(net_list, - det_routing_arch, - router_opts, - analysis_opts, - segment_inf, - net_delay, - netlist_pin_lookup, - timing_info, - delay_calc, - first_iteration_priority, - is_flat); - default: - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown heap type %d", router_opts.router_heap); - } -} - -template -bool try_parallel_route_tmpl(const Netlist<>& net_list, - const t_det_routing_arch& det_routing_arch, - const t_router_opts& router_opts, - const t_analysis_opts& 
analysis_opts, - const std::vector& segment_inf, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - std::shared_ptr delay_calc, - ScreenUpdatePriority first_iteration_priority, - bool is_flat) { - // Make sure template type ConnectionRouter is a ConnectionRouterInterface. - /// TODO: Template on "NetRouter" instead of ConnectionRouter to avoid copying top level routing logic? - static_assert(std::is_base_of::value, "ConnectionRouter must implement the ConnectionRouterInterface"); - - const auto& device_ctx = g_vpr_ctx.device(); - const auto& atom_ctx = g_vpr_ctx.atom(); - auto& route_ctx = g_vpr_ctx.mutable_routing(); - - auto choking_spots = set_nets_choking_spots(net_list, - route_ctx.net_terminal_groups, - route_ctx.net_terminal_group_num, - router_opts.has_choking_spot, - is_flat); - - //Initially, the router runs normally trying to reduce congestion while - //balancing other metrics (timing, wirelength, run-time etc.) 
- RouterCongestionMode router_congestion_mode = RouterCongestionMode::NORMAL; - - //Initialize and properly size the lookups for profiling - profiling::profiling_initialization(get_max_pins_per_net(net_list)); - - /* - * Configure the routing predictor - */ - RoutingPredictor routing_predictor; - float abort_iteration_threshold = std::numeric_limits::infinity(); //Default no early abort - if (router_opts.routing_failure_predictor == SAFE) { - abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_SAFE * router_opts.max_router_iterations; - } else if (router_opts.routing_failure_predictor == AGGRESSIVE) { - abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_AGGRESSIVE * router_opts.max_router_iterations; - } else { - VTR_ASSERT_MSG(router_opts.routing_failure_predictor == OFF, "Unrecognized routing failure predictor setting"); - } - - float high_effort_congestion_mode_iteration_threshold = router_opts.congested_routing_iteration_threshold_frac * router_opts.max_router_iterations; - - /* Set delay of ignored signals to zero. Non-ignored net delays are set by - * update_net_delays_from_route_tree() inside parallel_route_net(), - * which is only called for non-ignored nets. */ - for (auto net_id : net_list.nets()) { - if (net_list.net_is_ignored(net_id)) { - for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) { - net_delay[net_id][ipin] = 0.; - } - } - } - - CBRR connections_inf{net_list, route_ctx.net_rr_terminals, is_flat}; - - route_budgets budgeting_inf(net_list, is_flat); - - // This needs to be called before filling intra-cluster lookahead maps to ensure that the intra-cluster lookahead maps are initialized. 
- const RouterLookahead* router_lookahead = get_cached_router_lookahead(det_routing_arch, - router_opts.lookahead_type, - router_opts.write_router_lookahead, - router_opts.read_router_lookahead, - segment_inf, - is_flat); - - if (is_flat) { - // If is_flat is true, the router lookahead maps related to intra-cluster resources should be initialized since - // they haven't been initialized when the map related to global resources was initialized. - auto cache_key = route_ctx.router_lookahead_cache_key_; - std::unique_ptr mut_router_lookahead(route_ctx.cached_router_lookahead_.release()); - VTR_ASSERT(mut_router_lookahead); - route_ctx.cached_router_lookahead_.clear(); - if (!router_opts.read_intra_cluster_router_lookahead.empty()) { - mut_router_lookahead->read_intra_cluster(router_opts.read_intra_cluster_router_lookahead); - } else { - mut_router_lookahead->compute_intra_tile(); - } - route_ctx.cached_router_lookahead_.set(cache_key, std::move(mut_router_lookahead)); - router_lookahead = get_cached_router_lookahead(det_routing_arch, - router_opts.lookahead_type, - router_opts.write_router_lookahead, - router_opts.read_router_lookahead, - segment_inf, - is_flat); - if (!router_opts.write_intra_cluster_router_lookahead.empty()) { - router_lookahead->write_intra_cluster(router_opts.write_intra_cluster_router_lookahead); - } - } - - VTR_ASSERT(router_lookahead != nullptr); - - /* - * Routing parameters - */ - float pres_fac = update_pres_fac(router_opts.first_iter_pres_fac); /* Typically 0 -> ignore cong. 
*/ - int bb_fac = router_opts.bb_factor; - - //When routing conflicts are detected the bounding boxes are scaled - //by BB_SCALE_FACTOR every BB_SCALE_ITER_COUNT iterations - constexpr float BB_SCALE_FACTOR = 2; - constexpr int BB_SCALE_ITER_COUNT = 5; - - size_t available_wirelength = calculate_wirelength_available(); - - /* - * Routing status and metrics - */ - bool routing_is_successful = false; - WirelengthInfo wirelength_info; - OveruseInfo overuse_info(device_ctx.rr_graph.num_nodes()); - tatum::TimingPathInfo critical_path; - int itry; //Routing iteration number - int itry_conflicted_mode = 0; - - /* - * Best result so far - */ - vtr::vector> best_routing; - t_clb_opins_used best_clb_opins_used_locally; - RoutingMetrics best_routing_metrics; - int legal_convergence_count = 0; - std::vector scratch; - - /* - * On the first routing iteration ignore congestion to get reasonable net - * delay estimates. Set criticalities to 1 when timing analysis is on to - * optimize timing, and to 0 when timing analysis is off to optimize routability. - * - * Subsequent iterations use the net delays from the previous iteration. 
- */ - std::shared_ptr route_timing_info; - { - vtr::ScopedStartFinishTimer init_timing_timer("Initializing router criticalities"); - if (timing_info) { - if (router_opts.initial_timing == e_router_initial_timing::ALL_CRITICAL) { - //First routing iteration, make all nets critical for a min-delay routing - route_timing_info = make_constant_timing_info(1.); - } else { - VTR_ASSERT(router_opts.initial_timing == e_router_initial_timing::LOOKAHEAD); - - { - //Estimate initial connection delays from the router lookahead - init_net_delay_from_lookahead(*router_lookahead, - net_list, - route_ctx.net_rr_terminals, - net_delay, - device_ctx.rr_graph, - is_flat); - - //Run STA to get estimated criticalities - timing_info->update(); - } - route_timing_info = timing_info; - } - } else { - //Not timing driven, force criticality to zero for a routability-driven routing - route_timing_info = make_constant_timing_info(0.); - } - VTR_LOG("Initial Net Connection Criticality Histogram:\n"); - print_router_criticality_histogram(net_list, *route_timing_info, netlist_pin_lookup, is_flat); - } - - std::unique_ptr pin_timing_invalidator; - if (timing_info) { - pin_timing_invalidator = make_net_pin_timing_invalidator( - router_opts.timing_update_type, - net_list, - netlist_pin_lookup, - atom_ctx.nlist, - atom_ctx.lookup, - *timing_info->timing_graph(), - is_flat); - } - - tbb::task_group tbb_task_group; - - /* Set up thread local storage. - * tbb::enumerable_thread_specific will construct the elements as needed. 
- * see https://spec.oneapi.io/versions/1.0-rev-3/elements/oneTBB/source/thread_local_storage/enumerable_thread_specific_cls/construct_destroy_copy.html */ - auto routers = tbb::enumerable_thread_specific(ConnectionRouter( - device_ctx.grid, - *router_lookahead, - device_ctx.rr_graph.rr_nodes(), - &device_ctx.rr_graph, - device_ctx.rr_rc_data, - device_ctx.rr_graph.rr_switch(), - route_ctx.rr_node_route_inf, - is_flat)); /* Here we provide an "exemplar" to copy for each thread */ - auto router_stats_thread = tbb::enumerable_thread_specific(); - auto route_structs = tbb::enumerable_thread_specific(net_list); - - RouterStats router_stats; - float prev_iter_cumm_time = 0; - vtr::Timer iteration_timer; - int num_net_bounding_boxes_updated = 0; - int itry_since_last_convergence = -1; - - // This heap is used for reserve_locally_used_opins. - BinaryHeap small_heap; - small_heap.init_heap(device_ctx.grid); - - // When RCV is enabled the router will not stop unless negative hold slack is 0 - // In some cases this isn't doable, due to global nets or intracluster routing issues - // In these cases RCV will finish early if it goes RCV_FINISH_EARLY_COUNTDOWN iterations without detecting resolvable negative hold slack - // Increasing this will make the router fail occasionally, decreasing will sometimes not let all hold violations be resolved - constexpr int RCV_FINISH_EARLY_COUNTDOWN = 15; - - int rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN; - - print_route_status_header(); - for (itry = 1; itry <= router_opts.max_router_iterations; ++itry) { - for (auto& stats : router_stats_thread) { - init_router_stats(stats); - } - - /* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */ - for (auto net_id : net_list.nets()) { - route_ctx.net_status.set_is_routed(net_id, false); - route_ctx.net_status.set_is_fixed(net_id, false); - } - - if (itry_since_last_convergence >= 0) { - ++itry_since_last_convergence; - } - - // Calculate this once and pass it into 
net routing to check if should reroute for hold - float worst_negative_slack = 0; - if (budgeting_inf.if_set()) { - worst_negative_slack = timing_info->hold_total_negative_slack(); - } - - /** - * Route nets in parallel using the partition tree. Need to pass on - * some context to each task. - * TODO: Move pin_criticality into timing_driven_route_net(). - * TODO: Move rt_node_of_sink lookup into RouteTree. - */ - RouteIterCtx iter_ctx = { - routers, - net_list, - itry, - pres_fac, - router_opts, - connections_inf, - router_stats_thread, - route_structs, - net_delay, - netlist_pin_lookup, - route_timing_info, - pin_timing_invalidator.get(), - budgeting_inf, - worst_negative_slack, - routing_predictor, - choking_spots, - is_flat}; - - vtr::Timer net_routing_timer; - RouteIterResults iter_results = route_with_partition_tree(tbb_task_group, iter_ctx); - PartitionTreeDebug::log("Routing all nets took " + std::to_string(net_routing_timer.elapsed_sec()) + " s"); - - if (!iter_results.is_routable) { - return false; // Impossible to route - } - - /* Note that breakpoints won't work properly with parallel routing. - * (how to do that? stop all threads when a thread hits a breakpoint? too complicated) - * However we still make an attempt to update graphics */ -# ifndef NO_GRAPHICS - for (auto net_id : net_list.nets()) { - update_router_info_and_check_bp(BP_NET_ID, size_t(net_id)); - } -# endif - - // Make sure any CLB OPINs used up by subblocks being hooked directly to them are reserved for that purpose - bool rip_up_local_opins = (itry == 1 ? 
false : true); - if (!is_flat) { - reserve_locally_used_opins(&small_heap, pres_fac, - router_opts.acc_fac, rip_up_local_opins, is_flat); - } - - /* - * Calculate metrics for the current routing - */ - bool routing_is_feasible = feasible_routing(); - float est_success_iteration = routing_predictor.estimate_success_iteration(); - - //Update resource costs and overuse info - if (itry == 1) { - pathfinder_update_acc_cost_and_overuse_info(0., overuse_info); /* Acc_fac=0 for first iter. */ - } else { - pathfinder_update_acc_cost_and_overuse_info(router_opts.acc_fac, overuse_info); - } - - wirelength_info = calculate_wirelength_info(net_list, available_wirelength); - routing_predictor.add_iteration_overuse(itry, overuse_info.overused_nodes); - - if (timing_info) { - //Update timing based on the new routing - //Note that the net delays have already been updated by parallel_route_net - timing_info->update(); - timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during routing - pin_timing_invalidator->reset(); - - //Use the real timing analysis criticalities for subsequent routing iterations - // 'route_timing_info' is what is actually passed into the net/connection routers, - // and for the 1st iteration may not be the actual STA results (e.g. 
all criticalities set to 1) - route_timing_info = timing_info; - - critical_path = timing_info->least_slack_critical_path(); - - VTR_ASSERT_SAFE(timing_driven_check_net_delays(net_list, net_delay)); - - if (itry == 1) { - generate_route_timing_reports(router_opts, analysis_opts, *timing_info, *delay_calc, is_flat); - } - } - - float iter_cumm_time = iteration_timer.elapsed_sec(); - float iter_elapsed_time = iter_cumm_time - prev_iter_cumm_time; - - //Output progress - print_route_status(itry, iter_elapsed_time, pres_fac, num_net_bounding_boxes_updated, iter_results.stats, overuse_info, wirelength_info, timing_info, est_success_iteration); - PartitionTreeDebug::log("Iteration " + std::to_string(itry) + " took " + std::to_string(iter_elapsed_time) + " s"); - - prev_iter_cumm_time = iter_cumm_time; - - //Update graphics - if (itry == 1) { - update_screen(first_iteration_priority, "Routing...", ROUTING, timing_info); - } else { - update_screen(ScreenUpdatePriority::MINOR, "Routing...", ROUTING, timing_info); - } - - if (router_opts.save_routing_per_iteration) { - std::string filename = vtr::string_fmt("iteration_%03d.route", itry); - print_route(net_list, nullptr, filename.c_str(), is_flat); - } - - // Update router stats - update_router_stats(router_stats, iter_results.stats); - - /* - * Are we finished? 
- */ - if (is_iteration_complete(routing_is_feasible, router_opts, itry, timing_info, rcv_finished_count == 0)) { - auto& router_ctx = g_vpr_ctx.routing(); - - if (is_better_quality_routing(best_routing, best_routing_metrics, wirelength_info, timing_info)) { - //Save routing - best_routing = router_ctx.route_trees; - best_clb_opins_used_locally = router_ctx.clb_opins_used_locally; - - routing_is_successful = true; - - //Update best metrics - if (timing_info) { - timing_driven_check_net_delays(net_list, net_delay); - - best_routing_metrics.sTNS = timing_info->setup_total_negative_slack(); - best_routing_metrics.sWNS = timing_info->setup_worst_negative_slack(); - best_routing_metrics.hTNS = timing_info->hold_total_negative_slack(); - best_routing_metrics.hWNS = timing_info->hold_worst_negative_slack(); - best_routing_metrics.critical_path = critical_path; - } - best_routing_metrics.used_wirelength = wirelength_info.used_wirelength(); - } - - //Decrease pres_fac so that critical connections will take more direct routes - //Note that we use first_iter_pres_fac here (typically zero), and switch to - //use initial_pres_fac on the next iteration. - pres_fac = update_pres_fac(router_opts.first_iter_pres_fac); - - //Reduce timing tolerances to re-route more delay-suboptimal signals - connections_inf.set_connection_criticality_tolerance(0.7); - connections_inf.set_connection_delay_tolerance(1.01); - - ++legal_convergence_count; - itry_since_last_convergence = 0; - - VTR_ASSERT(routing_is_successful); - } - - if (itry_since_last_convergence == 1) { - //We used first_iter_pres_fac when we started routing again - //after the first routing convergence. Since that is often zero, - //we want to set pres_fac to a reasonable (i.e. 
typically non-zero) - //value afterwards -- so it grows when multiplied by pres_fac_mult - pres_fac = update_pres_fac(router_opts.initial_pres_fac); - } - - //Have we converged the maximum number of times, did not make any changes, or does it seem - //unlikely additional convergences will improve QoR? - if (legal_convergence_count >= router_opts.max_convergence_count - || iter_results.stats.connections_routed == 0 - || early_reconvergence_exit_heuristic(router_opts, itry_since_last_convergence, timing_info, best_routing_metrics)) { -# ifndef NO_GRAPHICS - update_router_info_and_check_bp(BP_ROUTE_ITER, -1); -# endif - break; //Done routing - } - - /* - * Abort checks: Should we give-up because this routing problem is unlikely to converge to a legal routing? - */ - if (itry == 1 && early_exit_heuristic(router_opts, wirelength_info)) { -# ifndef NO_GRAPHICS - update_router_info_and_check_bp(BP_ROUTE_ITER, -1); -# endif - //Abort - break; - } - - //Estimate at what iteration we will converge to a legal routing - if (overuse_info.overused_nodes > ROUTING_PREDICTOR_MIN_ABSOLUTE_OVERUSE_THRESHOLD) { - //Only consider aborting if we have a significant number of overused resources - - if (!std::isnan(est_success_iteration) && est_success_iteration > abort_iteration_threshold && router_opts.routing_budgets_algorithm != YOYO) { - VTR_LOG("Routing aborted, the predicted iteration for a successful route (%.1f) is too high.\n", est_success_iteration); -# ifndef NO_GRAPHICS - update_router_info_and_check_bp(BP_ROUTE_ITER, -1); -# endif - break; //Abort - } - } - - if (itry == 1 && router_opts.exit_after_first_routing_iteration) { - VTR_LOG("Exiting after first routing iteration as requested\n"); -# ifndef NO_GRAPHICS - update_router_info_and_check_bp(BP_ROUTE_ITER, -1); -# endif - break; - } - - /* - * Prepare for the next iteration - */ - - if (router_opts.route_bb_update == e_route_bb_update::DYNAMIC) { - num_net_bounding_boxes_updated = 
dynamic_update_bounding_boxes(iter_results.rerouted_nets, net_list, router_opts.high_fanout_threshold); - } - - if (itry >= high_effort_congestion_mode_iteration_threshold) { - //We are approaching the maximum number of routing iterations, - //and still do not have a legal routing. Switch to a mode which - //focuses more on attempting to resolve routing conflicts. - router_congestion_mode = RouterCongestionMode::CONFLICTED; - } - - //Update pres_fac - if (itry == 1) { - pres_fac = update_pres_fac(router_opts.initial_pres_fac); - } else { - pres_fac *= router_opts.pres_fac_mult; - - /* Avoid overflow for high iteration counts, even if acc_cost is big */ - pres_fac = update_pres_fac(std::min(pres_fac, static_cast(HUGE_POSITIVE_FLOAT / 1e5))); - - // Increase short path criticality if it's having a hard time resolving hold violations due to congestion - if (budgeting_inf.if_set()) { - bool rcv_finished = false; - - /* This constant represents how much extra delay the budget increaser adds to the minimum and maximum delay budgets - * Experimentally this value delivers fast hold slack resolution, while not overwhelming the router - * Increasing this will make it resolve hold faster, but could result in lower circuit quality */ - constexpr float budget_increase_factor = 300e-12; - - if (itry > 5 && worst_negative_slack != 0) rcv_finished = budgeting_inf.increase_min_budgets_if_struggling(budget_increase_factor, timing_info, worst_negative_slack, netlist_pin_lookup); - if (rcv_finished) - rcv_finished_count--; - else - rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN; - } - } - - if (router_congestion_mode == RouterCongestionMode::CONFLICTED) { - //The design appears to have routing conflicts which are difficult to resolve: - // 1) Don't re-route legal connections due to delay. This allows - // the router to focus on the actual conflicts - // 2) Increase the net bounding boxes. 
This potentially allows - // the router to route around otherwise congested regions - // (at the cost of high run-time). - - //Increase the size of the net bounding boxes to give the router more - //freedom to find alternate paths. - // - //In the case of routing conflicts there are multiple connections competing - //for the same resources which can not resolve the congestion themselves. - //In normal routing mode we try to keep the bounding boxes small to minimize - //run-time, but this can limits how far signals can detour (i.e. they can't - //route outside the bounding box), which can cause conflicts to oscillate back - //and forth without resolving. - // - //By scaling the bounding boxes here, we slowly increase the router's search - //space in hopes of it allowing signals to move further out of the way to - //alleviate the conflicts. - if (itry_conflicted_mode % BB_SCALE_ITER_COUNT == 0) { - //We scale the bounding boxes by BB_SCALE_FACTOR, - //every BB_SCALE_ITER_COUNT iterations. This ensures - //that we give the router some time (BB_SCALE_ITER_COUNT) to try - //resolve/negotiate congestion at the new BB factor. - // - //Note that we increase the BB factor slowly to try and minimize - //the bounding box size (since larger bounding boxes slow the router down). 
- auto& grid = g_vpr_ctx.device().grid; - int max_grid_dim = std::max(grid.width(), grid.height()); - - //Scale by BB_SCALE_FACTOR but clip to grid size to avoid overflow - bb_fac = std::min(max_grid_dim, bb_fac * BB_SCALE_FACTOR); - - route_ctx.route_bb = load_route_bb(net_list, bb_fac); - } - - ++itry_conflicted_mode; - } - - if (timing_info) { - if (should_setup_lower_bound_connection_delays(itry, router_opts)) { - // first iteration sets up the lower bound connection delays since only timing is optimized for - connections_inf.set_stable_critical_path_delay(critical_path.delay()); - connections_inf.set_lower_bound_connection_delays(net_delay); - - //load budgets using information from uncongested delay information - budgeting_inf.load_route_budgets(net_delay, timing_info, netlist_pin_lookup, router_opts); - /*for debugging purposes*/ - // if (budgeting_inf.if_set()) { - // budgeting_inf.print_route_budget(std::string("route_budgets_") + std::to_string(itry) + ".txt", net_delay); - // } - - if (router_opts.routing_budgets_algorithm == YOYO) { - for (auto& router : routers) { - router.set_rcv_enabled(true); - } - } - - } else { - bool stable_routing_configuration = true; - - /* - * Determine if any connection need to be forcibly re-routed due to timing - */ - - //Yes, if explicitly enabled - bool should_ripup_for_delay = (router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::ON); - - //Or, if things are not too congested - should_ripup_for_delay |= (router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::AUTO - && router_congestion_mode == RouterCongestionMode::NORMAL); - - if (should_ripup_for_delay) { - if (connections_inf.critical_path_delay_grew_significantly(critical_path.delay())) { - // only need to forcibly reroute if critical path grew significantly - stable_routing_configuration = connections_inf.forcibly_reroute_connections(router_opts.max_criticality, - timing_info, - netlist_pin_lookup, - net_delay); - } - } - - // not stable 
if any connection needs to be forcibly rerouted - if (stable_routing_configuration) { - connections_inf.set_stable_critical_path_delay(critical_path.delay()); - } - } - } else { - /* If timing analysis is not enabled, make sure that the criticalities and the - * net_delays stay as 0 so that wirelength can be optimized. */ - - for (auto net_id : net_list.nets()) { - for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) { - net_delay[net_id][ipin] = 0.; - } - } - } - - if (router_opts.congestion_analysis) profiling::congestion_analysis(); - if (router_opts.fanout_analysis) profiling::time_on_fanout_analysis(); - // profiling::time_on_criticality_analysis(); - } - - if (routing_is_successful) { - VTR_LOG("Restoring best routing\n"); - - auto& router_ctx = g_vpr_ctx.mutable_routing(); - - /* Restore congestion from best route */ - for (auto net_id : net_list.nets()) { - if (route_ctx.route_trees[net_id]) - pathfinder_update_cost_from_route_tree(route_ctx.route_trees[net_id]->root(), -1); - if (best_routing[net_id]) - pathfinder_update_cost_from_route_tree(best_routing[net_id]->root(), 1); - } - router_ctx.route_trees = best_routing; - router_ctx.clb_opins_used_locally = best_clb_opins_used_locally; - - prune_unused_non_configurable_nets(connections_inf, net_list); - - if (timing_info) { - VTR_LOG("Critical path: %g ns\n", 1e9 * best_routing_metrics.critical_path.delay()); - } - - VTR_LOG("Successfully routed after %d routing iterations.\n", itry); - } else { - VTR_LOG("Routing failed.\n"); - - //If the routing fails, print the overused info - print_overused_nodes_status(router_opts, overuse_info); - -# ifdef VTR_ENABLE_DEBUG_LOGGING - if (f_router_debug) print_invalid_routing_info(net_list, is_flat); -# endif - } - - VTR_LOG("Final Net Connection Criticality Histogram:\n"); - print_router_criticality_histogram(net_list, *route_timing_info, netlist_pin_lookup, is_flat); - - VTR_ASSERT(router_stats.heap_pushes >= 
router_stats.intra_cluster_node_pushes); - VTR_ASSERT(router_stats.heap_pops >= router_stats.intra_cluster_node_pops); - VTR_LOG( - "Router Stats: total_nets_routed: %zu total_connections_routed: %zu total_heap_pushes: %zu total_heap_pops: %zu " - "total_internal_heap_pushes: %zu total_internal_heap_pops: %zu total_external_heap_pushes: %zu total_external_heap_pops: %zu ", - router_stats.nets_routed, router_stats.connections_routed, router_stats.heap_pushes, router_stats.heap_pops, - router_stats.intra_cluster_node_pushes, router_stats.intra_cluster_node_pops, - router_stats.inter_cluster_node_pushes, router_stats.inter_cluster_node_pops); - for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) { - VTR_LOG("total_external_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]); - VTR_LOG("total_external_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pops[node_type_idx]); - VTR_LOG("total_internal_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]); - VTR_LOG("total_internal_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pops[node_type_idx]); - VTR_LOG("rt_node_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_pushes[node_type_idx]); - VTR_LOG("rt_node_%s_high_fanout_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_high_fanout_pushes[node_type_idx]); - VTR_LOG("rt_node_%s_entire_tree_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_entire_tree_pushes[node_type_idx]); - } - - VTR_LOG("total_number_of_adding_all_rt: %zu ", router_stats.add_all_rt); - VTR_LOG("total_number_of_adding_high_fanout_rt: %zu ", router_stats.add_high_fanout_rt); - VTR_LOG("total_number_of_adding_all_rt_from_calling_high_fanout_rt: %zu ", router_stats.add_all_rt_from_high_fanout); - VTR_LOG("\n"); 
- - PartitionTreeDebug::write("partition_tree.log"); - return routing_is_successful; -} - -/** Try routing a net. This calls timing_driven_route_net. - * The only difference is that it returns a "retry_net" flag, which means that the net - * couldn't be routed with the default bounding box and needs a full-device BB. - * This is required when routing in parallel, because the threads ensure data separation based on BB size. - * The single-thread router just retries with a full-device BB and does not need to notify the caller. - * TODO: make the serial router follow this execution path to decrease code duplication */ -template -NetResultFlags try_parallel_route_net(ConnectionRouter& router, - const Netlist<>& net_list, - const ParentNetId& net_id, - int itry, - float pres_fac, - const t_router_opts& router_opts, - CBRR& connections_inf, - RouterStats& router_stats, - std::vector& pin_criticality, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - route_budgets& budgeting_inf, - float worst_negative_slack, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat) { - auto& route_ctx = g_vpr_ctx.mutable_routing(); - - NetResultFlags flags; - - bool reroute_for_hold = false; - if (budgeting_inf.if_set()) { - reroute_for_hold = (budgeting_inf.get_should_reroute(net_id)); - reroute_for_hold &= worst_negative_slack != 0; - } - if (route_ctx.net_status.is_fixed(net_id)) { /* Skip pre-routed nets. */ - flags.success = true; - } else if (net_list.net_is_ignored(net_id)) { /* Skip ignored nets. 
*/ - flags.success = true; - } else if (!(reroute_for_hold) && !should_route_net(net_id, connections_inf, true)) { - flags.success = true; - } else { - // track time spent vs fanout - profiling::net_fanout_start(); - - vtr::Timer routing_timer; - flags = timing_driven_route_net(router, - net_list, - net_id, - itry, - pres_fac, - router_opts, - connections_inf, - router_stats, - pin_criticality, - net_delay[net_id].data(), - netlist_pin_lookup, - timing_info, - pin_timing_invalidator, - budgeting_inf, - worst_negative_slack, - routing_predictor, - choking_spots, - is_flat); - - profiling::net_fanout_end(net_list.net_sinks(net_id).size()); - - /* Impossible to route? (disconnected rr_graph) */ - if (flags.success) { - route_ctx.net_status.set_is_routed(net_id, true); - } else { - VTR_LOG("Routing failed for net %d\n", net_id); - } - - flags.was_rerouted = true; //Flag to record whether routing was actually changed - } - - return flags; -} - -/* Helper for route_partition_tree(). */ -template -void route_partition_tree_helper(tbb::task_group& g, - PartitionTreeNode& node, - RouteIterCtx& ctx, - vtr::linear_map& nets_to_retry) { - /* Sort so net with most sinks is routed first. 
*/ - std::sort(node.nets.begin(), node.nets.end(), [&](const ParentNetId id1, const ParentNetId id2) -> bool { - return ctx.net_list.net_sinks(id1).size() > ctx.net_list.net_sinks(id2).size(); - }); - - node.is_routable = true; - node.rerouted_nets.clear(); - - vtr::Timer t; - for (auto net_id : node.nets) { - auto flags = try_parallel_route_net( - ctx.routers.local(), - ctx.net_list, - net_id, - ctx.itry, - ctx.pres_fac, - ctx.router_opts, - ctx.connections_inf, - ctx.router_stats.local(), - ctx.route_structs.local().pin_criticality, - ctx.net_delay, - ctx.netlist_pin_lookup, - ctx.timing_info, - ctx.pin_timing_invalidator, - ctx.budgeting_inf, - ctx.worst_negative_slack, - ctx.routing_predictor, - ctx.choking_spots[net_id], - ctx.is_flat); - - if (!flags.success && !flags.retry_with_full_bb) { - node.is_routable = false; - } - if (flags.was_rerouted) { - node.rerouted_nets.push_back(net_id); - } - /* If we need to retry this net with full-device BB, it will go up to the top - * of the tree, so remove it from this node and keep track of it */ - if (flags.retry_with_full_bb) { - node.nets.erase(std::remove(node.nets.begin(), node.nets.end(), net_id), node.nets.end()); - nets_to_retry[net_id] = true; - } - } - - PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets routed in " + std::to_string(t.elapsed_sec()) + " s"); - - /* add left and right trees to task queue */ - if (node.left && node.right) { - g.run([&]() { - route_partition_tree_helper(g, *node.left, ctx, nets_to_retry); - }); - g.run([&]() { - route_partition_tree_helper(g, *node.right, ctx, nets_to_retry); - }); - } else { - VTR_ASSERT(!node.left && !node.right); // there shouldn't be a node with a single branch - } -} - -/** Reduce results from partition tree into a single RouteIterResults */ -static void reduce_partition_tree_helper(const PartitionTreeNode& node, RouteIterResults& results) { - results.is_routable &= node.is_routable; - const std::vector& rerouted = 
node.rerouted_nets; - results.rerouted_nets.insert(results.rerouted_nets.end(), rerouted.begin(), rerouted.end()); - - if (node.left) - reduce_partition_tree_helper(*node.left, results); - if (node.right) - reduce_partition_tree_helper(*node.right, results); -} - -/** Route all nets in parallel using the partitioning information in the PartitionTree. - * - * @param[in, out] g TBB task group to dispatch tasks. - * @param[in, out] tree The partition tree. Non-const reference because iteration results get written on the nodes. - * @param[in, out] ctx RouteIterCtx containing all the necessary bits of state for routing. - * @return RouteIterResults combined from all threads. - * - * See comments in PartitionTreeNode for how parallel routing works. */ -template -RouteIterResults route_partition_tree(tbb::task_group& g, - PartitionTree& tree, - RouteIterCtx& ctx) { - auto& device_ctx = g_vpr_ctx.device(); - auto& route_ctx = g_vpr_ctx.mutable_routing(); - - /* a net id -> retry? vector - * not a bool vector or a set because multiple threads may be writing on it */ - vtr::linear_map nets_to_retry; - - route_partition_tree_helper(g, tree.root(), ctx, nets_to_retry); - g.wait(); - - /* grow bounding box and add to top level if there is any net to retry */ - for (const auto& kv : nets_to_retry) { - if (kv.second) { - ParentNetId net_id = kv.first; - route_ctx.route_bb[net_id] = { - 0, - (int)(device_ctx.grid.width() - 1), - 0, - (int)(device_ctx.grid.height() - 1), - 0, - (int)(device_ctx.grid.get_num_layers() - 1)}; - tree.root().nets.push_back(net_id); - } - } - - RouteIterResults out; - reduce_partition_tree_helper(tree.root(), out); - for (auto& thread_stats : ctx.router_stats) { - update_router_stats(out.stats, thread_stats); - } - return out; -} - -/* Build a partition tree and route with it */ -template -static RouteIterResults route_with_partition_tree(tbb::task_group& g, RouteIterCtx& ctx) { - vtr::Timer t2; - PartitionTree partition_tree(ctx.net_list); - float 
total_prep_time = t2.elapsed_sec(); - VTR_LOG("# Built partition tree in %f seconds\n", total_prep_time); - - return route_partition_tree(g, partition_tree, ctx); -} - -/* Route serially */ -template -static RouteIterResults route_without_partition_tree(std::vector& nets_to_route, RouteIterCtx& ctx) { - RouteIterResults out; - - /* Sort so net with most sinks is routed first. */ - std::sort(nets_to_route.begin(), nets_to_route.end(), [&](const ParentNetId id1, const ParentNetId id2) -> bool { - return ctx.net_list.net_sinks(id1).size() > ctx.net_list.net_sinks(id2).size(); - }); - - for (auto net_id : nets_to_route) { - auto flags = try_timing_driven_route_net( - ctx.routers.local(), - ctx.net_list, - net_id, - ctx.itry, - ctx.pres_fac, - ctx.router_opts, - ctx.connections_inf, - ctx.router_stats.local(), - ctx.route_structs.local().pin_criticality, - ctx.route_structs.local().rt_node_of_sink, - ctx.net_delay, - ctx.netlist_pin_lookup, - ctx.timing_info, - ctx.pin_timing_invalidator, - ctx.budgeting_inf, - ctx.worst_negative_slack, - ctx.routing_predictor, - ctx.choking_spots[net_id], - ctx.is_flat); - - if (!flags.success) { - out.is_routable = false; - } - if (flags.was_rerouted) { - out.rerouted_nets.push_back(net_id); - } - } - - update_router_stats(out.stats, ctx.router_stats.local()); - - return out; -} - -#endif // VPR_USE_TBB diff --git a/vpr/src/route/route_parallel.h b/vpr/src/route/route_parallel.h deleted file mode 100644 index b6b4766469f..00000000000 --- a/vpr/src/route/route_parallel.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once - -#include -#include -#include "connection_based_routing.h" -#include "netlist.h" -#include "vpr_types.h" - -#include "vpr_utils.h" -#include "timing_info_fwd.h" -#include "route_budgets.h" -#include "router_stats.h" -#include "router_lookahead.h" -#include "spatial_route_tree_lookup.h" -#include "connection_router_interface.h" -#include "heap_type.h" -#include "routing_predictor.h" - -#ifdef VPR_USE_TBB -/** Route in 
parallel. The number of threads is set by the global -j option to VPR. - * Return success status. */ -bool try_parallel_route(const Netlist<>& net_list, - const t_det_routing_arch& det_routing_arch, - const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - const std::vector& segment_inf, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - std::shared_ptr delay_calc, - ScreenUpdatePriority first_iteration_priority, - bool is_flat); -#endif diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp deleted file mode 100644 index 2b497066e32..00000000000 --- a/vpr/src/route/route_timing.cpp +++ /dev/null @@ -1,2263 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "NetPinTimingInvalidator.h" -#include "netlist_fwd.h" -#include "rr_graph_fwd.h" -#include "vtr_assert.h" -#include "vtr_log.h" -#include "vtr_time.h" - -#include "vpr_utils.h" -#include "vpr_types.h" -#include "vpr_error.h" - -#include "globals.h" -#include "read_route.h" -#include "route_export.h" -#include "route_common.h" -#include "route_timing.h" -#include "net_delay.h" -#include "stats.h" -#include "echo_files.h" -#include "draw.h" -#include "breakpoint.h" -#include "move_utils.h" -#include "rr_graph.h" -#include "routing_predictor.h" -#include "VprTimingGraphResolver.h" - -// all functions in profiling:: namespace, which are only activated if PROFILE is defined -#include "route_profiling.h" - -#include "concrete_timing_info.h" -#include "timing_util.h" -#include "route_budgets.h" -#include "binary_heap.h" -#include "bucket.h" -#include "connection_router.h" - -#include "tatum/TimingReporter.hpp" -#include "overuse_report.h" - -/* - * File-scope variables - */ - -/** - * @brief Run-time flag to control when router debug information is printed - * Note only enables debug output if compiled with VTR_ENABLE_DEBUG_LOGGING defined - * f_router_debug is 
used to stop the router when a breakpoint is reached. When a breakpoint is reached, this flag is set to true. - * - * In addition f_router_debug is used to print additional debug information during routing, for instance lookahead expected costs - * information. - */ -bool f_router_debug = false; - -//Count the number of times the router has failed -static int num_routing_failed = 0; - -/******************** Subroutines local to route_timing.cpp ********************/ - -/** Attempt to route a single sink (target_pin) in a net. - * In the process, update global pathfinder costs, rr_node_route_inf and extend the global RouteTree - * for this net. - * - * @param router The ConnectionRouter instance - * @param net_list Input netlist - * @param net_id - * @param itarget # of this connection in the net (only used for debug output) - * @param target_pin # of this sink in the net (TODO: is it the same thing as itarget?) - * @param cost_params - * @param router_opts - * @param[in, out] tree RouteTree describing the current routing state - * @param rt_node_of_sink Lookup from target_pin-like indices (indicating SINK nodes) to RouteTreeNodes - * @param spatial_rt_lookup - * @param router_stats - * @param budgeting_inf - * @param routing_predictor - * @param choking_spots - * @param is_flat - * @return NetResultFlags for this sink to be bubbled up through timing_driven_route_net */ -template -static NetResultFlags timing_driven_route_sink(ConnectionRouter& router, - const Netlist<>& net_list, - ParentNetId net_id, - unsigned itarget, - int target_pin, - const t_conn_cost_params cost_params, - const t_router_opts& router_opts, - RouteTree& tree, - SpatialRouteTreeLookup& spatial_rt_lookup, - RouterStats& router_stats, - route_budgets& budgeting_inf, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat); - -/** Return tuple of: - * bool: Did we find a path for each sink in this net? 
- * bool: Should the caller retry with a full-device bounding box? */ -template -static std::tuple timing_driven_pre_route_to_clock_root(ConnectionRouter& router, - ParentNetId net_id, - const Netlist<>& net_list, - RRNodeId sink_node, - const t_conn_cost_params cost_params, - int high_fanout_threshold, - RouteTree& tree, - SpatialRouteTreeLookup& spatial_rt_lookup, - RouterStats& router_stats, - bool is_flat, - bool can_grow_bb); - -static void setup_routing_resources(int itry, - ParentNetId net_id, - const Netlist<>& net_list, - unsigned num_sinks, - int min_incremental_reroute_fanout, - CBRR& connections_inf, - const t_router_opts& router_opts, - bool ripup_high_fanout_nets); - -static void update_net_delays_from_route_tree(float* net_delay, - const Netlist<>& net_list, - ParentNetId inet, - TimingInfo* timing_info, - NetPinTimingInvalidator* pin_timing_invalidator); - -static bool check_hold(const t_router_opts& router_opts, float worst_neg_slack); - -static float get_net_pin_criticality(const std::shared_ptr timing_info, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - float max_criticality, - float criticality_exp, - ParentNetId net_id, - ParentPinId pin_id, - bool is_flat); - -struct more_sinks_than { - const Netlist<>& net_list_; - more_sinks_than(const Netlist<>& net_list) - : net_list_(net_list) {} - inline bool operator()(const ParentNetId& net_index1, const ParentNetId& net_index2) { - return net_list_.net_sinks(net_index1).size() > net_list_.net_sinks(net_index2).size(); - } -}; - -static bool is_high_fanout(int fanout, int fanout_threshold); - -// The reason that try_timing_driven_route_tmpl (and descendents) are being -// templated over is because using a virtual interface instead fully templating -// the router results in a 5% runtime increase. -// -// The reason to template over the router in general is to enable runtime -// selection of core router algorithm's, specifically the router heap. 
-template -static bool try_timing_driven_route_tmpl(const Netlist<>& netlist, - const t_det_routing_arch& det_routing_arch, - const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - const std::vector& segment_inf, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - std::shared_ptr delay_calc, - ScreenUpdatePriority first_iteration_priority, - bool is_flat); - -/************************ Subroutine definitions *****************************/ -bool try_timing_driven_route(const Netlist<>& net_list, - const t_det_routing_arch& det_routing_arch, - const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - const std::vector& segment_inf, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - std::shared_ptr delay_calc, - ScreenUpdatePriority first_iteration_priority, - bool is_flat) { - switch (router_opts.router_heap) { - case e_heap_type::BINARY_HEAP: - return try_timing_driven_route_tmpl>(net_list, - det_routing_arch, - router_opts, - analysis_opts, - segment_inf, - net_delay, - netlist_pin_lookup, - timing_info, - delay_calc, - first_iteration_priority, - is_flat); - break; - case e_heap_type::BUCKET_HEAP_APPROXIMATION: - return try_timing_driven_route_tmpl>(net_list, - det_routing_arch, - router_opts, - analysis_opts, - segment_inf, - net_delay, - netlist_pin_lookup, - timing_info, - delay_calc, - first_iteration_priority, - is_flat); - default: - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown heap type %d", router_opts.router_heap); - } -} - -template -bool try_timing_driven_route_tmpl(const Netlist<>& net_list, - const t_det_routing_arch& det_routing_arch, - const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - const std::vector& segment_inf, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - std::shared_ptr delay_calc, - 
ScreenUpdatePriority first_iteration_priority, - bool is_flat) { - /* Timing-driven routing algorithm. The timing graph (includes slack) * - * must have already been allocated, and net_delay must have been allocated. * - * Returns true if the routing succeeds, false otherwise. */ - - // Make sure template type ConnectionRouter is a ConnectionRouterInterface. - static_assert(std::is_base_of::value, "ConnectionRouter must implement the ConnectionRouterInterface"); - - const auto& device_ctx = g_vpr_ctx.device(); - const auto& atom_ctx = g_vpr_ctx.atom(); - auto& route_ctx = g_vpr_ctx.mutable_routing(); - - auto choking_spots = set_nets_choking_spots(net_list, - route_ctx.net_terminal_groups, - route_ctx.net_terminal_group_num, - router_opts.has_choking_spot, - is_flat); - - //Initially, the router runs normally trying to reduce congestion while - //balancing other metrics (timing, wirelength, run-time etc.) - RouterCongestionMode router_congestion_mode = RouterCongestionMode::NORMAL; - - //Initialize and properly size the lookups for profiling - profiling::profiling_initialization(get_max_pins_per_net(net_list)); - - //sort so net with most sinks is routed first. 
- auto sorted_nets = std::vector(net_list.nets().begin(), net_list.nets().end()); - std::sort(sorted_nets.begin(), sorted_nets.end(), more_sinks_than(net_list)); - - /* - * Configure the routing predictor - */ - RoutingPredictor routing_predictor; - float abort_iteration_threshold = std::numeric_limits::infinity(); //Default no early abort - if (router_opts.routing_failure_predictor == SAFE) { - abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_SAFE * router_opts.max_router_iterations; - } else if (router_opts.routing_failure_predictor == AGGRESSIVE) { - abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_AGGRESSIVE * router_opts.max_router_iterations; - } else { - VTR_ASSERT_MSG(router_opts.routing_failure_predictor == OFF, "Unrecognized routing failure predictor setting"); - } - - float high_effort_congestion_mode_iteration_threshold = router_opts.congested_routing_iteration_threshold_frac * router_opts.max_router_iterations; - - /* Set delay of ignored signals to zero. Non-ignored net delays are set by - * update_net_delays_from_route_tree() inside timing_driven_route_net(), - * which is only called for non-ignored nets. */ - for (auto net_id : net_list.nets()) { - if (net_list.net_is_ignored(net_id)) { - for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) { - net_delay[net_id][ipin] = 0.; - } - } - } - - CBRR connections_inf{net_list, route_ctx.net_rr_terminals, is_flat}; - - route_budgets budgeting_inf(net_list, is_flat); - - // This needs to be called before filling intra-cluster lookahead maps to ensure that the intra-cluster lookahead maps are initialized. 
- const RouterLookahead* router_lookahead = get_cached_router_lookahead(det_routing_arch, - router_opts.lookahead_type, - router_opts.write_router_lookahead, - router_opts.read_router_lookahead, - segment_inf, - is_flat); - - if (is_flat) { - // If is_flat is true, the router lookahead maps related to intra-cluster resources should be initialized since - // they haven't been initialized when the map related to global resources was initialized. - auto cache_key = route_ctx.router_lookahead_cache_key_; - std::unique_ptr mut_router_lookahead(route_ctx.cached_router_lookahead_.release()); - VTR_ASSERT(mut_router_lookahead); - route_ctx.cached_router_lookahead_.clear(); - if (!router_opts.read_intra_cluster_router_lookahead.empty()) { - mut_router_lookahead->read_intra_cluster(router_opts.read_intra_cluster_router_lookahead); - } else { - mut_router_lookahead->compute_intra_tile(); - } - route_ctx.cached_router_lookahead_.set(cache_key, std::move(mut_router_lookahead)); - router_lookahead = get_cached_router_lookahead(det_routing_arch, - router_opts.lookahead_type, - router_opts.write_router_lookahead, - router_opts.read_router_lookahead, - segment_inf, - is_flat); - if (!router_opts.write_intra_cluster_router_lookahead.empty()) { - router_lookahead->write_intra_cluster(router_opts.write_intra_cluster_router_lookahead); - } - } - - VTR_ASSERT(router_lookahead != nullptr); - - /* - * Routing parameters - */ - float pres_fac = update_pres_fac(router_opts.first_iter_pres_fac); /* Typically 0 -> ignore cong. 
*/ - int bb_fac = router_opts.bb_factor; - - //When routing conflicts are detected the bounding boxes are scaled - //by BB_SCALE_FACTOR every BB_SCALE_ITER_COUNT iterations - constexpr float BB_SCALE_FACTOR = 2; - constexpr int BB_SCALE_ITER_COUNT = 5; - - size_t available_wirelength = calculate_wirelength_available(); - - /* - * Routing status and metrics - */ - bool routing_is_successful = false; - WirelengthInfo wirelength_info; - OveruseInfo overuse_info(device_ctx.rr_graph.num_nodes()); - tatum::TimingPathInfo critical_path; - int itry; //Routing iteration number - int itry_conflicted_mode = 0; - - /* - * Best result so far - */ - vtr::vector> best_routing; - t_clb_opins_used best_clb_opins_used_locally; - RoutingMetrics best_routing_metrics; - int legal_convergence_count = 0; - - ConnectionRouter router( - device_ctx.grid, - *router_lookahead, - device_ctx.rr_graph.rr_nodes(), - &device_ctx.rr_graph, - device_ctx.rr_rc_data, - device_ctx.rr_graph.rr_switch(), - route_ctx.rr_node_route_inf, - is_flat); - - /* - * On the first routing iteration ignore congestion to get reasonable net - * delay estimates. Set criticalities to 1 when timing analysis is on to - * optimize timing, and to 0 when timing analysis is off to optimize routability. - * - * Subsequent iterations use the net delays from the previous iteration. 
- */ - std::shared_ptr route_timing_info; - { - vtr::ScopedStartFinishTimer init_timing_timer("Initializing router criticalities"); - if (timing_info) { - if (router_opts.initial_timing == e_router_initial_timing::ALL_CRITICAL) { - //First routing iteration, make all nets critical for a min-delay routing - route_timing_info = make_constant_timing_info(1.); - } else { - VTR_ASSERT(router_opts.initial_timing == e_router_initial_timing::LOOKAHEAD); - - { - //Estimate initial connection delays from the router lookahead - init_net_delay_from_lookahead(*router_lookahead, - net_list, - route_ctx.net_rr_terminals, - net_delay, - device_ctx.rr_graph, - is_flat); - - //Run STA to get estimated criticalities - timing_info->update(); - } - route_timing_info = timing_info; - } - } else { - //Not timing driven, force criticality to zero for a routability-driven routing - route_timing_info = make_constant_timing_info(0.); - } - VTR_LOG("Initial Net Connection Criticality Histogram:\n"); - print_router_criticality_histogram(net_list, *route_timing_info, netlist_pin_lookup, is_flat); - } - - std::unique_ptr pin_timing_invalidator; - if (timing_info) { - pin_timing_invalidator = make_net_pin_timing_invalidator( - router_opts.timing_update_type, - net_list, - netlist_pin_lookup, - atom_ctx.nlist, - atom_ctx.lookup, - *timing_info->timing_graph(), - is_flat); - } - - RouterStats router_stats; - init_router_stats(router_stats); - timing_driven_route_structs route_structs(net_list); - float prev_iter_cumm_time = 0; - vtr::Timer iteration_timer; - int num_net_bounding_boxes_updated = 0; - int itry_since_last_convergence = -1; - - // This heap is used for reserve_locally_used_opins. 
- BinaryHeap small_heap; - small_heap.init_heap(device_ctx.grid); - - // When RCV is enabled the router will not stop unless negative hold slack is 0 - // In some cases this isn't doable, due to global nets or intracluster routing issues - // In these cases RCV will finish early if it goes RCV_FINISH_EARLY_COUNTDOWN iterations without detecting resolvable negative hold slack - // Increasing this will make the router fail occasionally, decreasing will sometimes not let all hold violations be resolved - constexpr int RCV_FINISH_EARLY_COUNTDOWN = 15; - - int rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN; - - print_route_status_header(); - for (itry = 1; itry <= router_opts.max_router_iterations; ++itry) { - RouterStats router_iteration_stats; - init_router_stats(router_iteration_stats); - std::vector rerouted_nets; - - /* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */ - for (auto net_id : net_list.nets()) { - route_ctx.net_status.set_is_routed(net_id, false); - route_ctx.net_status.set_is_fixed(net_id, false); - } - - if (itry_since_last_convergence >= 0) { - ++itry_since_last_convergence; - } - - // Calculate this once and pass it into net routing to check if should reroute for hold - float worst_negative_slack = 0; - if (budgeting_inf.if_set()) { - worst_negative_slack = timing_info->hold_total_negative_slack(); - } - - /* - * Route each net - */ - for (auto net_id : sorted_nets) { - NetResultFlags flags = try_timing_driven_route_net(router, - net_list, - net_id, - itry, - pres_fac, - router_opts, - connections_inf, - router_iteration_stats, - route_structs.pin_criticality, - net_delay, - netlist_pin_lookup, - route_timing_info, - pin_timing_invalidator.get(), - budgeting_inf, - worst_negative_slack, - routing_predictor, - choking_spots[net_id], - is_flat); - - if (!flags.success) { - return false; //Impossible to route - } - - if (flags.was_rerouted) { - rerouted_nets.push_back(net_id); -#ifndef NO_GRAPHICS - 
update_router_info_and_check_bp(BP_NET_ID, size_t(net_id)); -#endif - } - } - - // Make sure any CLB OPINs used up by subblocks being hooked directly to them are reserved for that purpose - bool rip_up_local_opins = (itry == 1 ? false : true); - if (!is_flat) { - reserve_locally_used_opins(&small_heap, pres_fac, - router_opts.acc_fac, rip_up_local_opins, is_flat); - } - - /* - * Calculate metrics for the current routing - */ - bool routing_is_feasible = feasible_routing(); - float est_success_iteration = routing_predictor.estimate_success_iteration(); - - //Update resource costs and overuse info - if (itry == 1) { - pathfinder_update_acc_cost_and_overuse_info(0., overuse_info); /* Acc_fac=0 for first iter. */ - } else { - pathfinder_update_acc_cost_and_overuse_info(router_opts.acc_fac, overuse_info); - } - - wirelength_info = calculate_wirelength_info(net_list, available_wirelength); - routing_predictor.add_iteration_overuse(itry, overuse_info.overused_nodes); - - if (timing_info) { - //Update timing based on the new routing - //Note that the net delays have already been updated by timing_driven_route_net - timing_info->update(); - timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during routing - pin_timing_invalidator->reset(); - - //Use the real timing analysis criticalities for subsequent routing iterations - // 'route_timing_info' is what is actually passed into the net/connection routers, - // and for the 1st iteration may not be the actual STA results (e.g. 
all criticalities set to 1) - route_timing_info = timing_info; - - critical_path = timing_info->least_slack_critical_path(); - - VTR_ASSERT_SAFE(timing_driven_check_net_delays(net_list, net_delay)); - - if (itry == 1) { - generate_route_timing_reports(router_opts, analysis_opts, *timing_info, *delay_calc, is_flat); - } - } - - float iter_cumm_time = iteration_timer.elapsed_sec(); - float iter_elapsed_time = iter_cumm_time - prev_iter_cumm_time; - - //Output progress - print_route_status(itry, iter_elapsed_time, pres_fac, num_net_bounding_boxes_updated, router_iteration_stats, overuse_info, wirelength_info, timing_info, est_success_iteration); - - prev_iter_cumm_time = iter_cumm_time; - - //Update graphics - if (itry == 1) { - update_screen(first_iteration_priority, "Routing...", ROUTING, timing_info); - } else { - update_screen(ScreenUpdatePriority::MINOR, "Routing...", ROUTING, timing_info); - } - - if (router_opts.save_routing_per_iteration) { - std::string filename = vtr::string_fmt("iteration_%03d.route", itry); - print_route(net_list, nullptr, filename.c_str(), is_flat); - } - - //Update router stats (total) - update_router_stats(router_stats, router_iteration_stats); - - /* - * Are we finished? 
- */ - if (is_iteration_complete(routing_is_feasible, router_opts, itry, timing_info, rcv_finished_count == 0)) { - auto& router_ctx = g_vpr_ctx.routing(); - - if (is_better_quality_routing(best_routing, best_routing_metrics, wirelength_info, timing_info)) { - //Save routing - best_routing = router_ctx.route_trees; - best_clb_opins_used_locally = router_ctx.clb_opins_used_locally; - - routing_is_successful = true; - - //Update best metrics - if (timing_info) { - timing_driven_check_net_delays(net_list, net_delay); - - best_routing_metrics.sTNS = timing_info->setup_total_negative_slack(); - best_routing_metrics.sWNS = timing_info->setup_worst_negative_slack(); - best_routing_metrics.hTNS = timing_info->hold_total_negative_slack(); - best_routing_metrics.hWNS = timing_info->hold_worst_negative_slack(); - best_routing_metrics.critical_path = critical_path; - } - best_routing_metrics.used_wirelength = wirelength_info.used_wirelength(); - } - - //Decrease pres_fac so that critical connections will take more direct routes - //Note that we use first_iter_pres_fac here (typically zero), and switch to - //use initial_pres_fac on the next iteration. - pres_fac = update_pres_fac(router_opts.first_iter_pres_fac); - - //Reduce timing tolerances to re-route more delay-suboptimal signals - connections_inf.set_connection_criticality_tolerance(0.7); - connections_inf.set_connection_delay_tolerance(1.01); - - ++legal_convergence_count; - itry_since_last_convergence = 0; - - VTR_ASSERT(routing_is_successful); - } - - if (itry_since_last_convergence == 1) { - //We used first_iter_pres_fac when we started routing again - //after the first routing convergence. Since that is often zero, - //we want to set pres_fac to a reasonable (i.e. 
typically non-zero) - //value afterwards -- so it grows when multiplied by pres_fac_mult - pres_fac = update_pres_fac(router_opts.initial_pres_fac); - } - - //Have we converged the maximum number of times, did not make any changes, or does it seem - //unlikely additional convergences will improve QoR? - if (legal_convergence_count >= router_opts.max_convergence_count - || router_iteration_stats.connections_routed == 0 - || early_reconvergence_exit_heuristic(router_opts, itry_since_last_convergence, timing_info, best_routing_metrics)) { -#ifndef NO_GRAPHICS - update_router_info_and_check_bp(BP_ROUTE_ITER, -1); -#endif - break; //Done routing - } - - /* - * Abort checks: Should we give-up because this routing problem is unlikely to converge to a legal routing? - */ - if (itry == 1 && early_exit_heuristic(router_opts, wirelength_info)) { -#ifndef NO_GRAPHICS - update_router_info_and_check_bp(BP_ROUTE_ITER, -1); -#endif - //Abort - break; - } - - //Estimate at what iteration we will converge to a legal routing - if (overuse_info.overused_nodes > ROUTING_PREDICTOR_MIN_ABSOLUTE_OVERUSE_THRESHOLD) { - //Only consider aborting if we have a significant number of overused resources - - if (!std::isnan(est_success_iteration) && est_success_iteration > abort_iteration_threshold && router_opts.routing_budgets_algorithm != YOYO) { - VTR_LOG("Routing aborted, the predicted iteration for a successful route (%.1f) is too high.\n", est_success_iteration); -#ifndef NO_GRAPHICS - update_router_info_and_check_bp(BP_ROUTE_ITER, -1); -#endif - break; //Abort - } - } - - if (itry == 1 && router_opts.exit_after_first_routing_iteration) { - VTR_LOG("Exiting after first routing iteration as requested\n"); -#ifndef NO_GRAPHICS - update_router_info_and_check_bp(BP_ROUTE_ITER, -1); -#endif - break; - } - - /* - * Prepare for the next iteration - */ - - if (router_opts.route_bb_update == e_route_bb_update::DYNAMIC) { - num_net_bounding_boxes_updated = dynamic_update_bounding_boxes(rerouted_nets, 
net_list, router_opts.high_fanout_threshold); - } - - if (itry >= high_effort_congestion_mode_iteration_threshold) { - //We are approaching the maximum number of routing iterations, - //and still do not have a legal routing. Switch to a mode which - //focuses more on attempting to resolve routing conflicts. - router_congestion_mode = RouterCongestionMode::CONFLICTED; - } - - //Update pres_fac - if (itry == 1) { - pres_fac = update_pres_fac(router_opts.initial_pres_fac); - } else { - pres_fac *= router_opts.pres_fac_mult; - - /* Avoid overflow for high iteration counts, even if acc_cost is big */ - pres_fac = update_pres_fac(std::min(pres_fac, static_cast(HUGE_POSITIVE_FLOAT / 1e5))); - - // Increase short path criticality if it's having a hard time resolving hold violations due to congestion - if (budgeting_inf.if_set()) { - bool rcv_finished = false; - - /* This constant represents how much extra delay the budget increaser adds to the minimum and maximum delay budgets - * Experimentally this value delivers fast hold slack resolution, while not overwhelming the router - * Increasing this will make it resolve hold faster, but could result in lower circuit quality */ - constexpr float budget_increase_factor = 300e-12; - - if (itry > 5 && worst_negative_slack != 0) rcv_finished = budgeting_inf.increase_min_budgets_if_struggling(budget_increase_factor, timing_info, worst_negative_slack, netlist_pin_lookup); - if (rcv_finished) - rcv_finished_count--; - else - rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN; - } - } - - if (router_congestion_mode == RouterCongestionMode::CONFLICTED) { - //The design appears to have routing conflicts which are difficult to resolve: - // 1) Don't re-route legal connections due to delay. This allows - // the router to focus on the actual conflicts - // 2) Increase the net bounding boxes. This potentially allows - // the router to route around otherwise congested regions - // (at the cost of high run-time). 
- - //Increase the size of the net bounding boxes to give the router more - //freedom to find alternate paths. - // - //In the case of routing conflicts there are multiple connections competing - //for the same resources which can not resolve the congestion themselves. - //In normal routing mode we try to keep the bounding boxes small to minimize - //run-time, but this can limits how far signals can detour (i.e. they can't - //route outside the bounding box), which can cause conflicts to oscillate back - //and forth without resolving. - // - //By scaling the bounding boxes here, we slowly increase the router's search - //space in hopes of it allowing signals to move further out of the way to - //alleviate the conflicts. - if (itry_conflicted_mode % BB_SCALE_ITER_COUNT == 0) { - //We scale the bounding boxes by BB_SCALE_FACTOR, - //every BB_SCALE_ITER_COUNT iterations. This ensures - //that we give the router some time (BB_SCALE_ITER_COUNT) to try - //resolve/negotiate congestion at the new BB factor. - // - //Note that we increase the BB factor slowly to try and minimize - //the bounding box size (since larger bounding boxes slow the router down). 
- auto& grid = g_vpr_ctx.device().grid; - int max_grid_dim = std::max(grid.width(), grid.height()); - - //Scale by BB_SCALE_FACTOR but clip to grid size to avoid overflow - bb_fac = std::min(max_grid_dim, bb_fac * BB_SCALE_FACTOR); - - route_ctx.route_bb = load_route_bb(net_list, bb_fac); - } - - ++itry_conflicted_mode; - } - - if (timing_info) { - if (should_setup_lower_bound_connection_delays(itry, router_opts)) { - // first iteration sets up the lower bound connection delays since only timing is optimized for - connections_inf.set_stable_critical_path_delay(critical_path.delay()); - connections_inf.set_lower_bound_connection_delays(net_delay); - - //load budgets using information from uncongested delay information - budgeting_inf.load_route_budgets(net_delay, timing_info, netlist_pin_lookup, router_opts); - /*for debugging purposes*/ - // if (budgeting_inf.if_set()) { - // budgeting_inf.print_route_budget(std::string("route_budgets_") + std::to_string(itry) + ".txt", net_delay); - // } - - if (router_opts.routing_budgets_algorithm == YOYO) - router.set_rcv_enabled(true); - - } else { - bool stable_routing_configuration = true; - - /* - * Determine if any connection need to be forcibly re-routed due to timing - */ - - //Yes, if explicitly enabled - bool should_ripup_for_delay = (router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::ON); - - //Or, if things are not too congested - should_ripup_for_delay |= (router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::AUTO - && router_congestion_mode == RouterCongestionMode::NORMAL); - - if (should_ripup_for_delay) { - if (connections_inf.critical_path_delay_grew_significantly(critical_path.delay())) { - // only need to forcibly reroute if critical path grew significantly - stable_routing_configuration = connections_inf.forcibly_reroute_connections(router_opts.max_criticality, - timing_info, - netlist_pin_lookup, - net_delay); - } - } - - // not stable if any connection needs to be forcibly 
rerouted - if (stable_routing_configuration) { - connections_inf.set_stable_critical_path_delay(critical_path.delay()); - } - } - } else { - /* If timing analysis is not enabled, make sure that the criticalities and the - * net_delays stay as 0 so that wirelength can be optimized. */ - - for (auto net_id : net_list.nets()) { - for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) { - net_delay[net_id][ipin] = 0.; - } - } - } - - if (router_opts.congestion_analysis) profiling::congestion_analysis(); - if (router_opts.fanout_analysis) profiling::time_on_fanout_analysis(); - // profiling::time_on_criticality_analysis(); - } - - if (routing_is_successful) { - VTR_LOG("Restoring best routing\n"); - - auto& router_ctx = g_vpr_ctx.mutable_routing(); - - /* Restore congestion from best route */ - for (auto net_id : net_list.nets()) { - if (route_ctx.route_trees[net_id]) - pathfinder_update_cost_from_route_tree(route_ctx.route_trees[net_id]->root(), -1); - if (best_routing[net_id]) - pathfinder_update_cost_from_route_tree(best_routing[net_id]->root(), 1); - } - router_ctx.route_trees = best_routing; - router_ctx.clb_opins_used_locally = best_clb_opins_used_locally; - - prune_unused_non_configurable_nets(connections_inf, net_list); - - if (timing_info) { - VTR_LOG("Critical path: %g ns\n", 1e9 * best_routing_metrics.critical_path.delay()); - } - - VTR_LOG("Successfully routed after %d routing iterations.\n", itry); - } else { - VTR_LOG("Routing failed.\n"); - - //If the routing fails, print the overused info - print_overused_nodes_status(router_opts, overuse_info); - - ++num_routing_failed; - -#ifdef VTR_ENABLE_DEBUG_LOGGING - if (f_router_debug) print_invalid_routing_info(net_list, is_flat); -#endif - } - - VTR_LOG("Final Net Connection Criticality Histogram:\n"); - print_router_criticality_histogram(net_list, *route_timing_info, netlist_pin_lookup, is_flat); - - VTR_ASSERT(router_stats.heap_pushes >= router_stats.intra_cluster_node_pushes); - 
VTR_ASSERT(router_stats.heap_pops >= router_stats.intra_cluster_node_pops); - VTR_LOG( - "Router Stats: total_nets_routed: %zu total_connections_routed: %zu total_heap_pushes: %zu total_heap_pops: %zu " - "total_internal_heap_pushes: %zu total_internal_heap_pops: %zu total_external_heap_pushes: %zu total_external_heap_pops: %zu ", - router_stats.nets_routed, router_stats.connections_routed, router_stats.heap_pushes, router_stats.heap_pops, - router_stats.intra_cluster_node_pushes, router_stats.intra_cluster_node_pops, - router_stats.inter_cluster_node_pushes, router_stats.inter_cluster_node_pops); - for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) { - VTR_LOG("total_external_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]); - VTR_LOG("total_external_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pops[node_type_idx]); - VTR_LOG("total_internal_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]); - VTR_LOG("total_internal_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pops[node_type_idx]); - VTR_LOG("rt_node_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_pushes[node_type_idx]); - VTR_LOG("rt_node_%s_high_fanout_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_high_fanout_pushes[node_type_idx]); - VTR_LOG("rt_node_%s_entire_tree_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_entire_tree_pushes[node_type_idx]); - } - - VTR_LOG("total_number_of_adding_all_rt: %zu ", router_stats.add_all_rt); - VTR_LOG("total_number_of_adding_high_fanout_rt: %zu ", router_stats.add_high_fanout_rt); - VTR_LOG("total_number_of_adding_all_rt_from_calling_high_fanout_rt: %zu ", router_stats.add_all_rt_from_high_fanout); - VTR_LOG("\n"); - - return routing_is_successful; -} - 
-template -NetResultFlags try_timing_driven_route_net(ConnectionRouter& router, - const Netlist<>& net_list, - const ParentNetId& net_id, - int itry, - float pres_fac, - const t_router_opts& router_opts, - CBRR& connections_inf, - RouterStats& router_stats, - std::vector& pin_criticality, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - route_budgets& budgeting_inf, - float worst_negative_slack, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat) { - auto& route_ctx = g_vpr_ctx.mutable_routing(); - - NetResultFlags flags; - - bool reroute_for_hold = false; - if (budgeting_inf.if_set()) { - reroute_for_hold = (budgeting_inf.get_should_reroute(net_id)); - reroute_for_hold &= worst_negative_slack != 0; - } - - if (route_ctx.net_status.is_fixed(net_id)) { /* Skip pre-routed nets. */ - flags.success = true; - } else if (net_list.net_is_ignored(net_id)) { /* Skip ignored nets. */ - flags.success = true; - } else if (!(reroute_for_hold) && !should_route_net(net_id, connections_inf, true)) { - flags.success = true; - } else { - // track time spent vs fanout - profiling::net_fanout_start(); - - flags = timing_driven_route_net(router, - net_list, - net_id, - itry, - pres_fac, - router_opts, - connections_inf, - router_stats, - pin_criticality, - net_delay[net_id].data(), - netlist_pin_lookup, - timing_info, - pin_timing_invalidator, - budgeting_inf, - worst_negative_slack, - routing_predictor, - choking_spots, - is_flat); - - profiling::net_fanout_end(net_list.net_sinks(net_id).size()); - - /* Impossible to route? 
(disconnected rr_graph) */ - if (flags.success) { - route_ctx.net_status.set_is_routed(net_id, true); - } else { - VTR_LOG("Routing failed for net %d\n", net_id); - } - - flags.was_rerouted = true; // Flag to record whether routing was actually changed - } - - return flags; -} - -int get_max_pins_per_net(const Netlist<>& net_list) { - int max_pins_per_net = 0; - for (auto net_id : net_list.nets()) { - if (!net_list.net_is_ignored(net_id)) - max_pins_per_net = std::max(max_pins_per_net, (int)net_list.net_pins(net_id).size()); - } - - return (max_pins_per_net); -} - -template -NetResultFlags timing_driven_route_net(ConnectionRouter& router, - const Netlist<>& net_list, - ParentNetId net_id, - int itry, - float pres_fac, - const t_router_opts& router_opts, - CBRR& connections_inf, - RouterStats& router_stats, - std::vector& pin_criticality, - float* net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - route_budgets& budgeting_inf, - float worst_neg_slack, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat) { - auto& device_ctx = g_vpr_ctx.device(); - const auto& rr_graph = device_ctx.rr_graph; - auto& route_ctx = g_vpr_ctx.mutable_routing(); - - unsigned int num_sinks = net_list.net_sinks(net_id).size(); - - VTR_LOGV_DEBUG(f_router_debug, "Routing Net %zu (%zu sinks)\n", size_t(net_id), num_sinks); - - NetResultFlags flags; - - setup_routing_resources( - itry, - net_id, - net_list, - num_sinks, - router_opts.min_incremental_reroute_fanout, - connections_inf, - router_opts, - check_hold(router_opts, worst_neg_slack)); - - VTR_ASSERT(route_ctx.route_trees[net_id]); - RouteTree& tree = route_ctx.route_trees[net_id].value(); - - bool high_fanout = is_high_fanout(num_sinks, router_opts.high_fanout_threshold); - - SpatialRouteTreeLookup spatial_route_tree_lookup; - if (high_fanout) { - spatial_route_tree_lookup = 
build_route_tree_spatial_lookup(net_list, - route_ctx.route_bb, - net_id, - tree.root()); - } - - // after this point the route tree is correct - // remaining_targets from this point on are the **pin indices** that have yet to be routed - std::vector remaining_targets(tree.get_remaining_isinks().begin(), tree.get_remaining_isinks().end()); - - // calculate criticality of remaining target pins - for (int ipin : remaining_targets) { - if (timing_info) { - auto pin = net_list.net_pin(net_id, ipin); - pin_criticality[ipin] = get_net_pin_criticality(timing_info, - netlist_pin_lookup, - router_opts.max_criticality, - router_opts.criticality_exp, - net_id, - pin, - is_flat); - - } else { - //No timing info, implies we want a min delay routing, so use criticality of 1. - pin_criticality[ipin] = 1.; - } - } - - // compare the criticality of different sink nodes - sort(begin(remaining_targets), end(remaining_targets), [&](int a, int b) { - return pin_criticality[a] > pin_criticality[b]; - }); - - /* Update base costs according to fanout and criticality rules */ - update_rr_base_costs(num_sinks); - - t_conn_delay_budget conn_delay_budget; - t_conn_cost_params cost_params; - cost_params.astar_fac = router_opts.astar_fac; - cost_params.bend_cost = router_opts.bend_cost; - cost_params.pres_fac = pres_fac; - cost_params.delay_budget = ((budgeting_inf.if_set()) ? 
&conn_delay_budget : nullptr); - - // Pre-route to clock source for clock nets (marked as global nets) - if (net_list.net_is_global(net_id) && router_opts.two_stage_clock_routing) { - //VTR_ASSERT(router_opts.clock_modeling == DEDICATED_NETWORK); - RRNodeId sink_node(device_ctx.virtual_clock_network_root_idx); - - enable_router_debug(router_opts, net_id, sink_node, itry, &router); - - VTR_LOGV_DEBUG(f_router_debug, "Pre-routing global net %zu\n", size_t(net_id)); - - // Set to the max timing criticality which should intern minimize clock insertion - // delay by selecting a direct route from the clock source to the virtual sink - cost_params.criticality = router_opts.max_criticality; - - /* Is the connection router allowed to grow the bounding box? That's not the case - * when routing in parallel, so disallow it. TODO: Have both timing_driven and parallel - * routers handle this in the same way */ - bool can_grow_bb = (router_opts.router_algorithm != PARALLEL); - - std::tie(flags.success, flags.retry_with_full_bb) = timing_driven_pre_route_to_clock_root(router, - net_id, - net_list, - sink_node, - cost_params, - router_opts.high_fanout_threshold, - tree, - spatial_route_tree_lookup, - router_stats, - is_flat, - can_grow_bb); - - return flags; - } - - if (budgeting_inf.if_set()) { - budgeting_inf.set_should_reroute(net_id, false); - } - - // explore in order of decreasing criticality (no longer need sink_order array) - for (unsigned itarget = 0; itarget < remaining_targets.size(); ++itarget) { - int target_pin = remaining_targets[itarget]; - - RRNodeId sink_rr = route_ctx.net_rr_terminals[net_id][target_pin]; - - enable_router_debug(router_opts, net_id, sink_rr, itry, &router); - - cost_params.criticality = pin_criticality[target_pin]; - - if (budgeting_inf.if_set()) { - conn_delay_budget.max_delay = budgeting_inf.get_max_delay_budget(net_id, target_pin); - conn_delay_budget.target_delay = budgeting_inf.get_delay_target(net_id, target_pin); - 
conn_delay_budget.min_delay = budgeting_inf.get_min_delay_budget(net_id, target_pin); - conn_delay_budget.short_path_criticality = budgeting_inf.get_crit_short_path(net_id, target_pin); - conn_delay_budget.routing_budgets_algorithm = router_opts.routing_budgets_algorithm; - } - - profiling::conn_start(); - - // build a branch in the route tree to the target - auto sink_flags = timing_driven_route_sink(router, - net_list, - net_id, - itarget, - target_pin, - cost_params, - router_opts, - tree, - spatial_route_tree_lookup, - router_stats, - budgeting_inf, - routing_predictor, - choking_spots, - is_flat); - - flags.retry_with_full_bb |= sink_flags.retry_with_full_bb; - - if (!sink_flags.success) { - flags.success = false; - return flags; - } - - profiling::conn_finish(size_t(route_ctx.net_rr_terminals[net_id][0]), - size_t(sink_rr), - pin_criticality[target_pin]); - - ++router_stats.connections_routed; - } // finished all sinks - - ++router_stats.nets_routed; - profiling::net_finish(); - - /* For later timing analysis. */ - - // may have to update timing delay of the previously legally reached sinks since downstream capacitance could be changed - update_net_delays_from_route_tree(net_delay, - net_list, - net_id, - timing_info.get(), - pin_timing_invalidator); - - if (router_opts.update_lower_bound_delays) { - for (int ipin : remaining_targets) { - connections_inf.update_lower_bound_connection_delay(net_id, ipin, net_delay[ipin]); - } - } - - VTR_ASSERT_MSG(g_vpr_ctx.routing().rr_node_route_inf[tree.root().inode].occ() <= rr_graph.node_capacity(tree.root().inode), "SOURCE should never be congested"); - - VTR_LOGV_DEBUG(f_router_debug, "Routed Net %zu (%zu sinks)\n", size_t(net_id), num_sinks); - router.empty_rcv_route_tree_set(); // ? 
- - flags.success = true; - return flags; -} - -template -static std::tuple timing_driven_pre_route_to_clock_root(ConnectionRouter& router, - ParentNetId net_id, - const Netlist<>& net_list, - RRNodeId sink_node, - const t_conn_cost_params cost_params, - int high_fanout_threshold, - RouteTree& tree, - SpatialRouteTreeLookup& spatial_rt_lookup, - RouterStats& router_stats, - bool is_flat, - bool can_grow_bb) { - const auto& device_ctx = g_vpr_ctx.device(); - auto& route_ctx = g_vpr_ctx.mutable_routing(); - auto& m_route_ctx = g_vpr_ctx.mutable_routing(); - - bool high_fanout = is_high_fanout(net_list.net_sinks(net_id).size(), high_fanout_threshold); - - VTR_LOGV_DEBUG(f_router_debug, "Net %zu pre-route to (%s)\n", size_t(net_id), describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, sink_node, is_flat).c_str()); - profiling::sink_criticality_start(); - - t_bb bounding_box = route_ctx.route_bb[net_id]; - - router.clear_modified_rr_node_info(); - - bool found_path, retry_with_full_bb; - t_heap cheapest; - ConnectionParameters conn_params(net_id, - -1, - false, - std::unordered_map()); - - std::tie(found_path, retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree( - tree.root(), - sink_node, - cost_params, - bounding_box, - router_stats, - conn_params, - can_grow_bb); - - // TODO: Parts of the rest of this function are repetitive to code in timing_driven_route_sink. Should refactor. 
- if (!found_path) { - ParentBlockId src_block = net_list.net_driver_block(net_id); - VTR_LOG("Failed to route connection from '%s' to '%s' for net '%s' (#%zu)\n", - net_list.block_name(src_block).c_str(), - describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, sink_node, is_flat).c_str(), - net_list.net_name(net_id).c_str(), - size_t(net_id)); - if (f_router_debug) { - update_screen(ScreenUpdatePriority::MAJOR, "Unable to route connection.", ROUTING, nullptr); - } - return std::make_tuple(found_path, retry_with_full_bb); - } - - profiling::sink_criticality_end(cost_params.criticality); - - /* This is a special pre-route to a sink that does not correspond to any * - * netlist pin, but which can be reached from the global clock root drive * - * points. Therefore, we can set the net pin index of the sink node to * - * OPEN (meaning illegal) as it is not meaningful for this sink. */ - vtr::optional new_branch, new_sink; - std::tie(new_branch, new_sink) = tree.update_from_heap(&cheapest, OPEN, ((high_fanout) ? 
&spatial_rt_lookup : nullptr), is_flat); - - VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(tree.root(), spatial_rt_lookup)); - - if (f_router_debug) { - std::string msg = vtr::string_fmt("Routed Net %zu connection to RR node %d successfully", size_t(net_id), sink_node); - update_screen(ScreenUpdatePriority::MAJOR, msg.c_str(), ROUTING, nullptr); - } - - if (new_branch) - pathfinder_update_cost_from_route_tree(new_branch.value(), 1); - - // need to guarantee ALL nodes' path costs are HUGE_POSITIVE_FLOAT at the start of routing to a sink - // do this by resetting all the path_costs that have been touched while routing to the current sink - router.reset_path_costs(); - - // Post route cleanup: - // - remove sink from route tree and fix routing for all nodes leading to the sink ("freeze") - // - free up virtual sink occupancy - tree.freeze(); - m_route_ctx.rr_node_route_inf[sink_node].set_occ(0); - - // routed to a sink successfully - return std::make_tuple(true, false); -} - -template -static NetResultFlags timing_driven_route_sink(ConnectionRouter& router, - const Netlist<>& net_list, - ParentNetId net_id, - unsigned itarget, - int target_pin, - const t_conn_cost_params cost_params, - const t_router_opts& router_opts, - RouteTree& tree, - SpatialRouteTreeLookup& spatial_rt_lookup, - RouterStats& router_stats, - route_budgets& budgeting_inf, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat) { - const auto& device_ctx = g_vpr_ctx.device(); - auto& route_ctx = g_vpr_ctx.mutable_routing(); - - NetResultFlags flags; - - profiling::sink_criticality_start(); - - RRNodeId sink_node = route_ctx.net_rr_terminals[net_id][target_pin]; - VTR_LOGV_DEBUG(f_router_debug, "Net %zu Target %d (%s)\n", size_t(net_id), itarget, describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, sink_node, is_flat).c_str()); - - router.clear_modified_rr_node_info(); - - bool found_path; - t_heap cheapest; 
- t_bb bounding_box = route_ctx.route_bb[net_id]; - - /* Is the connection router allowed to grow the bounding box? That's not the case - * when routing in parallel, so disallow it. */ - bool can_grow_bb = (router_opts.router_algorithm != PARALLEL); - - bool net_is_global = net_list.net_is_global(net_id); - bool high_fanout = is_high_fanout(net_list.net_sinks(net_id).size(), router_opts.high_fanout_threshold); - constexpr float HIGH_FANOUT_CRITICALITY_THRESHOLD = 0.9; - bool sink_critical = (cost_params.criticality > HIGH_FANOUT_CRITICALITY_THRESHOLD); - bool net_is_clock = route_ctx.is_clock_net[net_id] != 0; - - bool has_choking_spot = ((int)choking_spots[target_pin].size() != 0) && router_opts.has_choking_spot; - ConnectionParameters conn_params(net_id, target_pin, has_choking_spot, choking_spots[target_pin]); - - //We normally route high fanout nets by only adding spatially close-by routing to the heap (reduces run-time). - //However, if the current sink is 'critical' from a timing perspective, we put the entire route tree back onto - //the heap to ensure it has more flexibility to find the best path. 
- if (high_fanout && !sink_critical && !net_is_global && !net_is_clock && -routing_predictor.get_slope() > router_opts.high_fanout_max_slope) { - std::tie(found_path, flags.retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree_high_fanout(tree.root(), - sink_node, - cost_params, - bounding_box, - spatial_rt_lookup, - router_stats, - conn_params, - can_grow_bb); - } else { - std::tie(found_path, flags.retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree(tree.root(), - sink_node, - cost_params, - bounding_box, - router_stats, - conn_params, - can_grow_bb); - } - - if (!found_path) { - ParentBlockId src_block = net_list.net_driver_block(net_id); - ParentBlockId sink_block = net_list.pin_block(*(net_list.net_pins(net_id).begin() + target_pin)); - VTR_LOG("Failed to route connection from '%s' to '%s' for net '%s' (#%zu)\n", - net_list.block_name(src_block).c_str(), - net_list.block_name(sink_block).c_str(), - net_list.net_name(net_id).c_str(), - size_t(net_id)); - if (f_router_debug) { - update_screen(ScreenUpdatePriority::MAJOR, "Unable to route connection.", ROUTING, nullptr); - } - flags.success = false; - return flags; - } - - profiling::sink_criticality_end(cost_params.criticality); - - RRNodeId inode(cheapest.index); - route_ctx.rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */ - - vtr::optional new_branch, new_sink; - std::tie(new_branch, new_sink) = tree.update_from_heap(&cheapest, target_pin, ((high_fanout) ? 
&spatial_rt_lookup : nullptr), is_flat); - - VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(tree.root(), spatial_rt_lookup)); - - if (f_router_debug) { - std::string msg = vtr::string_fmt("Routed Net %zu connection %d to RR node %d successfully", size_t(net_id), itarget, sink_node); - update_screen(ScreenUpdatePriority::MAJOR, msg.c_str(), ROUTING, nullptr); - } - - if (budgeting_inf.if_set() && cheapest.path_data != nullptr && cost_params.delay_budget) { - if (cheapest.path_data->backward_delay < cost_params.delay_budget->min_delay) { - budgeting_inf.set_should_reroute(net_id, true); - } - } - - /* update global occupancy from the new branch */ - if (new_branch) - pathfinder_update_cost_from_route_tree(new_branch.value(), 1); - - // need to guarantee ALL nodes' path costs are HUGE_POSITIVE_FLOAT at the start of routing to a sink - // do this by resetting all the path_costs that have been touched while routing to the current sink - router.reset_path_costs(); - - // routed to a sink successfully - flags.success = true; - return flags; -} - -static void setup_routing_resources(int itry, - ParentNetId net_id, - const Netlist<>& net_list, - unsigned num_sinks, - int min_incremental_reroute_fanout, - CBRR& connections_inf, - const t_router_opts& router_opts, - bool ripup_high_fanout_nets) { - /* Build and return a partial route tree from the legal connections from last iteration. - * along the way do: - * update pathfinder costs to be accurate to the partial route tree - * mark the rr_node sinks as targets to be reached. */ - auto& route_ctx = g_vpr_ctx.mutable_routing(); - - /* "tree" points to this net's spot in the global context here, so re-initializing it etc. 
changes the global state */ - vtr::optional& tree = route_ctx.route_trees[net_id]; - - // for nets below a certain size (min_incremental_reroute_fanout), rip up any old routing - // otherwise, we incrementally reroute by reusing legal parts of the previous iteration - if ((int)num_sinks < min_incremental_reroute_fanout || itry == 1 || ripup_high_fanout_nets) { - profiling::net_rerouted(); - - /* rip up the whole net */ - if (tree) - pathfinder_update_cost_from_route_tree(tree.value().root(), -1); - tree = vtr::nullopt; - - /* re-initialize net */ - tree = RouteTree(net_id); - pathfinder_update_cost_from_route_tree(tree.value().root(), 1); - - // since all connections will be rerouted for this net, clear all of net's forced reroute flags - connections_inf.clear_force_reroute_for_net(net_id); - - // when we don't prune the tree, we also don't know the sink node indices - // thus we'll use functions that act on pin indices like mark_ends instead - // of their versions that act on node indices directly like mark_remaining_ends - mark_ends(net_list, net_id); - } else { - profiling::net_rebuild_start(); - - if (!tree) { - tree = RouteTree(net_id); - pathfinder_update_cost_from_route_tree(tree.value().root(), 1); - } - - /* copy the existing routing - * prune() depends on global occ, so we can't subtract before pruning - * OPT: to skip this copy, return a "diff" from RouteTree::prune */ - RouteTree tree2 = tree.value(); - - // Skip this check if RCV is enabled, as RCV can use another method to cause reroutes - VTR_ASSERT_SAFE(should_route_net(net_id, connections_inf, true) || router_opts.routing_budgets_algorithm == YOYO); - - // Prune the copy (using congestion data before subtraction) - vtr::optional pruned_tree2 = tree2.prune(connections_inf); - - // Subtract congestion using the non-pruned original - pathfinder_update_cost_from_route_tree(tree.value().root(), -1); - - if (pruned_tree2) { //Partially pruned - profiling::route_tree_preserved(); - - // Add back 
congestion for the pruned route tree - pathfinder_update_cost_from_route_tree(pruned_tree2.value().root(), 1); - // pruned_tree2 is no longer required -> we can move rather than copy - tree = std::move(pruned_tree2.value()); - } else { // Fully destroyed - profiling::route_tree_pruned(); - - // Initialize only to source - tree = RouteTree(net_id); - pathfinder_update_cost_from_route_tree(tree.value().root(), 1); - } - - profiling::net_rebuild_end(num_sinks, tree->get_remaining_isinks().size()); - - // still need to calculate the tree's time delay - tree.value().reload_timing(); - - // check for R_upstream C_downstream and edge correctness - VTR_ASSERT_SAFE(tree.value().is_valid()); - - // congestion should've been pruned away - VTR_ASSERT_SAFE(tree.value().is_uncongested()); - - // mark remaining ends - mark_remaining_ends(net_id); - - // mark the lookup (rr_node_route_inf) for existing tree elements as NO_PREVIOUS so add_to_path stops when it reaches one of them - update_rr_route_inf_from_tree(tree.value().root()); - } - - // completed constructing the partial route tree and updated all other data structures to match -} - -/** Change the base costs of rr_nodes according to # of fanouts */ -void update_rr_base_costs(int fanout) { - auto& device_ctx = g_vpr_ctx.mutable_device(); - - float factor; - size_t index; - - /* Other reasonable values for factor include fanout and 1 */ - factor = sqrt(fanout); - - for (index = CHANX_COST_INDEX_START; index < device_ctx.rr_indexed_data.size(); index++) { - if (device_ctx.rr_indexed_data[RRIndexedDataId(index)].T_quadratic > 0.) 
{ /* pass transistor */ - device_ctx.rr_indexed_data[RRIndexedDataId(index)].base_cost = device_ctx.rr_indexed_data[RRIndexedDataId(index)].saved_base_cost * factor; - } else { - device_ctx.rr_indexed_data[RRIndexedDataId(index)].base_cost = device_ctx.rr_indexed_data[RRIndexedDataId(index)].saved_base_cost; - } - } -} - -void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node) { - auto& route_ctx = g_vpr_ctx.mutable_routing(); - - for (auto& child : rt_node.child_nodes()) { - RRNodeId inode = child.inode; - route_ctx.rr_node_route_inf[inode].prev_node = RRNodeId::INVALID(); - route_ctx.rr_node_route_inf[inode].prev_edge = RREdgeId::INVALID(); - - // path cost should be unset - VTR_ASSERT(std::isinf(route_ctx.rr_node_route_inf[inode].path_cost)); - VTR_ASSERT(std::isinf(route_ctx.rr_node_route_inf[inode].backward_path_cost)); - - update_rr_route_inf_from_tree(child); - } -} - -bool timing_driven_check_net_delays(const Netlist<>& net_list, NetPinsMatrix& net_delay) { - constexpr float ERROR_TOL = 0.0001; - - /* Checks that the net delays computed incrementally during timing driven * - * routing match those computed from scratch by the net_delay.c module. */ - - unsigned int ipin; - auto net_delay_check = make_net_pins_matrix(net_list); - - load_net_delay_from_routing(net_list, net_delay_check); - - for (auto net_id : net_list.nets()) { - for (ipin = 1; ipin < net_list.net_pins(net_id).size(); ipin++) { - if (net_delay_check[net_id][ipin] == 0.) { /* Should be only GLOBAL nets */ - if (fabs(net_delay[net_id][ipin]) > ERROR_TOL) { - VPR_ERROR(VPR_ERROR_ROUTE, - "in timing_driven_check_net_delays: net %lu pin %d.\n" - "\tIncremental calc. 
net_delay is %g, but from scratch net delay is %g.\n", - size_t(net_id), ipin, net_delay[net_id][ipin], net_delay_check[net_id][ipin]); - } - } else { - float error = fabs(1.0 - net_delay[net_id][ipin] / net_delay_check[net_id][ipin]); - if (error > ERROR_TOL) { - VPR_ERROR(VPR_ERROR_ROUTE, - "in timing_driven_check_net_delays: net %d pin %lu.\n" - "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n", - size_t(net_id), ipin, net_delay[net_id][ipin], net_delay_check[net_id][ipin]); - } - } - } - } - - return true; -} - -/* Goes through all the sinks of this net and copies their delay values from - * the route_tree to the net_delay array. */ -static void update_net_delays_from_route_tree(float* net_delay, - const Netlist<>& net_list, - ParentNetId inet, - TimingInfo* timing_info, - NetPinTimingInvalidator* pin_timing_invalidator) { - auto& route_ctx = g_vpr_ctx.routing(); - const RouteTree& tree = route_ctx.route_trees[inet].value(); - - for (unsigned int isink = 1; isink < net_list.net_pins(inet).size(); isink++) { - update_net_delay_from_isink(net_delay, tree, isink, net_list, inet, timing_info, pin_timing_invalidator); - } -} - -/* Detect if net should be routed or not */ -bool should_route_net(ParentNetId net_id, - CBRR& connections_inf, - bool if_force_reroute) { - auto& route_ctx = g_vpr_ctx.routing(); - auto& device_ctx = g_vpr_ctx.device(); - const auto& rr_graph = device_ctx.rr_graph; - - if (!route_ctx.route_trees[net_id]) { - /* No routing yet. 
*/ - return true; - } - - const RouteTree& tree = route_ctx.route_trees[net_id].value(); - - /* Walk over all rt_nodes in the net */ - for (auto& rt_node : tree.all_nodes()) { - RRNodeId inode = rt_node.inode; - int occ = route_ctx.rr_node_route_inf[inode].occ(); - int capacity = rr_graph.node_capacity(inode); - - if (occ > capacity) { - return true; /* overuse detected */ - } - - if (rt_node.is_leaf()) { //End of a branch - // even if net is fully routed, not complete if parts of it should get ripped up (EXPERIMENTAL) - if (if_force_reroute) { - if (connections_inf.should_force_reroute_connection(net_id, inode)) { - return true; - } - } - } - } - - /* If all sinks have been routed to without overuse, no need to route this */ - if (tree.get_remaining_isinks().empty()) - return false; - - return true; -} - -bool early_exit_heuristic(const t_router_opts& router_opts, const WirelengthInfo& wirelength_info) { - if (wirelength_info.used_wirelength_ratio() > router_opts.init_wirelength_abort_threshold) { - VTR_LOG("Wire length usage ratio %g exceeds limit of %g, fail routing.\n", - wirelength_info.used_wirelength_ratio(), - router_opts.init_wirelength_abort_threshold); - return true; - } - return false; -} - -static bool check_hold(const t_router_opts& router_opts, float worst_neg_slack) { - /* When RCV is enabled, it's necessary to be able to completely ripup high fanout nets if there is still negative hold slack - * Normally the router will prune the illegal branches of high fanout nets, this will bypass this */ - - if (router_opts.routing_budgets_algorithm != YOYO) { - return false; - } else if (worst_neg_slack != 0) { - return true; - } - return false; -} - -static float get_net_pin_criticality(const std::shared_ptr timing_info, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - float max_criticality, - float criticality_exp, - ParentNetId net_id, - ParentPinId pin_id, - bool is_flat) { - float pin_criticality = 0.0; - const auto& route_ctx = 
g_vpr_ctx.routing(); - - if (route_ctx.is_clock_net[net_id]) { - pin_criticality = max_criticality; - } else { - pin_criticality = calculate_clb_net_pin_criticality(*timing_info, - netlist_pin_lookup, - pin_id, - is_flat); - } - - /* Pin criticality is between 0 and 1. - * Shift it downwards by 1 - max_criticality (max_criticality is 0.99 by default, - * so shift down by 0.01) and cut off at 0. This means that all pins with small - * criticalities (<0.01) get criticality 0 and are ignored entirely, and everything - * else becomes a bit less critical. This effect becomes more pronounced if - * max_criticality is set lower. */ - // VTR_ASSERT(pin_criticality[ipin] > -0.01 && pin_criticality[ipin] < 1.01); - pin_criticality = std::max(pin_criticality - (1.0 - max_criticality), 0.0); - - /* Take pin criticality to some power (1 by default). */ - pin_criticality = std::pow(pin_criticality, criticality_exp); - - /* Cut off pin criticality at max_criticality. */ - pin_criticality = std::min(pin_criticality, max_criticality); - - return pin_criticality; -} - -size_t calculate_wirelength_available() { - auto& device_ctx = g_vpr_ctx.device(); - const auto& rr_graph = device_ctx.rr_graph; - - size_t available_wirelength = 0; - // But really what's happening is that this for loop iterates over every node and determines the available wirelength - for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { - const t_rr_type channel_type = rr_graph.node_type(rr_id); - if (channel_type == CHANX || channel_type == CHANY) { - available_wirelength += rr_graph.node_capacity(rr_id) * rr_graph.node_length(rr_id); - } - } - return available_wirelength; -} - -WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t available_wirelength) { - size_t used_wirelength = 0; - VTR_ASSERT(available_wirelength > 0); - - auto& route_ctx = g_vpr_ctx.routing(); - - for (auto net_id : net_list.nets()) { - if (!net_list.net_is_ignored(net_id) - && net_list.net_sinks(net_id).size() != 
0 /* Globals don't count. */ - && route_ctx.route_trees[net_id]) { - int bends, wirelength, segments; - bool is_absorbed; - get_num_bends_and_length(net_id, &bends, &wirelength, &segments, &is_absorbed); - - used_wirelength += wirelength; - } - } - - return WirelengthInfo(available_wirelength, used_wirelength); -} - -void print_route_status_header() { - VTR_LOG("---- ------ ------- ---- ------- ------- ------- ----------------- --------------- -------- ---------- ---------- ---------- ---------- --------\n"); - VTR_LOG("Iter Time pres BBs Heap Re-Rtd Re-Rtd Overused RR Nodes Wirelength CPD sTNS sWNS hTNS hWNS Est Succ\n"); - VTR_LOG(" (sec) fac Updt push Nets Conns (ns) (ns) (ns) (ns) (ns) Iter\n"); - VTR_LOG("---- ------ ------- ---- ------- ------- ------- ----------------- --------------- -------- ---------- ---------- ---------- ---------- --------\n"); -} - -void print_route_status(int itry, double elapsed_sec, float pres_fac, int num_bb_updated, const RouterStats& router_stats, const OveruseInfo& overuse_info, const WirelengthInfo& wirelength_info, std::shared_ptr timing_info, float est_success_iteration) { - //Iteration - VTR_LOG("%4d", itry); - - //Elapsed Time - VTR_LOG(" %6.1f", elapsed_sec); - - //pres_fac - constexpr int PRES_FAC_DIGITS = 7; - constexpr int PRES_FAC_SCI_PRECISION = 1; - pretty_print_float(" ", pres_fac, PRES_FAC_DIGITS, PRES_FAC_SCI_PRECISION); - //VTR_LOG(" %5.1f", pres_fac); - - //Number of bounding boxes updated - VTR_LOG(" %4d", num_bb_updated); - - //Heap push/pop - constexpr int HEAP_OP_DIGITS = 7; - constexpr int HEAP_OP_SCI_PRECISION = 2; - pretty_print_uint(" ", router_stats.heap_pushes, HEAP_OP_DIGITS, HEAP_OP_SCI_PRECISION); - VTR_ASSERT(router_stats.heap_pops <= router_stats.heap_pushes); - - //Rerouted nets - constexpr int NET_ROUTED_DIGITS = 7; - constexpr int NET_ROUTED_SCI_PRECISION = 2; - pretty_print_uint(" ", router_stats.nets_routed, NET_ROUTED_DIGITS, NET_ROUTED_SCI_PRECISION); - - //Rerouted connections - constexpr 
int CONN_ROUTED_DIGITS = 7; - constexpr int CONN_ROUTED_SCI_PRECISION = 2; - pretty_print_uint(" ", router_stats.connections_routed, CONN_ROUTED_DIGITS, CONN_ROUTED_SCI_PRECISION); - - //Overused RR nodes - constexpr int OVERUSE_DIGITS = 7; - constexpr int OVERUSE_SCI_PRECISION = 2; - pretty_print_uint(" ", overuse_info.overused_nodes, OVERUSE_DIGITS, OVERUSE_SCI_PRECISION); - VTR_LOG(" (%6.3f%%)", overuse_info.overused_node_ratio() * 100); - - //Wirelength - constexpr int WL_DIGITS = 7; - constexpr int WL_SCI_PRECISION = 2; - pretty_print_uint(" ", wirelength_info.used_wirelength(), WL_DIGITS, WL_SCI_PRECISION); - VTR_LOG(" (%4.1f%%)", wirelength_info.used_wirelength_ratio() * 100); - - //CPD - if (timing_info) { - float cpd = timing_info->least_slack_critical_path().delay(); - VTR_LOG(" %#8.3f", 1e9 * cpd); - } else { - VTR_LOG(" %8s", "N/A"); - } - - //sTNS - if (timing_info) { - float sTNS = timing_info->setup_total_negative_slack(); - VTR_LOG(" % #10.4g", 1e9 * sTNS); - } else { - VTR_LOG(" %10s", "N/A"); - } - - //sWNS - if (timing_info) { - float sWNS = timing_info->setup_worst_negative_slack(); - VTR_LOG(" % #10.3f", 1e9 * sWNS); - } else { - VTR_LOG(" %10s", "N/A"); - } - - //hTNS - if (timing_info) { - float hTNS = timing_info->hold_total_negative_slack(); - VTR_LOG(" % #10.4g", 1e9 * hTNS); - } else { - VTR_LOG(" %10s", "N/A"); - } - - //hWNS - if (timing_info) { - float hWNS = timing_info->hold_worst_negative_slack(); - VTR_LOG(" % #10.3f", 1e9 * hWNS); - } else { - VTR_LOG(" %10s", "N/A"); - } - - //Estimated success iteration - if (std::isnan(est_success_iteration)) { - VTR_LOG(" %8s", "N/A"); - } else { - VTR_LOG(" %8.0f", est_success_iteration); - } - - VTR_LOG("\n"); - - fflush(stdout); -} - -void print_router_criticality_histogram(const Netlist<>& net_list, - const SetupTimingInfo& timing_info, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - bool is_flat) { - print_histogram(create_criticality_histogram(net_list, timing_info, 
netlist_pin_lookup, is_flat, 10)); -} - -void print_overused_nodes_status(const t_router_opts& router_opts, const OveruseInfo& overuse_info) { - //Print the index of this routing failure - VTR_LOG("\nFailed routing attempt #%d\n", num_routing_failed); - - size_t num_overused = overuse_info.overused_nodes; - size_t max_logged_overused_rr_nodes = router_opts.max_logged_overused_rr_nodes; - - //Overused nodes info logging upper limit - VTR_LOG("Total number of overused nodes: %d\n", num_overused); - if (num_overused > max_logged_overused_rr_nodes) { - VTR_LOG("Total number of overused nodes is larger than the logging limit (%d).\n", max_logged_overused_rr_nodes); - VTR_LOG("Displaying the first %d entries.\n", max_logged_overused_rr_nodes); - } - - log_overused_nodes_status(max_logged_overused_rr_nodes); - VTR_LOG("\n"); -} - -//Returns true if the specified net fanout is classified as high fanout -static bool is_high_fanout(int fanout, int fanout_threshold) { - if (fanout_threshold < 0 || fanout < fanout_threshold) return false; - return true; -} - -// In heavily congested designs a static bounding box (BB) can -// become problematic for routability (it effectively enforces a -// hard blockage restricting where a net can route). -// -// For instance, the router will try to route non-critical connections -// away from congested regions, but may end up hitting the edge of the -// bounding box. Limiting how far out-of-the-way it can be routed, and -// preventing congestion from resolving. -// -// To alleviate this, we dynamically expand net bounding boxes if the net's -// *current* routing uses RR nodes 'close' to the edge of it's bounding box. -// -// The result is that connections trying to move out of the way and hitting -// their BB will have their bounding boxes will expand slowly in that direction. -// This helps spread out regions of heavy congestion (over several routing -// iterations). 
-// -// By growing the BBs slowly and only as needed we minimize the size of the BBs. -// This helps keep the router's graph search fast. -// -// Typically, only a small minority of nets (typically > 10%) have their BBs updated -// each routing iteration. -size_t dynamic_update_bounding_boxes(const std::vector& updated_nets, - const Netlist<>& net_list, - int high_fanout_threshold) { - auto& device_ctx = g_vpr_ctx.device(); - auto& route_ctx = g_vpr_ctx.mutable_routing(); - - auto& grid = device_ctx.grid; - - //Controls how close a net's routing needs to be to it's bounding box - //before the bounding box is expanded. - // - //A value of zero indicates that the routing needs to be at the bounding box - //edge - constexpr int DYNAMIC_BB_DELTA_THRESHOLD = 0; - - //Walk through each net, calculating the bounding box of its current routing, - //and then increase the router's bounding box if the two are close together - - int grid_xmax = grid.width() - 1; - int grid_ymax = grid.height() - 1; - - size_t num_bb_updated = 0; - - for (ParentNetId net : updated_nets) { - if (!route_ctx.route_trees[net]) - continue; // Skip if no routing - if (!route_ctx.net_status.is_routed(net)) - continue; - - //We do not adjust the bounding boxes of high fanout nets, since they - //use different bounding boxes based on the target location. 
- // - //This ensures that the delta values calculated below are always non-negative - if (is_high_fanout(net_list.net_sinks(net).size(), high_fanout_threshold)) continue; - - t_bb curr_bb = calc_current_bb(route_ctx.route_trees[net].value()); - t_bb& router_bb = route_ctx.route_bb[net]; - - //Calculate the distances between the net's used RR nodes and - //the router's bounding box - int delta_xmin = curr_bb.xmin - router_bb.xmin; - int delta_xmax = router_bb.xmax - curr_bb.xmax; - int delta_ymin = curr_bb.ymin - router_bb.ymin; - int delta_ymax = router_bb.ymax - curr_bb.ymax; - - //Note that if the net uses non-configurable switches it's routing - //may end-up outside the bounding boxes, so the delta values may be - //negative. The code below will expand the bounding box in those - //cases. - - //Expand each dimension by one if within DYNAMIC_BB_DELTA_THRESHOLD threshold - bool updated_bb = false; - if (delta_xmin <= DYNAMIC_BB_DELTA_THRESHOLD && router_bb.xmin > 0) { - --router_bb.xmin; - updated_bb = true; - } - - if (delta_ymin <= DYNAMIC_BB_DELTA_THRESHOLD && router_bb.ymin > 0) { - --router_bb.ymin; - updated_bb = true; - } - - if (delta_xmax <= DYNAMIC_BB_DELTA_THRESHOLD && router_bb.xmax < grid_xmax) { - ++router_bb.xmax; - updated_bb = true; - } - - if (delta_ymax <= DYNAMIC_BB_DELTA_THRESHOLD && router_bb.ymax < grid_ymax) { - ++router_bb.ymax; - updated_bb = true; - } - - if (updated_bb) { - ++num_bb_updated; - //VTR_LOG("Expanded net %6zu router BB to (%d,%d)x(%d,%d) based on net RR node BB (%d,%d)x(%d,%d)\n", size_t(net), - //router_bb.xmin, router_bb.ymin, router_bb.xmax, router_bb.ymax, - //curr_bb.xmin, curr_bb.ymin, curr_bb.xmax, curr_bb.ymax); - } - } - return num_bb_updated; -} - -//Returns the bounding box of a net's used routing resources -t_bb calc_current_bb(const RouteTree& tree) { - auto& device_ctx = g_vpr_ctx.device(); - const auto& rr_graph = device_ctx.rr_graph; - auto& grid = device_ctx.grid; - - t_bb bb; - bb.xmin = grid.width() - 1; 
- bb.ymin = grid.height() - 1; - bb.layer_min = grid.get_num_layers() - 1; - bb.xmax = 0; - bb.ymax = 0; - bb.layer_max = 0; - - for (auto& rt_node : tree.all_nodes()) { - //The router interprets RR nodes which cross the boundary as being - //'within' of the BB. Only those which are *strictly* out side the - //box are excluded, hence we use the nodes xhigh/yhigh for xmin/xmax, - //and xlow/ylow for xmax/ymax calculations - bb.xmin = std::min(bb.xmin, rr_graph.node_xhigh(rt_node.inode)); - bb.ymin = std::min(bb.ymin, rr_graph.node_yhigh(rt_node.inode)); - bb.layer_min = std::min(bb.layer_min, rr_graph.node_layer(rt_node.inode)); - bb.xmax = std::max(bb.xmax, rr_graph.node_xlow(rt_node.inode)); - bb.ymax = std::max(bb.ymax, rr_graph.node_ylow(rt_node.inode)); - bb.layer_max = std::max(bb.layer_max, rr_graph.node_layer(rt_node.inode)); - } - - VTR_ASSERT(bb.xmin <= bb.xmax); - VTR_ASSERT(bb.ymin <= bb.ymax); - - return bb; -} - -void enable_router_debug( - const t_router_opts& router_opts, - ParentNetId net, - RRNodeId sink_rr, - int router_iteration, - ConnectionRouterInterface* router) { - bool active_net_debug = (router_opts.router_debug_net >= -1); - bool active_sink_debug = (router_opts.router_debug_sink_rr >= 0); - bool active_iteration_debug = (router_opts.router_debug_iteration >= 0); - - bool match_net = (ParentNetId(router_opts.router_debug_net) == net || router_opts.router_debug_net == -1); - bool match_sink = (router_opts.router_debug_sink_rr == int(size_t((sink_rr))) || router_opts.router_debug_sink_rr < 0); - bool match_iteration = (router_opts.router_debug_iteration == router_iteration || router_opts.router_debug_iteration < 0); - - f_router_debug = active_net_debug || active_sink_debug || active_iteration_debug; - - if (active_net_debug) f_router_debug &= match_net; - if (active_sink_debug) f_router_debug &= match_sink; - if (active_iteration_debug) f_router_debug &= match_iteration; - - router->set_router_debug(f_router_debug); - -#ifndef 
VTR_ENABLE_DEBUG_LOGGING - VTR_LOGV_WARN(f_router_debug, "Limited router debug output provided since compiled without VTR_ENABLE_DEBUG_LOGGING defined\n"); -#endif -} - -bool is_iteration_complete(bool routing_is_feasible, const t_router_opts& router_opts, int itry, std::shared_ptr timing_info, bool rcv_finished) { - //This function checks if a routing iteration has completed. - //When VPR is run normally, we check if routing_budgets_algorithm is disabled, and if the routing is legal - //With the introduction of yoyo budgeting algorithm, we must check if there are no hold violations - //in addition to routing being legal and the correct budgeting algorithm being set. - - if (routing_is_feasible) { - if (router_opts.routing_budgets_algorithm != YOYO) { - return true; - } else if (router_opts.routing_budgets_algorithm == YOYO && (timing_info->hold_worst_negative_slack() == 0 || rcv_finished) && itry != 1) { - return true; - } - } - return false; -} - -bool should_setup_lower_bound_connection_delays(int itry, const t_router_opts& /*router_opts*/) { - /* Checks to see if router should (re)calculate route budgets - * It's currently set to only calculate after the first routing iteration */ - - if (itry == 1) return true; - return false; -} - -bool is_better_quality_routing(const vtr::vector>& best_routing, - const RoutingMetrics& best_routing_metrics, - const WirelengthInfo& wirelength_info, - std::shared_ptr timing_info) { - if (best_routing.empty()) { - return true; // First legal routing - } - - // Rank first based on sWNS, followed by other timing metrics - if (timing_info) { - if (timing_info->setup_worst_negative_slack() > best_routing_metrics.sWNS) { - return true; - } else if (timing_info->setup_worst_negative_slack() < best_routing_metrics.sWNS) { - return false; - } - - if (timing_info->setup_total_negative_slack() > best_routing_metrics.sTNS) { - return true; - } else if (timing_info->setup_total_negative_slack() < best_routing_metrics.sTNS) { - return false; 
- } - - if (timing_info->hold_worst_negative_slack() > best_routing_metrics.hWNS) { - return true; - } else if (timing_info->hold_worst_negative_slack() > best_routing_metrics.hWNS) { - return false; - } - - if (timing_info->hold_total_negative_slack() > best_routing_metrics.hTNS) { - return true; - } else if (timing_info->hold_total_negative_slack() > best_routing_metrics.hTNS) { - return false; - } - } - - // Finally, wirelength tie breaker - return wirelength_info.used_wirelength() < best_routing_metrics.used_wirelength; -} - -bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts, - int itry_since_last_convergence, - std::shared_ptr timing_info, - const RoutingMetrics& best_routing_metrics) { - // Give-up on reconvergent routing if the CPD improvement after the - // first iteration since convergence is small, compared to the best - // CPD seen so far - if (itry_since_last_convergence == 1) { - float cpd_ratio = timing_info->setup_worst_negative_slack() / best_routing_metrics.sWNS; - - // Give up if we see less than a 1% CPD improvement, - // after reducing pres_fac. Typically larger initial - // improvements are needed to see an actual improvement - // in final legal routing quality. 
- if (cpd_ratio >= router_opts.reconvergence_cpd_threshold) { - VTR_LOG("Giving up routing since additional routing convergences seem unlikely to improve quality (CPD ratio: %g)\n", cpd_ratio); - return true; // Potential CPD improvement is small, don't spend run-time trying to improve it - } - } - - return false; // Don't give up -} - -void generate_route_timing_reports(const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - const SetupTimingInfo& timing_info, - const RoutingDelayCalculator& delay_calc, - bool is_flat) { - auto& timing_ctx = g_vpr_ctx.timing(); - auto& atom_ctx = g_vpr_ctx.atom(); - - VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, delay_calc, is_flat); - resolver.set_detail_level(analysis_opts.timing_report_detail); - - tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph, *timing_ctx.constraints); - - timing_reporter.report_timing_setup(router_opts.first_iteration_timing_report_file, *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths); -} - -// If a route is ripped up during routing, non-configurable sets are left -// behind. As a result, the final routing may have stubs at -// non-configurable sets. This function tracks non-configurable set usage, -// and if the sets are unused, prunes them. 
-void prune_unused_non_configurable_nets(CBRR& connections_inf, - const Netlist<>& net_list) { - auto& device_ctx = g_vpr_ctx.device(); - auto& route_ctx = g_vpr_ctx.mutable_routing(); - - std::vector non_config_node_set_usage(device_ctx.rr_non_config_node_sets.size(), 0); - for (auto net_id : net_list.nets()) { - if (!route_ctx.route_trees[net_id]) - continue; - RouteTree& tree = route_ctx.route_trees[net_id].value(); - - connections_inf.clear_force_reroute_for_net(net_id); - - std::vector usage = tree.get_non_config_node_set_usage(); - - // Prune the branches of the tree that don't legally lead to sinks - tree.prune(connections_inf, &usage); - } -} - -// Initializes net_delay based on best-case delay estimates from the router lookahead -void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead, - const Netlist<>& net_list, - const vtr::vector>& net_rr_terminals, - NetPinsMatrix& net_delay, - const RRGraphView& rr_graph, - bool is_flat) { - t_conn_cost_params cost_params; - cost_params.criticality = 1.; // Ensures lookahead returns delay value - - for (auto net_id : net_list.nets()) { - if (net_list.net_is_ignored(net_id)) continue; - - RRNodeId source_rr = net_rr_terminals[net_id][0]; - - for (size_t ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) { - RRNodeId sink_rr = net_rr_terminals[net_id][ipin]; - - float est_delay = get_cost_from_lookahead(router_lookahead, - rr_graph, - source_rr, - sink_rr, - 0., - cost_params, - is_flat); - VTR_ASSERT(std::isfinite(est_delay) && est_delay < std::numeric_limits::max()); - - net_delay[net_id][ipin] = est_delay; - } - } -} - -void update_router_stats(RouterStats& router_stats, RouterStats& router_iteration_stats) { - router_stats.connections_routed += router_iteration_stats.connections_routed; - router_stats.nets_routed += router_iteration_stats.nets_routed; - router_stats.heap_pushes += router_iteration_stats.heap_pushes; - router_stats.inter_cluster_node_pushes += 
router_iteration_stats.inter_cluster_node_pushes; - router_stats.intra_cluster_node_pushes += router_iteration_stats.intra_cluster_node_pushes; - router_stats.heap_pops += router_iteration_stats.heap_pops; - router_stats.inter_cluster_node_pops += router_iteration_stats.inter_cluster_node_pops; - router_stats.intra_cluster_node_pops += router_iteration_stats.intra_cluster_node_pops; - for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) { - router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx] += router_iteration_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]; - router_stats.inter_cluster_node_type_cnt_pops[node_type_idx] += router_iteration_stats.inter_cluster_node_type_cnt_pops[node_type_idx]; - router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx] += router_iteration_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]; - router_stats.intra_cluster_node_type_cnt_pops[node_type_idx] += router_iteration_stats.intra_cluster_node_type_cnt_pops[node_type_idx]; - router_stats.rt_node_pushes[node_type_idx] += router_iteration_stats.rt_node_pushes[node_type_idx]; - router_stats.rt_node_high_fanout_pushes[node_type_idx] += router_iteration_stats.rt_node_high_fanout_pushes[node_type_idx]; - router_stats.rt_node_entire_tree_pushes[node_type_idx] += router_iteration_stats.rt_node_entire_tree_pushes[node_type_idx]; - } - router_stats.add_all_rt += router_iteration_stats.add_all_rt; - router_stats.add_all_rt_from_high_fanout += router_iteration_stats.add_all_rt_from_high_fanout; - router_stats.add_high_fanout_rt += router_iteration_stats.add_high_fanout_rt; -} - -void init_router_stats(RouterStats& router_stats) { - router_stats.connections_routed = 0; - router_stats.nets_routed = 0; - router_stats.heap_pushes = 0; - router_stats.heap_pops = 0; - router_stats.inter_cluster_node_pushes = 0; - router_stats.inter_cluster_node_pops = 0; - router_stats.intra_cluster_node_pushes = 0; - 
router_stats.intra_cluster_node_pops = 0; - for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) { - router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx] = 0; - router_stats.inter_cluster_node_type_cnt_pops[node_type_idx] = 0; - router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx] = 0; - router_stats.intra_cluster_node_type_cnt_pops[node_type_idx] = 0; - router_stats.rt_node_pushes[node_type_idx] = 0; - router_stats.rt_node_entire_tree_pushes[node_type_idx] = 0; - router_stats.rt_node_high_fanout_pushes[node_type_idx] = 0; - } - router_stats.add_all_rt = 0; - router_stats.add_high_fanout_rt = 0; - router_stats.add_all_rt_from_high_fanout = 0; -} - -vtr::vector>> set_nets_choking_spots(const Netlist<>& net_list, - const vtr::vector>>& net_terminal_groups, - const vtr::vector>& net_terminal_group_num, - bool has_choking_spot, - bool is_flat) { - vtr::vector>> choking_spots(net_list.nets().size()); - for (const auto& net_id : net_list.nets()) { - choking_spots[net_id].resize(net_list.net_pins(net_id).size()); - } - - // Return if the architecture doesn't have any potential choke points - if (!has_choking_spot) { - return choking_spots; - } - - // We only identify choke points if flat_routing is enabled. - VTR_ASSERT(is_flat); - - const auto& device_ctx = g_vpr_ctx.device(); - const auto& rr_graph = device_ctx.rr_graph; - const auto& route_ctx = g_vpr_ctx.routing(); - const auto& net_rr_terminal = route_ctx.net_rr_terminals; - - for (const auto& net_id : net_list.nets()) { - int pin_count = 0; - // Global nets are not routed, thus we don't consider them. 
- if (net_list.net_is_global(net_id)) { - continue; - } - for (auto pin_id : net_list.net_pins(net_id)) { - // pin_count == 0 corresponds to the net's source pin - if (pin_count == 0) { - pin_count++; - continue; - } - auto block_id = net_list.pin_block(pin_id); - auto blk_loc = get_block_loc(block_id, is_flat); - int group_num = net_terminal_group_num[net_id][pin_count]; - // This is a group of sinks, including the current pin_id, which share a specific number of parent blocks. - // To determine the choke points of the current sink, pin_id, we only consider the sinks in this group for the - // run-time purpose - std::vector sink_grp = net_terminal_groups[net_id][group_num]; - VTR_ASSERT((int)sink_grp.size() >= 1); - if (sink_grp.size() == 1) { - pin_count++; - continue; - } else { - // get the ptc_number of the sinks in the group - std::for_each(sink_grp.begin(), sink_grp.end(), [&rr_graph](int& sink_rr_num) { - sink_rr_num = rr_graph.node_ptc_num(RRNodeId(sink_rr_num)); - }); - auto physical_type = device_ctx.grid.get_physical_type({blk_loc.loc.x, blk_loc.loc.y, blk_loc.loc.layer}); - // Get the choke points of the sink corresponds to pin_count given the sink group - auto sink_choking_spots = get_sink_choking_points(physical_type, - rr_graph.node_ptc_num(RRNodeId(net_rr_terminal[net_id][pin_count])), - sink_grp); - // Store choke points rr_node_id and the number reachable sinks - for (const auto& choking_spot : sink_choking_spots) { - int pin_physical_num = choking_spot.first; - int num_reachable_sinks = choking_spot.second; - auto pin_rr_node_id = get_pin_rr_node_id(rr_graph.node_lookup(), - physical_type, - blk_loc.loc.layer, - blk_loc.loc.x, - blk_loc.loc.y, - pin_physical_num); - if (pin_rr_node_id != RRNodeId::INVALID()) { - choking_spots[net_id][pin_count].insert(std::make_pair(pin_rr_node_id, num_reachable_sinks)); - } - } - } - pin_count++; - } - } - - return choking_spots; -} - -#ifndef NO_GRAPHICS -// updates router iteration information and checks for 
router iteration and net id breakpoints -// stops after the specified router iteration or net id is encountered -void update_router_info_and_check_bp(bp_router_type type, int net_id) { - t_draw_state* draw_state = get_draw_state_vars(); - if (draw_state->list_of_breakpoints.size() != 0) { - if (type == BP_ROUTE_ITER) - get_bp_state_globals()->get_glob_breakpoint_state()->router_iter++; - else if (type == BP_NET_ID) - get_bp_state_globals()->get_glob_breakpoint_state()->route_net_id = net_id; - f_router_debug = check_for_breakpoints(false); - if (f_router_debug) { - breakpoint_info_window(get_bp_state_globals()->get_glob_breakpoint_state()->bp_description, *get_bp_state_globals()->get_glob_breakpoint_state(), false); - update_screen(ScreenUpdatePriority::MAJOR, "Breakpoint Encountered", ROUTING, nullptr); - } - } -} -#endif diff --git a/vpr/src/route/route_timing.h b/vpr/src/route/route_timing.h deleted file mode 100644 index 38495bb806b..00000000000 --- a/vpr/src/route/route_timing.h +++ /dev/null @@ -1,291 +0,0 @@ -#pragma once - -#include -#include - -#include "connection_based_routing.h" -#include "connection_router_interface.h" -#include "heap_type.h" -#include "netlist.h" -#include "route_budgets.h" -#include "router_stats.h" -#include "router_lookahead.h" -#include "routing_predictor.h" -#include "rr_graph_type.h" -#include "spatial_route_tree_lookup.h" -#include "timing_info_fwd.h" -#include "vpr_types.h" -#include "vpr_utils.h" - -#include "NetPinTimingInvalidator.h" - -extern bool f_router_debug; - -/** TODO: remove timing_driven_route_structs together with this fn */ -int get_max_pins_per_net(const Netlist<>& net_list); - -/** Types and defines common to timing_driven and parallel routers */ - -#define CONGESTED_SLOPE_VAL -0.04 - -/** Per-iteration congestion mode for the router: focus more on routability after a certain threshold */ -enum class RouterCongestionMode { - NORMAL, - CONFLICTED -}; - -/** Identifies the two breakpoint types in routing */ 
-typedef enum router_breakpoint_type { - BP_ROUTE_ITER, - BP_NET_ID -} bp_router_type; - -/** Results from attempting to route a net. - * success: Could we route it? - * was_rerouted: Is the routing different from the last one? (set by try_* functions) - * retry_with_full_bb: Should we retry this net with a full-device bounding box? (used in the parallel router) - * - * I'm fine with returning 3 bytes from a fn: consider an enum class if this becomes too big */ -struct NetResultFlags { - bool success = false; - bool was_rerouted = false; - bool retry_with_full_bb = false; -}; - -struct RoutingMetrics { - size_t used_wirelength = 0; - - float sWNS = std::numeric_limits::quiet_NaN(); - float sTNS = std::numeric_limits::quiet_NaN(); - float hWNS = std::numeric_limits::quiet_NaN(); - float hTNS = std::numeric_limits::quiet_NaN(); - tatum::TimingPathInfo critical_path; -}; - -/* Data while timing driven route is active */ -class timing_driven_route_structs { - public: - std::vector pin_criticality; /* [1..max_pins_per_net-1] */ - - timing_driven_route_structs(const Netlist<>& net_list) { - int max_sinks = std::max(get_max_pins_per_net(net_list) - 1, 0); - pin_criticality.resize(max_sinks + 1); - - /* Set element 0 to invalid values */ - pin_criticality[0] = std::numeric_limits::quiet_NaN(); - } -}; - -/** Returns the bounding box of a net's used routing resources */ -t_bb calc_current_bb(const RouteTree& tree); - -/** Get available wirelength for the current RR graph */ -size_t calculate_wirelength_available(); - -/** Calculate wirelength for the current routing and populate a WirelengthInfo */ -WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t available_wirelength); - -size_t dynamic_update_bounding_boxes(const std::vector& updated_nets, - const Netlist<>& net_list, - int high_fanout_threshold); - -/** Early exit code for cases where it is obvious that a successful route will not be found - * Heuristic: If total wirelength used in first routing 
iteration is X% of total available wirelength, exit */ -bool early_exit_heuristic(const t_router_opts& router_opts, const WirelengthInfo& wirelength_info); - -/** Give-up on reconvergent routing if the CPD improvement after the - * first iteration since convergence is small, compared to the best - * CPD seen so far */ -bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts, - int itry_since_last_convergence, - std::shared_ptr timing_info, - const RoutingMetrics& best_routing_metrics); - -void enable_router_debug(const t_router_opts& router_opts, ParentNetId net, RRNodeId sink_rr, int router_iteration, ConnectionRouterInterface* router); - -void generate_route_timing_reports(const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - const SetupTimingInfo& timing_info, - const RoutingDelayCalculator& delay_calc, - bool is_flat); - -/** Initialize net_delay based on best-case delay estimates from the router lookahead. */ -void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead, - const Netlist<>& net_list, - const vtr::vector>& net_rr_terminals, - NetPinsMatrix& net_delay, - const RRGraphView& rr_graph, - bool is_flat); - -void init_router_stats(RouterStats& router_stats); - -bool is_better_quality_routing(const vtr::vector>& best_routing, - const RoutingMetrics& best_routing_metrics, - const WirelengthInfo& wirelength_info, - std::shared_ptr timing_info); - -bool is_iteration_complete(bool routing_is_feasible, const t_router_opts& router_opts, int itry, std::shared_ptr timing_info, bool rcv_finished); - -/** Print the index of this routing failure */ -void print_overused_nodes_status(const t_router_opts& router_opts, const OveruseInfo& overuse_info); - -void print_route_status_header(); - -void print_route_status(int itry, - double elapsed_sec, - float pres_fac, - int num_bb_updated, - const RouterStats& router_stats, - const OveruseInfo& overuse_info, - const WirelengthInfo& wirelength_info, - std::shared_ptr 
timing_info, - float est_success_iteration); - -void print_router_criticality_histogram(const Netlist<>& net_list, - const SetupTimingInfo& timing_info, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - bool is_flat); - -/** If a route is ripped up during routing, non-configurable sets are left - * behind. As a result, the final routing may have stubs at - * non-configurable sets. This function tracks non-configurable set usage, - * and if the sets are unused, prunes them. */ -void prune_unused_non_configurable_nets(CBRR& connections_inf, - const Netlist<>& net_list); - -/** - * If flat_routing and has_choking_spot are true, there are some choke points inside the cluster which would increase the convergence time of routing. - * To address this issue, the congestion cost of those choke points needs to decrease. This function identify those choke points for each net, - * and since the amount of congestion reduction is dependant on the number sinks reachable from that choke point, it also store the number of reachable sinks - * for each choke point. 
- * @param net_list - * @param net_terminal_groups [Net_id][group_id] -> rr_node_id of the pins in the group - * @param net_terminal_group_num [Net_id][pin_id] -> group_id - * @param has_choking_spot is true if the given architecture has choking spots inside the cluster - * @param is_flat is true if flat_routing is enabled - * @return [Net_id][pin_id] -> [choke_point_rr_node_id, number of sinks reachable by this choke point] - */ - -vtr::vector>> set_nets_choking_spots(const Netlist<>& net_list, - const vtr::vector>>& net_terminal_groups, - const vtr::vector>& net_terminal_group_num, - bool has_choking_spot, - bool is_flat); - -/** Detect if net should be routed or not */ -bool should_route_net(ParentNetId net_id, - CBRR& connections_inf, - bool if_force_reroute); - -bool should_setup_lower_bound_connection_delays(int itry, const t_router_opts& router_opts); - -bool timing_driven_check_net_delays(const Netlist<>& net_list, - NetPinsMatrix& net_delay); - -bool try_timing_driven_route(const Netlist<>& net_list, - const t_det_routing_arch& det_routing_arch, - const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - const std::vector& segment_inf, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - std::shared_ptr delay_calc, - ScreenUpdatePriority first_iteration_priority, - bool is_flat); - -/** Attempt to route a single net. 
- * - * @param router The ConnectionRouter instance - * @param net_list Input netlist - * @param net_id - * @param itry # of iteration - * @param pres_fac - * @param router_opts - * @param connections_inf - * @param router_stats - * @param pin_criticality - * @param rt_node_of_sink Lookup from target_pin-like indices (indicating SINK nodes) to RouteTreeNodes - * @param net_delay - * @param netlist_pin_lookup - * @param timing_info - * @param pin_timing_invalidator - * @param budgeting_inf - * @param worst_neg_slack - * @param routing_predictor - * @param choking_spots - * @param is_flat - * @return NetResultFlags for this net. success = false means the RR graph is disconnected and the caller can give up */ -template -NetResultFlags timing_driven_route_net(ConnectionRouter& router, - const Netlist<>& net_list, - ParentNetId net_id, - int itry, - float pres_fac, - const t_router_opts& router_opts, - CBRR& connections_inf, - RouterStats& router_stats, - std::vector& pin_criticality, - float* net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - route_budgets& budgeting_inf, - float worst_neg_slack, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat); - -template -NetResultFlags try_timing_driven_route_net(ConnectionRouter& router, - const Netlist<>& net_list, - const ParentNetId& net_id, - int itry, - float pres_fac, - const t_router_opts& router_opts, - CBRR& connections_inf, - RouterStats& router_stats, - std::vector& pin_criticality, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - route_budgets& budgeting_inf, - float worst_negative_slack, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat); - -/** Update net_delay value for a single sink in a RouteTree. 
*/ -inline void update_net_delay_from_isink(float* net_delay, - const RouteTree& tree, - int isink, - const Netlist<>& net_list, - ParentNetId inet, - TimingInfo* timing_info, - NetPinTimingInvalidator* pin_timing_invalidator) { - float new_delay = tree.find_by_isink(isink)->Tdel; - - if (pin_timing_invalidator && new_delay != net_delay[isink]) { - //Delay changed, invalidate for incremental timing update - VTR_ASSERT_SAFE(timing_info); - ParentPinId pin = net_list.net_pin(inet, isink); - pin_timing_invalidator->invalidate_connection(pin, timing_info); - } - - net_delay[isink] = new_delay; -} - -void update_router_stats(RouterStats& router_stats, RouterStats& router_iteration_stats); - -#ifndef NO_GRAPHICS -void update_router_info_and_check_bp(bp_router_type type, int net_id); -#endif - -void update_rr_base_costs(int fanout); - -/** Traverses down a route tree and updates rr_node_inf for all nodes - * to reflect that these nodes have already been routed to */ -void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node); diff --git a/vpr/src/route/route_tree.cpp b/vpr/src/route/route_tree.cpp index 36f37461527..4c4e37c3052 100644 --- a/vpr/src/route/route_tree.cpp +++ b/vpr/src/route/route_tree.cpp @@ -1,7 +1,9 @@ #include "route_tree.h" + +#include "connection_based_routing.h" #include "globals.h" #include "netlist_fwd.h" -#include "route_timing.h" +#include "route_debug.h" #include "rr_graph_fwd.h" #include "vtr_math.h" diff --git a/vpr/src/route/route_util.cpp b/vpr/src/route/route_utilization.cpp similarity index 99% rename from vpr/src/route/route_util.cpp rename to vpr/src/route/route_utilization.cpp index 8ca7f657124..ec6da92cf1d 100644 --- a/vpr/src/route/route_util.cpp +++ b/vpr/src/route/route_utilization.cpp @@ -1,4 +1,4 @@ -#include "route_util.h" +#include "route_utilization.h" #include "globals.h" #include "draw_types.h" #include "draw_global.h" diff --git a/vpr/src/route/route_util.h b/vpr/src/route/route_utilization.h similarity index 100% 
rename from vpr/src/route/route_util.h rename to vpr/src/route/route_utilization.h diff --git a/vpr/src/route/route_utils.cpp b/vpr/src/route/route_utils.cpp new file mode 100644 index 00000000000..f90789e5250 --- /dev/null +++ b/vpr/src/route/route_utils.cpp @@ -0,0 +1,536 @@ +/** @file Utility fns for top-level router. */ + +#include "route_utils.h" + +#include "connection_based_routing.h" +#include "draw.h" +#include "draw_debug.h" +#include "draw_global.h" +#include "draw_types.h" +#include "net_delay.h" +#include "overuse_report.h" +#include "place_and_route.h" +#include "route_debug.h" + +#include "VprTimingGraphResolver.h" +#include "tatum/TimingReporter.hpp" + +bool check_net_delays(const Netlist<>& net_list, NetPinsMatrix& net_delay) { + constexpr float ERROR_TOL = 0.0001; + + unsigned int ipin; + auto net_delay_check = make_net_pins_matrix(net_list); + + load_net_delay_from_routing(net_list, net_delay_check); + + for (auto net_id : net_list.nets()) { + for (ipin = 1; ipin < net_list.net_pins(net_id).size(); ipin++) { + if (net_delay_check[net_id][ipin] == 0.) { /* Should be only GLOBAL nets */ + if (fabs(net_delay[net_id][ipin]) > ERROR_TOL) { + VPR_ERROR(VPR_ERROR_ROUTE, + "in timing_driven_check_net_delays: net %zu pin %u.\n" + "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n", + size_t(net_id), ipin, net_delay[net_id][ipin], net_delay_check[net_id][ipin]); + } + } else { + float error = fabs(1.0 - net_delay[net_id][ipin] / net_delay_check[net_id][ipin]); + if (error > ERROR_TOL) { + VPR_ERROR(VPR_ERROR_ROUTE, + "in timing_driven_check_net_delays: net %zu pin %u.\n" + "\tIncremental calc. 
net_delay is %g, but from scratch net delay is %g.\n", + size_t(net_id), ipin, net_delay[net_id][ipin], net_delay_check[net_id][ipin]); + } + } + } + } + + return true; +} + +// In heavily congested designs a static bounding box (BB) can +// become problematic for routability (it effectively enforces a +// hard blockage restricting where a net can route). +// +// For instance, the router will try to route non-critical connections +// away from congested regions, but may end up hitting the edge of the +// bounding box, limiting how far out-of-the-way it can be routed and +// preventing congestion from resolving. +// +// To alleviate this, we dynamically expand net bounding boxes if the net's +// *current* routing uses RR nodes 'close' to the edge of its bounding box. +// +// The result is that connections trying to move out of the way and hitting +// their BB will have their bounding boxes expand slowly in that direction. +// This helps spread out regions of heavy congestion (over several routing +// iterations). +// +// By growing the BBs slowly and only as needed we minimize the size of the BBs. +// This helps keep the router's graph search fast. +// +// Typically, only a small minority of nets (typically < 10%) have their BBs updated +// each routing iteration. +size_t dynamic_update_bounding_boxes(const std::vector& updated_nets) { + auto& device_ctx = g_vpr_ctx.device(); + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + auto& grid = device_ctx.grid; + + //Controls how close a net's routing needs to be to its bounding box + //before the bounding box is expanded. 
+ // + //A value of zero indicates that the routing needs to be at the bounding box + //edge + constexpr int DYNAMIC_BB_DELTA_THRESHOLD = 0; + + //Walk through each net, calculating the bounding box of its current routing, + //and then increase the router's bounding box if the two are close together + + int grid_xmax = grid.width() - 1; + int grid_ymax = grid.height() - 1; + + size_t num_bb_updated = 0; + + for (ParentNetId net : updated_nets) { + if (!route_ctx.route_trees[net]) + continue; // Skip if no routing + if (!route_ctx.net_status.is_routed(net)) + continue; + + t_bb curr_bb = calc_current_bb(route_ctx.route_trees[net].value()); + t_bb& router_bb = route_ctx.route_bb[net]; + + //Calculate the distances between the net's used RR nodes and + //the router's bounding box + int delta_xmin = curr_bb.xmin - router_bb.xmin; + int delta_xmax = router_bb.xmax - curr_bb.xmax; + int delta_ymin = curr_bb.ymin - router_bb.ymin; + int delta_ymax = router_bb.ymax - curr_bb.ymax; + + //Note that if the net uses non-configurable switches it's routing + //may end-up outside the bounding boxes, so the delta values may be + //negative. The code below will expand the bounding box in those + //cases. 
+ + //Expand each dimension by one if within DYNAMIC_BB_DELTA_THRESHOLD threshold + bool updated_bb = false; + if (delta_xmin <= DYNAMIC_BB_DELTA_THRESHOLD && router_bb.xmin > 0) { + --router_bb.xmin; + updated_bb = true; + } + + if (delta_ymin <= DYNAMIC_BB_DELTA_THRESHOLD && router_bb.ymin > 0) { + --router_bb.ymin; + updated_bb = true; + } + + if (delta_xmax <= DYNAMIC_BB_DELTA_THRESHOLD && router_bb.xmax < grid_xmax) { + ++router_bb.xmax; + updated_bb = true; + } + + if (delta_ymax <= DYNAMIC_BB_DELTA_THRESHOLD && router_bb.ymax < grid_ymax) { + ++router_bb.ymax; + updated_bb = true; + } + + if (updated_bb) { + ++num_bb_updated; + //VTR_LOG("Expanded net %6zu router BB to (%d,%d)x(%d,%d) based on net RR node BB (%d,%d)x(%d,%d)\n", size_t(net), + //router_bb.xmin, router_bb.ymin, router_bb.xmax, router_bb.ymax, + //curr_bb.xmin, curr_bb.ymin, curr_bb.xmax, curr_bb.ymax); + } + } + return num_bb_updated; +} + +bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts, + int itry_since_last_convergence, + std::shared_ptr timing_info, + const RoutingMetrics& best_routing_metrics) { + if (itry_since_last_convergence == 1) { + float cpd_ratio = timing_info->setup_worst_negative_slack() / best_routing_metrics.sWNS; + + // Give up if we see less than a 1% CPD improvement, + // after reducing pres_fac. Typically larger initial + // improvements are needed to see an actual improvement + // in final legal routing quality. 
+ if (cpd_ratio >= router_opts.reconvergence_cpd_threshold) { + VTR_LOG("Giving up routing since additional routing convergences seem unlikely to improve quality (CPD ratio: %g)\n", cpd_ratio); + return true; // Potential CPD improvement is small, don't spend run-time trying to improve it + } + } + + return false; // Don't give up +} + +bool is_better_quality_routing(const vtr::vector>& best_routing, + const RoutingMetrics& best_routing_metrics, + const WirelengthInfo& wirelength_info, + std::shared_ptr timing_info) { + if (best_routing.empty()) { + return true; // First legal routing + } + + // Rank first based on sWNS, followed by other timing metrics (sTNS, hWNS, hTNS); a metric only decides when it is strictly better or strictly worse + if (timing_info) { + if (timing_info->setup_worst_negative_slack() > best_routing_metrics.sWNS) { + return true; + } else if (timing_info->setup_worst_negative_slack() < best_routing_metrics.sWNS) { + return false; + } + + if (timing_info->setup_total_negative_slack() > best_routing_metrics.sTNS) { + return true; + } else if (timing_info->setup_total_negative_slack() < best_routing_metrics.sTNS) { + return false; + } + + if (timing_info->hold_worst_negative_slack() > best_routing_metrics.hWNS) { + return true; + } else if (timing_info->hold_worst_negative_slack() < best_routing_metrics.hWNS) { + return false; // Strictly worse hold WNS than the best routing + } + + if (timing_info->hold_total_negative_slack() > best_routing_metrics.hTNS) { + return true; + } else if (timing_info->hold_total_negative_slack() < best_routing_metrics.hTNS) { + return false; // Strictly worse hold TNS than the best routing + } + } + + // Finally, wirelength tie breaker + return wirelength_info.used_wirelength() < best_routing_metrics.used_wirelength; +} + +bool is_iteration_complete(bool routing_is_feasible, const t_router_opts& router_opts, int itry, std::shared_ptr timing_info, bool rcv_finished) { + if (routing_is_feasible) { + if (router_opts.routing_budgets_algorithm != YOYO) { + return true; + } else if (router_opts.routing_budgets_algorithm == YOYO && (timing_info->hold_worst_negative_slack() == 0 || rcv_finished) && itry != 
1) { + return true; + } + } + return false; +} + +void generate_route_timing_reports(const t_router_opts& router_opts, + const t_analysis_opts& analysis_opts, + const SetupTimingInfo& timing_info, + const RoutingDelayCalculator& delay_calc, + bool is_flat) { + auto& timing_ctx = g_vpr_ctx.timing(); + auto& atom_ctx = g_vpr_ctx.atom(); + + VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, delay_calc, is_flat); + resolver.set_detail_level(analysis_opts.timing_report_detail); + + tatum::TimingReporter timing_reporter(resolver, *timing_ctx.graph, *timing_ctx.constraints); + + timing_reporter.report_timing_setup(router_opts.first_iteration_timing_report_file, *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths); +} + +int get_max_pins_per_net(const Netlist<>& net_list) { + int max_pins_per_net = 0; + for (auto net_id : net_list.nets()) { + if (!net_list.net_is_ignored(net_id)) + max_pins_per_net = std::max(max_pins_per_net, (int)net_list.net_pins(net_id).size()); + } + + return (max_pins_per_net); +} + +void print_overused_nodes_status(const t_router_opts& router_opts, const OveruseInfo& overuse_info) { + VTR_LOG("\nFailed routing attempt\n"); + + size_t num_overused = overuse_info.overused_nodes; + size_t max_logged_overused_rr_nodes = router_opts.max_logged_overused_rr_nodes; + + //Overused nodes info logging upper limit (both counts are size_t, hence %zu) + VTR_LOG("Total number of overused nodes: %zu\n", num_overused); + if (num_overused > max_logged_overused_rr_nodes) { + VTR_LOG("Total number of overused nodes is larger than the logging limit (%zu).\n", max_logged_overused_rr_nodes); + VTR_LOG("Displaying the first %zu entries.\n", max_logged_overused_rr_nodes); + } + + log_overused_nodes_status(max_logged_overused_rr_nodes); + VTR_LOG("\n"); +} + +void print_route_status(int itry, double elapsed_sec, float pres_fac, int num_bb_updated, const RouterStats& router_stats, const OveruseInfo& overuse_info, const WirelengthInfo& wirelength_info, std::shared_ptr 
timing_info, float est_success_iteration) { + //Iteration + VTR_LOG("%4d", itry); + + //Elapsed Time + VTR_LOG(" %6.1f", elapsed_sec); + + //pres_fac + constexpr int PRES_FAC_DIGITS = 7; + constexpr int PRES_FAC_SCI_PRECISION = 1; + pretty_print_float(" ", pres_fac, PRES_FAC_DIGITS, PRES_FAC_SCI_PRECISION); + //VTR_LOG(" %5.1f", pres_fac); + + //Number of bounding boxes updated + VTR_LOG(" %4d", num_bb_updated); + + //Heap push/pop + constexpr int HEAP_OP_DIGITS = 7; + constexpr int HEAP_OP_SCI_PRECISION = 2; + pretty_print_uint(" ", router_stats.heap_pushes, HEAP_OP_DIGITS, HEAP_OP_SCI_PRECISION); + VTR_ASSERT(router_stats.heap_pops <= router_stats.heap_pushes); + + //Rerouted nets + constexpr int NET_ROUTED_DIGITS = 7; + constexpr int NET_ROUTED_SCI_PRECISION = 2; + pretty_print_uint(" ", router_stats.nets_routed, NET_ROUTED_DIGITS, NET_ROUTED_SCI_PRECISION); + + //Rerouted connections + constexpr int CONN_ROUTED_DIGITS = 7; + constexpr int CONN_ROUTED_SCI_PRECISION = 2; + pretty_print_uint(" ", router_stats.connections_routed, CONN_ROUTED_DIGITS, CONN_ROUTED_SCI_PRECISION); + + //Overused RR nodes + constexpr int OVERUSE_DIGITS = 7; + constexpr int OVERUSE_SCI_PRECISION = 2; + pretty_print_uint(" ", overuse_info.overused_nodes, OVERUSE_DIGITS, OVERUSE_SCI_PRECISION); + VTR_LOG(" (%6.3f%%)", overuse_info.overused_node_ratio() * 100); + + //Wirelength + constexpr int WL_DIGITS = 7; + constexpr int WL_SCI_PRECISION = 2; + pretty_print_uint(" ", wirelength_info.used_wirelength(), WL_DIGITS, WL_SCI_PRECISION); + VTR_LOG(" (%4.1f%%)", wirelength_info.used_wirelength_ratio() * 100); + + //CPD + if (timing_info) { + float cpd = timing_info->least_slack_critical_path().delay(); + VTR_LOG(" %#8.3f", 1e9 * cpd); + } else { + VTR_LOG(" %8s", "N/A"); + } + + //sTNS + if (timing_info) { + float sTNS = timing_info->setup_total_negative_slack(); + VTR_LOG(" % #10.4g", 1e9 * sTNS); + } else { + VTR_LOG(" %10s", "N/A"); + } + + //sWNS + if (timing_info) { + float sWNS = 
timing_info->setup_worst_negative_slack(); + VTR_LOG(" % #10.3f", 1e9 * sWNS); + } else { + VTR_LOG(" %10s", "N/A"); + } + + //hTNS + if (timing_info) { + float hTNS = timing_info->hold_total_negative_slack(); + VTR_LOG(" % #10.4g", 1e9 * hTNS); + } else { + VTR_LOG(" %10s", "N/A"); + } + + //hWNS + if (timing_info) { + float hWNS = timing_info->hold_worst_negative_slack(); + VTR_LOG(" % #10.3f", 1e9 * hWNS); + } else { + VTR_LOG(" %10s", "N/A"); + } + + //Estimated success iteration + if (std::isnan(est_success_iteration)) { + VTR_LOG(" %8s", "N/A"); + } else { + VTR_LOG(" %8.0f", est_success_iteration); + } + + VTR_LOG("\n"); + + fflush(stdout); +} + +void print_route_status_header() { + VTR_LOG("---- ------ ------- ---- ------- ------- ------- ----------------- --------------- -------- ---------- ---------- ---------- ---------- --------\n"); + VTR_LOG("Iter Time pres BBs Heap Re-Rtd Re-Rtd Overused RR Nodes Wirelength CPD sTNS sWNS hTNS hWNS Est Succ\n"); + VTR_LOG(" (sec) fac Updt push Nets Conns (ns) (ns) (ns) (ns) (ns) Iter\n"); + VTR_LOG("---- ------ ------- ---- ------- ------- ------- ----------------- --------------- -------- ---------- ---------- ---------- ---------- --------\n"); +} + +void print_router_criticality_histogram(const Netlist<>& net_list, + const SetupTimingInfo& timing_info, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + bool is_flat) { + print_histogram(create_criticality_histogram(net_list, timing_info, netlist_pin_lookup, is_flat, 10)); +} + +void prune_unused_non_configurable_nets(CBRR& connections_inf, + const Netlist<>& net_list) { + auto& device_ctx = g_vpr_ctx.device(); + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + std::vector non_config_node_set_usage(device_ctx.rr_non_config_node_sets.size(), 0); + for (auto net_id : net_list.nets()) { + if (!route_ctx.route_trees[net_id]) + continue; + RouteTree& tree = route_ctx.route_trees[net_id].value(); + + connections_inf.clear_force_reroute_for_net(net_id); + + 
std::vector usage = tree.get_non_config_node_set_usage(); + + // Prune the branches of the tree that don't legally lead to sinks + tree.prune(connections_inf, &usage); + } +} + +vtr::vector>> set_nets_choking_spots(const Netlist<>& net_list, + const vtr::vector>>& net_terminal_groups, + const vtr::vector>& net_terminal_group_num, + bool has_choking_spot, + bool is_flat) { + vtr::vector>> choking_spots(net_list.nets().size()); + for (const auto& net_id : net_list.nets()) { + choking_spots[net_id].resize(net_list.net_pins(net_id).size()); + } + + // Return if the architecture doesn't have any potential choke points + if (!has_choking_spot) { + return choking_spots; + } + + // We only identify choke points if flat_routing is enabled. + VTR_ASSERT(is_flat); + + const auto& device_ctx = g_vpr_ctx.device(); + const auto& rr_graph = device_ctx.rr_graph; + const auto& route_ctx = g_vpr_ctx.routing(); + const auto& net_rr_terminal = route_ctx.net_rr_terminals; + + for (const auto& net_id : net_list.nets()) { + int pin_count = 0; + // Global nets are not routed, thus we don't consider them. + if (net_list.net_is_global(net_id)) { + continue; + } + for (auto pin_id : net_list.net_pins(net_id)) { + // pin_count == 0 corresponds to the net's source pin + if (pin_count == 0) { + pin_count++; + continue; + } + auto block_id = net_list.pin_block(pin_id); + auto blk_loc = get_block_loc(block_id, is_flat); + int group_num = net_terminal_group_num[net_id][pin_count]; + // This is a group of sinks, including the current pin_id, which share a specific number of parent blocks. 
+ // To determine the choke points of the current sink, pin_id, we only consider the sinks in this group for the + // run-time purpose + std::vector sink_grp = net_terminal_groups[net_id][group_num]; + VTR_ASSERT((int)sink_grp.size() >= 1); + if (sink_grp.size() == 1) { + pin_count++; + continue; + } else { + // get the ptc_number of the sinks in the group + std::for_each(sink_grp.begin(), sink_grp.end(), [&rr_graph](int& sink_rr_num) { + sink_rr_num = rr_graph.node_ptc_num(RRNodeId(sink_rr_num)); + }); + auto physical_type = device_ctx.grid.get_physical_type({blk_loc.loc.x, blk_loc.loc.y, blk_loc.loc.layer}); + // Get the choke points of the sink corresponds to pin_count given the sink group + auto sink_choking_spots = get_sink_choking_points(physical_type, + rr_graph.node_ptc_num(RRNodeId(net_rr_terminal[net_id][pin_count])), + sink_grp); + // Store choke points rr_node_id and the number reachable sinks + for (const auto& choking_spot : sink_choking_spots) { + int pin_physical_num = choking_spot.first; + int num_reachable_sinks = choking_spot.second; + auto pin_rr_node_id = get_pin_rr_node_id(rr_graph.node_lookup(), + physical_type, + blk_loc.loc.layer, + blk_loc.loc.x, + blk_loc.loc.y, + pin_physical_num); + if (pin_rr_node_id != RRNodeId::INVALID()) { + choking_spots[net_id][pin_count].insert(std::make_pair(pin_rr_node_id, num_reachable_sinks)); + } + } + } + pin_count++; + } + } + + return choking_spots; +} + +/** Wrapper for create_rr_graph() with extra checks */ +void try_graph(int width_fac, + const t_router_opts& router_opts, + t_det_routing_arch* det_routing_arch, + std::vector& segment_inf, + t_chan_width_dist chan_width_dist, + t_direct_inf* directs, + int num_directs, + bool is_flat) { + auto& device_ctx = g_vpr_ctx.mutable_device(); + + t_graph_type graph_type; + t_graph_type graph_directionality; + if (router_opts.route_type == GLOBAL) { + graph_type = GRAPH_GLOBAL; + graph_directionality = GRAPH_BIDIR; + } else { + graph_type = 
(det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR); + graph_directionality = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR); + } + + /* Set the channel widths */ + t_chan_width chan_width = init_chan(width_fac, chan_width_dist, graph_directionality); + + /* Free any old routing graph, if one exists. */ + free_rr_graph(); + + /* Set up the routing resource graph defined by this FPGA architecture. */ + int warning_count; + create_rr_graph(graph_type, + device_ctx.physical_tile_types, + device_ctx.grid, + chan_width, + det_routing_arch, + segment_inf, + router_opts, + directs, num_directs, + &warning_count, + is_flat); +} + +float update_draw_pres_fac(float new_pres_fac) { +#ifndef NO_GRAPHICS + + // Only updates the drawing pres_fac if graphics is enabled + get_draw_state_vars()->pres_fac = new_pres_fac; + +#endif // NO_GRAPHICS + + return new_pres_fac; +} + +#ifndef NO_GRAPHICS +void update_router_info_and_check_bp(bp_router_type type, int net_id) { + t_draw_state* draw_state = get_draw_state_vars(); + if (draw_state->list_of_breakpoints.size() != 0) { + if (type == BP_ROUTE_ITER) + get_bp_state_globals()->get_glob_breakpoint_state()->router_iter++; + else if (type == BP_NET_ID) + get_bp_state_globals()->get_glob_breakpoint_state()->route_net_id = net_id; + f_router_debug = check_for_breakpoints(false); + if (f_router_debug) { + breakpoint_info_window(get_bp_state_globals()->get_glob_breakpoint_state()->bp_description, *get_bp_state_globals()->get_glob_breakpoint_state(), false); + update_screen(ScreenUpdatePriority::MAJOR, "Breakpoint Encountered", ROUTING, nullptr); + } + } +} +#endif diff --git a/vpr/src/route/route_utils.h b/vpr/src/route/route_utils.h new file mode 100644 index 00000000000..8b86f230290 --- /dev/null +++ b/vpr/src/route/route_utils.h @@ -0,0 +1,148 @@ +#pragma once + +/** @file Utility functions used in the top-level router (route.cpp). 
*/ + +#include "router_stats.h" +#include "timing_info.h" +#include "vpr_net_pins_matrix.h" +#include "vpr_types.h" + +#include "RoutingDelayCalculator.h" + +constexpr float CONGESTED_SLOPE_VAL = -0.04; + +/** Identifies the two breakpoint types in routing */ +typedef enum router_breakpoint_type { + BP_ROUTE_ITER, + BP_NET_ID +} bp_router_type; + +/** Per-iteration congestion mode for the router: focus more on routability after a certain threshold */ +enum class RouterCongestionMode { + NORMAL, + CONFLICTED +}; + +struct RoutingMetrics { + size_t used_wirelength = 0; + + float sWNS = std::numeric_limits::quiet_NaN(); + float sTNS = std::numeric_limits::quiet_NaN(); + float hWNS = std::numeric_limits::quiet_NaN(); + float hTNS = std::numeric_limits::quiet_NaN(); + tatum::TimingPathInfo critical_path; +}; + +/** Returns the bounding box of a net's used routing resources */ +t_bb calc_current_bb(const RouteTree& tree); + +/** Get available wirelength for the current RR graph */ +size_t calculate_wirelength_available(); + +/** Calculate wirelength for the current routing and populate a WirelengthInfo */ +WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t available_wirelength); + +/** Checks that the net delays computed incrementally during timing driven + * routing match those computed from scratch by the net_delay.cpp module. 
*/ +bool check_net_delays(const Netlist<>& net_list, NetPinsMatrix& net_delay); + +/** Update bounding box for net if existing routing is close to boundary */ +size_t dynamic_update_bounding_boxes(const std::vector& updated_nets); + +/** Early exit code for cases where it is obvious that a successful route will not be found + * Heuristic: If total wirelength used in first routing iteration is X% of total available wirelength, exit */ +bool early_exit_heuristic(const t_router_opts& router_opts, const WirelengthInfo& wirelength_info); + +/** Give-up on reconvergent routing if the CPD improvement after the + * first iteration since convergence is small, compared to the best + * CPD seen so far */ +bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts, + int itry_since_last_convergence, + std::shared_ptr timing_info, + const RoutingMetrics& best_routing_metrics); + +void generate_route_timing_reports(const t_router_opts& router_opts, + const t_analysis_opts& analysis_opts, + const SetupTimingInfo& timing_info, + const RoutingDelayCalculator& delay_calc, + bool is_flat); + +/** Get the maximum number of pins used in the netlist (used to allocate things) */ +int get_max_pins_per_net(const Netlist<>& net_list); + +/** Initialize net_delay based on best-case delay estimates from the router lookahead. */ +void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead, + const Netlist<>& net_list, + const vtr::vector>& net_rr_terminals, + NetPinsMatrix& net_delay, + const RRGraphView& rr_graph, + bool is_flat); + +bool is_better_quality_routing(const vtr::vector>& best_routing, + const RoutingMetrics& best_routing_metrics, + const WirelengthInfo& wirelength_info, + std::shared_ptr timing_info); + +/** This function checks if a routing iteration has completed. 
+ * When VPR is run normally, we check if routing_budgets_algorithm is disabled, and if the routing is legal + * With the introduction of the yoyo budgeting algorithm, we must check if there are no hold violations + * in addition to routing being legal and the correct budgeting algorithm being set. */ +bool is_iteration_complete(bool routing_is_feasible, const t_router_opts& router_opts, int itry, std::shared_ptr timing_info, bool rcv_finished); + +void print_overused_nodes_status(const t_router_opts& router_opts, const OveruseInfo& overuse_info); + +void print_route_status(int itry, double elapsed_sec, float pres_fac, int num_bb_updated, const RouterStats& router_stats, const OveruseInfo& overuse_info, const WirelengthInfo& wirelength_info, std::shared_ptr timing_info, float est_success_iteration); + +void print_route_status_header(); + +void print_router_criticality_histogram(const Netlist<>& net_list, + const SetupTimingInfo& timing_info, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + bool is_flat); + +/** Prune stubs of non-config nodes from route_ctx.route_trees. + * If a route is ripped up during routing, non-configurable sets are left + * behind. As a result, the final routing may have stubs at + * non-configurable sets. This function tracks non-configurable set usage, + * and if the sets are unused, prunes them. */ +void prune_unused_non_configurable_nets(CBRR& connections_inf, + const Netlist<>& net_list); + +/** If flat_routing and has_choking_spot are true, there are some choke points inside the cluster which would increase the convergence time of routing. + * To address this issue, the congestion cost of those choke points needs to decrease. This function identifies those choke points for each net, + * and since the amount of congestion reduction is dependent on the number of sinks reachable from that choke point, it also stores the number of reachable sinks + * for each choke point. 
+ * @param net_list + * @param net_terminal_groups [Net_id][group_id] -> rr_node_id of the pins in the group + * @param net_terminal_group_num [Net_id][pin_id] -> group_id + * @param has_choking_spot is true if the given architecture has choking spots inside the cluster + * @param is_flat is true if flat_routing is enabled + * @return [Net_id][pin_id] -> [choke_point_rr_node_id, number of sinks reachable by this choke point] */ +vtr::vector>> set_nets_choking_spots(const Netlist<>& net_list, + const vtr::vector>>& net_terminal_groups, + const vtr::vector>& net_terminal_group_num, + bool has_choking_spot, + bool is_flat); + +/** Wrapper for create_rr_graph() with extra checks */ +void try_graph(int width_fac, + const t_router_opts& router_opts, + t_det_routing_arch* det_routing_arch, + std::vector& segment_inf, + t_chan_width_dist chan_width_dist, + t_direct_inf* directs, + int num_directs, + bool is_flat); + +/* This routine should take the new value of the present congestion factor + * and propagate it to all the relevant data fields in the vpr flow. 
+ * Currently, it only updates the pres_fac used by the drawing functions */ +float update_draw_pres_fac(float new_pres_fac); + +#ifndef NO_GRAPHICS +/** Updates router iteration information and checks for router iteration and net id breakpoints + * Stops after the specified router iteration or net id is encountered */ +void update_router_info_and_check_bp(bp_router_type type, int net_id); +#endif diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp index eac8fdf28c4..d0b95283641 100644 --- a/vpr/src/route/router_delay_profiling.cpp +++ b/vpr/src/route/router_delay_profiling.cpp @@ -1,7 +1,7 @@ #include "router_delay_profiling.h" #include "globals.h" #include "route_common.h" -#include "route_timing.h" +#include "route_net.h" #include "route_export.h" #include "route_tree.h" #include "rr_graph.h" @@ -89,8 +89,7 @@ bool RouterDelayProfiler::calculate_delay(RRNodeId source_node, cost_params, bounding_box, router_stats, - conn_params, - true); + conn_params); if (found_path) { VTR_ASSERT(cheapest.index == sink_node); diff --git a/vpr/src/route/router_delay_profiling.h b/vpr/src/route/router_delay_profiling.h index 11d8eb25f1d..13bae0d0301 100644 --- a/vpr/src/route/router_delay_profiling.h +++ b/vpr/src/route/router_delay_profiling.h @@ -2,7 +2,6 @@ #define ROUTER_DELAY_PROFILING_H_ #include "vpr_types.h" -#include "route_timing.h" #include "binary_heap.h" #include "connection_router.h" diff --git a/vpr/src/route/router_lookahead.cpp b/vpr/src/route/router_lookahead.cpp index 14b6aaa1959..545704e7d06 100644 --- a/vpr/src/route/router_lookahead.cpp +++ b/vpr/src/route/router_lookahead.cpp @@ -1,10 +1,10 @@ #include "router_lookahead.h" +#include "connection_router_interface.h" #include "router_lookahead_map.h" #include "router_lookahead_extended_map.h" #include "vpr_error.h" #include "globals.h" -#include "route_timing.h" static int get_expected_segs_to_target(RRNodeId inode, RRNodeId target_node, int* 
num_segs_ortho_dir_ptr); static int round_up(float x); diff --git a/vpr/src/route/router_lookahead_extended_map.cpp b/vpr/src/route/router_lookahead_extended_map.cpp index b176372e686..102a176f92e 100644 --- a/vpr/src/route/router_lookahead_extended_map.cpp +++ b/vpr/src/route/router_lookahead_extended_map.cpp @@ -4,6 +4,7 @@ #include #include +#include "connection_router_interface.h" #include "rr_node.h" #include "router_lookahead_map_utils.h" #include "router_lookahead_sampling.h" @@ -14,8 +15,8 @@ #include "echo_files.h" #include "rr_graph.h" -#include "route_timing.h" #include "route_common.h" +#include "route_debug.h" #ifdef VTR_ENABLE_CAPNPROTO # include "capnp/serialize.h" diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp index 5f5f8e886f2..0eea1a6c913 100644 --- a/vpr/src/route/router_lookahead_map.cpp +++ b/vpr/src/route/router_lookahead_map.cpp @@ -25,6 +25,7 @@ #include #include #include +#include "connection_router_interface.h" #include "vpr_types.h" #include "vpr_error.h" #include "vpr_utils.h" @@ -39,7 +40,6 @@ #include "rr_graph2.h" #include "rr_graph.h" #include "route_common.h" -#include "route_timing.h" #ifdef VTR_ENABLE_CAPNPROTO # include "capnp/serialize.h" diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index 01c0e79d16c..c9c7017f83c 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -1,7 +1,6 @@ #include "router_lookahead_map_utils.h" -/* - * This file contains utility functions that can be shared among different +/** @file This file contains utility functions that can be shared among different * lookahead computation strategies. 
* * In general, this utility library contains: @@ -9,15 +8,14 @@ * - Different dijkstra expansion algorithms used to perform specific tasks, such as computing the SOURCE/OPIN --> CHAN lookup tables * - Cost Entries definitions used when generating and querying the lookahead * - * To access the utility functions, the util namespace needs to be used. - */ + * To access the utility functions, the util namespace needs to be used. */ #include "globals.h" #include "vpr_context.h" #include "vtr_math.h" #include "vtr_time.h" #include "route_common.h" -#include "route_timing.h" +#include "route_debug.h" static void dijkstra_flood_to_wires(int itile, RRNodeId inode, util::t_src_opin_delays& src_opin_delays, util::t_src_opin_inter_layer_delays& src_opin_inter_layer_delays, bool is_multi_layer); diff --git a/vpr/src/route/router_stats.h b/vpr/src/route/router_stats.h index 47e91731179..4f999a722d1 100644 --- a/vpr/src/route/router_stats.h +++ b/vpr/src/route/router_stats.h @@ -51,6 +51,30 @@ struct RouterStats { size_t add_all_rt_from_high_fanout = 0; size_t add_high_fanout_rt = 0; size_t add_all_rt = 0; + + /** Add rhs's stats to mine; rhs is only read, never modified */ + void combine(const RouterStats& rhs) { + connections_routed += rhs.connections_routed; + nets_routed += rhs.nets_routed; + heap_pushes += rhs.heap_pushes; + inter_cluster_node_pushes += rhs.inter_cluster_node_pushes; + intra_cluster_node_pushes += rhs.intra_cluster_node_pushes; + heap_pops += rhs.heap_pops; + inter_cluster_node_pops += rhs.inter_cluster_node_pops; + intra_cluster_node_pops += rhs.intra_cluster_node_pops; + for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) { + inter_cluster_node_type_cnt_pushes[node_type_idx] += rhs.inter_cluster_node_type_cnt_pushes[node_type_idx]; + inter_cluster_node_type_cnt_pops[node_type_idx] += rhs.inter_cluster_node_type_cnt_pops[node_type_idx]; + intra_cluster_node_type_cnt_pushes[node_type_idx] += rhs.intra_cluster_node_type_cnt_pushes[node_type_idx]; + 
intra_cluster_node_type_cnt_pops[node_type_idx] += rhs.intra_cluster_node_type_cnt_pops[node_type_idx]; + rt_node_pushes[node_type_idx] += rhs.rt_node_pushes[node_type_idx]; + rt_node_high_fanout_pushes[node_type_idx] += rhs.rt_node_high_fanout_pushes[node_type_idx]; + rt_node_entire_tree_pushes[node_type_idx] += rhs.rt_node_entire_tree_pushes[node_type_idx]; + } + add_all_rt += rhs.add_all_rt; + add_all_rt_from_high_fanout += rhs.add_all_rt_from_high_fanout; + add_high_fanout_rt += rhs.add_high_fanout_rt; + } }; class WirelengthInfo { diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp index 0fef4f22a84..1b0c236a29a 100644 --- a/vpr/test/test_connection_router.cpp +++ b/vpr/test/test_connection_router.cpp @@ -1,6 +1,7 @@ #include #include "catch2/catch_test_macros.hpp" +#include "route_net.h" #include "rr_graph_fwd.h" #include "vpr_api.h" #include "vpr_signal_handler.h" @@ -75,8 +76,7 @@ static float do_one_route(RRNodeId source_node, cost_params, bounding_box, router_stats, - conn_params, - true); + conn_params); // Default delay is infinity, which indicates that a route was not found. 
float delay = std::numeric_limits::infinity(); diff --git a/vtr_flow/scripts/python_libs/vtr/task.py b/vtr_flow/scripts/python_libs/vtr/task.py index 6bf898a5d22..0cfb6f3ebbe 100644 --- a/vtr_flow/scripts/python_libs/vtr/task.py +++ b/vtr_flow/scripts/python_libs/vtr/task.py @@ -633,9 +633,11 @@ def create_job( prev_run_dir = get_existing_run_dir(find_task_dir(config, args.alt_tasks_dir), prev_run) prev_work_path = Path(prev_run_dir) / work_dir / param_string prev_file = prev_work_path / "{}.{}".format(Path(circuit).stem, extension) - if not prev_file.exists(): - raise FileNotFoundError("use_previous: file %s not found" % str(prev_file)) - current_cmd += [option, str(prev_file)] + if option == "REPLACE_BLIF": + current_cmd[0] = str(prev_file) + current_cmd += ["-start", "vpr"] + else: + current_cmd += [option, str(prev_file)] if param_string != "common": current_cmd += param.split(" ") diff --git a/vtr_flow/scripts/python_libs/vtr/util.py b/vtr_flow/scripts/python_libs/vtr/util.py index 8eec41661ba..e19935b4b21 100644 --- a/vtr_flow/scripts/python_libs/vtr/util.py +++ b/vtr_flow/scripts/python_libs/vtr/util.py @@ -442,23 +442,24 @@ def format_elapsed_time(time_delta): "route": ["route", "--route_file"], "rr_graph": ["rr_graph.xml", "--read_rr_graph"], "lookahead": ["lookahead.bin", "--read_router_lookahead"], + "blif": ["pre-vpr.blif", "REPLACE_BLIF"], } -def argparse_use_previous(inp: str) -> List[Tuple[str, List]]: +def argparse_use_previous(x: str) -> List[Tuple[str, List]]: """ Parse a -use_previous parameter. Throw if not valid. Returns a list with (run dir name, [extension, cmdline option]) elements. 
""" - tokens = [w.strip() for w in inp.split(",")] + tokens = [w.strip() for w in x.split(",")] tokens = [w for w in tokens if len(w)] out = [] for w in tokens: r = re.fullmatch(r"(\w+):(\w+)", w) if not r: - raise argparse.ArgumentTypeError("Invalid input to -use_previous: %s" % w) + raise argparse.ArgumentTypeError("Invalid input to -use_previous: %s" % w) if not REUSABLE_FILES.get(r.group(2)): - raise argparse.ArgumentTypeError( + raise argparse.ArgumentTypeError( "Unknown file type to use_previous: %s, available types: %s" % (r.group(2), ",".join(REUSABLE_FILES.keys())) )