diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index dac6a7fce0d..6e631b5fa6b 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -909,43 +909,6 @@ RouteStatus vpr_route_fixed_W(const Netlist<>& net_list, std::shared_ptr delay_calc, NetPinsMatrix& net_delay, bool is_flat) { - get_cached_router_lookahead( - vpr_setup.RoutingArch, - vpr_setup.RouterOpts.lookahead_type, - vpr_setup.RouterOpts.write_router_lookahead, - vpr_setup.RouterOpts.read_router_lookahead, - vpr_setup.Segments, - is_flat); - - if (is_flat) { - vtr::ScopedStartFinishTimer timer2("Adding intra-cluster resources to router lookahead"); - auto& route_ctx = g_vpr_ctx.mutable_routing(); - auto& router_opts = vpr_setup.RouterOpts; - auto& segment_inf = vpr_setup.Segments; - auto& det_routing_arch = vpr_setup.RoutingArch; - // If is_flat is true, the router lookahead maps related to intra-cluster resources should be initialized since - // they haven't been initialized when the map related to global resources was initialized. - auto cache_key = route_ctx.router_lookahead_cache_key_; - std::unique_ptr mut_router_lookahead(route_ctx.cached_router_lookahead_.release()); - VTR_ASSERT(mut_router_lookahead); - route_ctx.cached_router_lookahead_.clear(); - if (!router_opts.read_intra_cluster_router_lookahead.empty()) { - mut_router_lookahead->read_intra_cluster(router_opts.read_intra_cluster_router_lookahead); - } else { - mut_router_lookahead->compute_intra_tile(); - } - route_ctx.cached_router_lookahead_.set(cache_key, std::move(mut_router_lookahead)); - auto router_lookahead = get_cached_router_lookahead(det_routing_arch, - router_opts.lookahead_type, - router_opts.write_router_lookahead, - router_opts.read_router_lookahead, - segment_inf, - is_flat); - if (!router_opts.write_intra_cluster_router_lookahead.empty()) { - router_lookahead->write_intra_cluster(router_opts.write_intra_cluster_router_lookahead); - } - } - vtr::ScopedStartFinishTimer timer("Routing"); if (NO_FIXED_CHANNEL_WIDTH == fixed_channel_width || fixed_channel_width <= 0) { diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp index b5a446ad5f4..82ebe70d329 100644 --- a/vpr/src/route/connection_router.cpp +++ b/vpr/src/route/connection_router.cpp @@ -65,8 +65,8 @@ std::tuple ConnectionRouter::timing_driven_route_conne const RouteTree& tree, RRNodeId source_node, RRNodeId sink_node, - const t_conn_cost_params cost_params, - t_bb bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box, RouterStats& router_stats, const ConnectionParameters& conn_params, bool can_grow_bb) { @@ -99,8 +99,8 @@ std::tuple ConnectionRouter::timing_driven_route_connection const RouteTree& tree, RRNodeId source_node, RRNodeId sink_node, - const t_conn_cost_params cost_params, - t_bb bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box, bool can_grow_bb) { //Re-add route nodes from the existing route tree to the heap. //They need to be repushed onto the heap since each node's cost is target specific. @@ -201,8 +201,8 @@ std::tuple ConnectionRouter::timing_driven_route_conne const RouteTree& tree, RRNodeId source_node, RRNodeId sink_node, - const t_conn_cost_params cost_params, - t_bb net_bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& net_bounding_box, const SpatialRouteTreeLookup& spatial_rt_lookup, RouterStats& router_stats, const ConnectionParameters& conn_params, @@ -278,8 +278,8 @@ std::tuple ConnectionRouter::timing_driven_route_conne // Returns either the last element of the path, or nullptr if no path is found template t_heap* ConnectionRouter::timing_driven_route_connection_from_heap(RRNodeId sink_node, - const t_conn_cost_params cost_params, - t_bb bounding_box) { + const t_conn_cost_params& cost_params, + const t_bb& bounding_box) { VTR_ASSERT_SAFE(heap_.is_valid()); if (heap_.is_empty_heap()) { //No source @@ -343,8 +343,8 @@ t_heap* ConnectionRouter::timing_driven_route_connection_from_heap(RRNodeI template vtr::vector ConnectionRouter::timing_driven_find_all_shortest_paths_from_route_tree( const RouteTree& tree, - const t_conn_cost_params cost_params, - t_bb bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box, RouterStats& router_stats, const ConnectionParameters& conn_params) { router_stats_ = &router_stats; @@ -370,8 +370,8 @@ vtr::vector ConnectionRouter::timing_driven_find_all_sho // no-operation lookahead which always returns zero. template vtr::vector ConnectionRouter::timing_driven_find_all_shortest_paths_from_heap( - const t_conn_cost_params cost_params, - t_bb bounding_box) { + const t_conn_cost_params& cost_params, + const t_bb& bounding_box) { vtr::vector cheapest_paths(rr_nodes_.size()); VTR_ASSERT_SAFE(heap_.is_valid()); @@ -422,8 +422,8 @@ vtr::vector ConnectionRouter::timing_driven_find_all_sho template void ConnectionRouter::timing_driven_expand_cheapest(t_heap* cheapest, RRNodeId target_node, - const t_conn_cost_params cost_params, - t_bb bounding_box) { + const t_conn_cost_params& cost_params, + const t_bb& bounding_box) { RRNodeId inode = cheapest->index; t_rr_node_route_inf* route_inf = &rr_node_route_inf_[inode]; @@ -470,8 +470,8 @@ void ConnectionRouter::timing_driven_expand_cheapest(t_heap* cheapest, template void ConnectionRouter::timing_driven_expand_neighbours(t_heap* current, - const t_conn_cost_params cost_params, - t_bb bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box, RRNodeId target_node) { /* Puts all the rr_nodes adjacent to current on the heap. */ @@ -533,27 +533,28 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, RRNodeId from_node, RREdgeId from_edge, RRNodeId to_node, - const t_conn_cost_params cost_params, - const t_bb bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box, RRNodeId target_node, - const t_bb target_bb) { + const t_bb& target_bb) { int to_xlow = rr_graph_->node_xlow(to_node); int to_ylow = rr_graph_->node_ylow(to_node); - int to_xhigh = rr_graph_->node_xhigh(to_node); - int to_yhigh = rr_graph_->node_yhigh(to_node); // BB-pruning // Disable BB-pruning if RCV is enabled, as this can make it harder for circuits with high negative hold slack to resolve this // TODO: Only disable pruning if the net has negative hold slack, maybe go off budgets // Parallel router change: only expand if to_node is inside BB - if (!inside_bb(to_node, bounding_box) + if ((to_xlow < bounding_box.xmin + || to_ylow < bounding_box.ymin + || to_xlow > bounding_box.xmax + || to_ylow > bounding_box.ymax) && !rcv_path_manager.is_enabled()) { VTR_LOGV_DEBUG(router_debug_, " %p Pruned expansion of node %d edge %zu -> %d" " (to node location %d,%dx%d,%d outside of expanded" " net bounding box %d,%dx%d,%d)\n", this, from_node, size_t(from_edge), size_t(to_node), - to_xlow, to_ylow, to_xhigh, to_yhigh, + to_xlow, to_ylow, rr_graph_->node_xhigh(to_node), rr_graph_->node_yhigh(to_node), bounding_box.xmin, bounding_box.ymin, bounding_box.xmax, bounding_box.ymax); return; /* Node is outside (expanded) bounding box. */ } @@ -567,6 +568,8 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, if (to_type == IPIN) { // Check if this IPIN leads to the target block // IPIN's of the target block should be contained within it's bounding box + int to_xhigh = rr_graph_->node_xhigh(to_node); + int to_yhigh = rr_graph_->node_yhigh(to_node); if (to_xlow < target_bb.xmin || to_ylow < target_bb.ymin || to_xhigh > target_bb.xmax @@ -606,11 +609,11 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, // Add to_node to the heap, and also add any nodes which are connected by non-configurable edges template -void ConnectionRouter::timing_driven_add_to_heap(const t_conn_cost_params cost_params, +void ConnectionRouter::timing_driven_add_to_heap(const t_conn_cost_params& cost_params, const t_heap* current, RRNodeId from_node, RRNodeId to_node, - const RREdgeId from_edge, + RREdgeId from_edge, RRNodeId target_node) { const auto& device_ctx = g_vpr_ctx.device(); t_heap next; @@ -715,7 +718,7 @@ static bool same_non_config_node_set(RRNodeId from_node, RRNodeId to_node) { #endif template -float ConnectionRouter::compute_node_cost_using_rcv(const t_conn_cost_params cost_params, +float ConnectionRouter::compute_node_cost_using_rcv(const t_conn_cost_params& cost_params, RRNodeId to_node, RRNodeId target_node, float backwards_delay, @@ -769,7 +772,7 @@ void ConnectionRouter::set_rcv_enabled(bool enable) { //Calculates the cost of reaching to_node template void ConnectionRouter::evaluate_timing_driven_node_costs(t_heap* to, - const t_conn_cost_params cost_params, + const t_conn_cost_params& cost_params, RRNodeId from_node, RRNodeId to_node, RREdgeId from_edge, @@ -921,7 +924,7 @@ void ConnectionRouter::add_route_tree_to_heap( const RouteTreeNode& rt_node, RRNodeId target_node, const t_bb& bounding_box, - const t_conn_cost_params cost_params, + const t_conn_cost_params& cost_params, bool from_high_fanout) { if (from_high_fanout) { router_stats_->add_all_rt_from_high_fanout++; @@ -969,7 +972,7 @@ void ConnectionRouter::add_route_tree_node_to_heap( const RouteTreeNode& rt_node, RRNodeId target_node, const t_bb& bounding_box, - const t_conn_cost_params cost_params, + const t_conn_cost_params& cost_params, bool is_high_fanout) { const auto& device_ctx = g_vpr_ctx.device(); const RRNodeId inode = rt_node.inode; @@ -1045,9 +1048,9 @@ template t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( const RouteTreeNode& rt_root, RRNodeId target_node, - const t_conn_cost_params cost_params, + const t_conn_cost_params& cost_params, const SpatialRouteTreeLookup& spatial_rt_lookup, - t_bb net_bounding_box) { + const t_bb& net_bounding_box) { //For high fanout nets we only add those route tree nodes which are spatially close //to the sink. // diff --git a/vpr/src/route/connection_router.h b/vpr/src/route/connection_router.h index 32c9e327ec4..f514941981a 100644 --- a/vpr/src/route/connection_router.h +++ b/vpr/src/route/connection_router.h @@ -72,8 +72,8 @@ class ConnectionRouter : public ConnectionRouterInterface { const RouteTree& tree, RRNodeId source_node, RRNodeId sink_node, - const t_conn_cost_params cost_params, - t_bb bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box, RouterStats& router_stats, const ConnectionParameters& conn_params, bool can_grow_bb) final; @@ -92,8 +92,8 @@ class ConnectionRouter : public ConnectionRouterInterface { const RouteTree& tree, RRNodeId source_node, RRNodeId sink_node, - const t_conn_cost_params cost_params, - t_bb net_bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& net_bounding_box, const SpatialRouteTreeLookup& spatial_rt_lookup, RouterStats& router_stats, const ConnectionParameters& conn_params, @@ -110,8 +110,8 @@ class ConnectionRouter : public ConnectionRouterInterface { // RouterLookahead used should be the NoOpLookahead. vtr::vector timing_driven_find_all_shortest_paths_from_route_tree( const RouteTree& tree, - const t_conn_cost_params cost_params, - t_bb bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box, RouterStats& router_stats, const ConnectionParameters& conn_params) final; @@ -169,8 +169,8 @@ class ConnectionRouter : public ConnectionRouterInterface { const RouteTree& tree, RRNodeId source_node, RRNodeId sink_node, - const t_conn_cost_params cost_params, - t_bb bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box, bool can_grow_bb); // Finds a path to sink_node, starting from the elements currently in the @@ -184,21 +184,21 @@ class ConnectionRouter : public ConnectionRouterInterface { // found t_heap* timing_driven_route_connection_from_heap( RRNodeId sink_node, - const t_conn_cost_params cost_params, - t_bb bounding_box); + const t_conn_cost_params& cost_params, + const t_bb& bounding_box); // Expand this current node if it is a cheaper path. void timing_driven_expand_cheapest( t_heap* cheapest, RRNodeId target_node, - const t_conn_cost_params cost_params, - t_bb bounding_box); + const t_conn_cost_params& cost_params, + const t_bb& bounding_box); // Expand each neighbor of the current node. void timing_driven_expand_neighbours( t_heap* current, - const t_conn_cost_params cost_params, - t_bb bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box, RRNodeId target_node); // Conditionally adds to_node to the router heap (via path from from_node @@ -211,15 +211,15 @@ class ConnectionRouter : public ConnectionRouterInterface { RRNodeId from_node, RREdgeId from_edge, RRNodeId to_node, - const t_conn_cost_params cost_params, - const t_bb bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box, RRNodeId target_node, - const t_bb target_bb); + const t_bb& target_bb); // Add to_node to the heap, and also add any nodes which are connected by // non-configurable edges void timing_driven_add_to_heap( - const t_conn_cost_params cost_params, + const t_conn_cost_params& cost_params, const t_heap* current, RRNodeId from_node, RRNodeId to_node, @@ -229,7 +229,7 @@ class ConnectionRouter : public ConnectionRouterInterface { // Calculates the cost of reaching to_node void evaluate_timing_driven_node_costs( t_heap* to, - const t_conn_cost_params cost_params, + const t_conn_cost_params& cost_params, RRNodeId from_node, RRNodeId to_node, RREdgeId from_edge, @@ -237,8 +237,8 @@ class ConnectionRouter : public ConnectionRouterInterface { // Find paths from current heap to all nodes in the RR graph vtr::vector timing_driven_find_all_shortest_paths_from_heap( - const t_conn_cost_params cost_params, - t_bb bounding_box); + const t_conn_cost_params& cost_params, + const t_bb& bounding_box); void empty_heap_annotating_node_route_inf(); @@ -247,11 +247,11 @@ class ConnectionRouter : public ConnectionRouterInterface { void add_route_tree_to_heap(const RouteTreeNode& rt_node, RRNodeId target_node, const t_bb& bounding_box, - const t_conn_cost_params cost_params, + const t_conn_cost_params& cost_params, bool from_high_fanout); // Evaluate node costs using the RCV algorith - float compute_node_cost_using_rcv(const t_conn_cost_params cost_params, + float compute_node_cost_using_rcv(const t_conn_cost_params& cost_params, RRNodeId to_node, RRNodeId target_node, float backwards_delay, @@ -266,15 +266,15 @@ class ConnectionRouter : public ConnectionRouterInterface { const RouteTreeNode& rt_node, RRNodeId target_node, const t_bb& bounding_box, - const t_conn_cost_params cost_params, + const t_conn_cost_params& cost_params, bool is_high_fanout); t_bb add_high_fanout_route_tree_to_heap( const RouteTreeNode& rt_root, RRNodeId target_node, - const t_conn_cost_params cost_params, + const t_conn_cost_params& cost_params, const SpatialRouteTreeLookup& spatial_route_tree_lookup, - t_bb net_bounding_box); + const t_bb& net_bounding_box); const DeviceGrid& grid_; const RouterLookahead& router_lookahead_; diff --git a/vpr/src/route/connection_router_interface.h b/vpr/src/route/connection_router_interface.h index b58a037ec4f..803114a6639 100644 --- a/vpr/src/route/connection_router_interface.h +++ b/vpr/src/route/connection_router_interface.h @@ -56,8 +56,8 @@ class ConnectionRouterInterface { const RouteTree& tree, RRNodeId source_node, RRNodeId sink_node, - const t_conn_cost_params cost_params, - t_bb bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box, RouterStats& router_stats, const ConnectionParameters& conn_params, bool can_grow_bb) @@ -77,8 +77,8 @@ class ConnectionRouterInterface { const RouteTree& tree, RRNodeId source_node, RRNodeId sink_node, - const t_conn_cost_params cost_params, - t_bb bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box, const SpatialRouteTreeLookup& spatial_rt_lookup, RouterStats& router_stats, const ConnectionParameters& conn_params, @@ -96,8 +96,8 @@ class ConnectionRouterInterface { // RouterLookahead used should be the NoOpLookahead. virtual vtr::vector timing_driven_find_all_shortest_paths_from_route_tree( const RouteTree& tree, - const t_conn_cost_params cost_params, - t_bb bounding_box, + const t_conn_cost_params& cost_params, + const t_bb& bounding_box, RouterStats& router_stats, const ConnectionParameters& conn_params) = 0; diff --git a/vpr/src/route/route_common.h b/vpr/src/route/route_common.h index 23fa1473a81..76b224c0ec5 100644 --- a/vpr/src/route/route_common.h +++ b/vpr/src/route/route_common.h @@ -156,7 +156,7 @@ t_heap* prepare_to_add_node_to_heap( return hptr; } -/* Puts an rr_node on the heap if it is the cheapest path. */ +/** Puts an rr_node on the heap if it is the cheapest path. */ template void add_node_to_heap( T* heap, diff --git a/vpr/src/route/route_parallel.cpp b/vpr/src/route/route_parallel.cpp index 73aab09ac20..269d3b53ba2 100644 --- a/vpr/src/route/route_parallel.cpp +++ b/vpr/src/route/route_parallel.cpp @@ -79,7 +79,11 @@ class RouteIterCtx { }; /** Don't try to decompose nets if # of iterations > this. */ -constexpr int MAX_DECOMP_ITER = 6; +constexpr int MAX_DECOMP_ITER = 5; + +/** Don't try to decompose a regular net more than this many times. + * For instance, max_decomp_depth=2 means one regular net can become 4 virtual nets at max. */ +constexpr int MAX_DECOMP_DEPTH = 2; /** * Try to route in parallel with the given ConnectionRouter. @@ -234,13 +238,42 @@ bool try_parallel_route_tmpl(const Netlist<>& net_list, route_budgets budgeting_inf(net_list, is_flat); + const RouterLookahead* router_lookahead; + + { + vtr::ScopedStartFinishTimer timer("Obtaining lookahead"); // This needs to be called before filling intra-cluster lookahead maps to ensure that the intra-cluster lookahead maps are initialized. - const RouterLookahead* router_lookahead = get_cached_router_lookahead(det_routing_arch, - router_opts.lookahead_type, - router_opts.write_router_lookahead, - router_opts.read_router_lookahead, - segment_inf, - is_flat); + router_lookahead = get_cached_router_lookahead(det_routing_arch, + router_opts.lookahead_type, + router_opts.write_router_lookahead, + router_opts.read_router_lookahead, + segment_inf, + is_flat); + + if (is_flat) { + // If is_flat is true, the router lookahead maps related to intra-cluster resources should be initialized since + // they haven't been initialized when the map related to global resources was initialized. + auto cache_key = route_ctx.router_lookahead_cache_key_; + std::unique_ptr mut_router_lookahead(route_ctx.cached_router_lookahead_.release()); + VTR_ASSERT(mut_router_lookahead); + route_ctx.cached_router_lookahead_.clear(); + if (!router_opts.read_intra_cluster_router_lookahead.empty()) { + mut_router_lookahead->read_intra_cluster(router_opts.read_intra_cluster_router_lookahead); + } else { + mut_router_lookahead->compute_intra_tile(); + } + route_ctx.cached_router_lookahead_.set(cache_key, std::move(mut_router_lookahead)); + router_lookahead = get_cached_router_lookahead(det_routing_arch, + router_opts.lookahead_type, + router_opts.write_router_lookahead, + router_opts.read_router_lookahead, + segment_inf, + is_flat); + if (!router_opts.write_intra_cluster_router_lookahead.empty()) { + router_lookahead->write_intra_cluster(router_opts.write_intra_cluster_router_lookahead); + } + } + } VTR_ASSERT(router_lookahead != nullptr); @@ -1099,6 +1132,9 @@ bool should_decompose_vnet(const VirtualNet& vnet, const PartitionTreeNode& node if (!node.left || !node.right) return false; + if(vnet.times_decomposed >= MAX_DECOMP_DEPTH) + return false; + /* Cutline doesn't go through vnet (a valid case: it wasn't there when partition tree was being built) */ if(node.cutline_axis == Axis::X){ if(vnet.clipped_bb.xmin > node.cutline_pos || vnet.clipped_bb.xmax < node.cutline_pos) @@ -1151,6 +1187,8 @@ std::tuple make_decomposed_pair(ParentNetId net_id, int source_half.clipped_bb = clip_to_side(bb, cutline_axis, cutline_pos, source_side); sink_half.net_id = net_id; sink_half.clipped_bb = clip_to_side(bb, cutline_axis, cutline_pos, !source_side); + source_half.times_decomposed = 1; + sink_half.times_decomposed = 1; if (source_side == Side::RIGHT) return std::make_tuple(sink_half, source_half); else @@ -1180,7 +1218,7 @@ bool is_close_to_cutline(RRNodeId inode, int cutline_pos, Axis cutline_axis, int const auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - /* Cutlines are considered to be at x + 0.5, set a thickness of 1 here by checking for equality */ + /* Cutlines are considered to be at x + 0.5, set a thickness of +1 here by checking for equality */ if(cutline_axis == Axis::X){ return rr_graph.node_xlow(inode) - thickness <= cutline_pos && rr_graph.node_xhigh(inode) + thickness >= cutline_pos; } else { @@ -1188,6 +1226,23 @@ bool is_close_to_cutline(RRNodeId inode, int cutline_pos, Axis cutline_axis, int } } +/** Is \p inode too close to this bb? (Assuming it's inside) + * We assign some "thickness" to the node and check for collision */ +bool is_close_to_bb(RRNodeId inode, const t_bb& bb, int thickness){ + const auto& device_ctx = g_vpr_ctx.device(); + const auto& rr_graph = device_ctx.rr_graph; + + int xlow = rr_graph.node_xlow(inode) - thickness; + int ylow = rr_graph.node_ylow(inode) - thickness; + int xhigh = rr_graph.node_xhigh(inode) + thickness; + int yhigh = rr_graph.node_yhigh(inode) + thickness; + + return (xlow <= bb.xmin && xhigh >= bb.xmin) + || (ylow <= bb.ymin && yhigh >= bb.ymin) + || (xlow <= bb.xmax && xhigh >= bb.xmax) + || (ylow <= bb.ymax && yhigh >= bb.ymax); +} + /** Is this net divided very unevenly? If so, put all sinks in the small side into \p out and return true */ bool get_reduction_isinks(ParentNetId net_id, int cutline_pos, Axis cutline_axis, std::set& out){ const auto& route_ctx = g_vpr_ctx.routing(); @@ -1207,6 +1262,8 @@ bool get_reduction_isinks(ParentNetId net_id, int cutline_pos, Axis cutline_axis if(inside_bb(rr_sink, sink_side_bb)){ if(!is_isink_reached[isink]) sinks.push_back(isink); + if(is_close_to_cutline(rr_sink, cutline_pos, cutline_axis, 1)) /* Don't count sinks close to cutline */ + continue; all_sinks++; } } @@ -1219,7 +1276,7 @@ bool get_reduction_isinks(ParentNetId net_id, int cutline_pos, Axis cutline_axis } /* Is the sink side narrow? In that case, it may not contain enough wires to route */ - const int MIN_WIDTH = 8; + const int MIN_WIDTH = 10; int W = sink_side_bb.xmax - sink_side_bb.xmin + 1; int H = sink_side_bb.ymax - sink_side_bb.ymin + 1; if(W < MIN_WIDTH || H < MIN_WIDTH){ @@ -1298,6 +1355,8 @@ std::tuple make_decomposed_pair_from_vnet(const VirtualN left_half.clipped_bb = clip_to_side(vnet.clipped_bb, cutline_axis, cutline_pos, Side::LEFT); right_half.net_id = vnet.net_id; right_half.clipped_bb = clip_to_side(vnet.clipped_bb, cutline_axis, cutline_pos, Side::RIGHT); + left_half.times_decomposed = vnet.times_decomposed + 1; + right_half.times_decomposed = vnet.times_decomposed + 1; return std::make_tuple(left_half, right_half); } @@ -1320,7 +1379,7 @@ int get_reduction_isinks_vnet(const VirtualNet& vnet, int cutline_pos, Axis cutl std::vector sinks; int all_sinks = 0; - const int MIN_WIDTH = 8; + const int MIN_WIDTH = 10; int W = side_bb.xmax - side_bb.xmin + 1; int H = side_bb.ymax - side_bb.ymin + 1; bool is_narrow = (W < MIN_WIDTH || H < MIN_WIDTH); @@ -1334,7 +1393,9 @@ int get_reduction_isinks_vnet(const VirtualNet& vnet, int cutline_pos, Axis cutl continue; if(!is_isink_reached[isink]) sinks.push_back(isink); - if(is_narrow) /* If the box is narrow, no need to look for unique sink locs -- we are going to reduce it anyway */ + if(is_narrow) /* If the box is narrow, don't check for all_sinks -- we are going to reduce it anyway */ + continue; + if(is_close_to_bb(rr_sink, side_bb, 1)) continue; all_sinks++; if(all_sinks > MIN_SINKS){ @@ -1354,6 +1415,50 @@ int get_reduction_isinks_vnet(const VirtualNet& vnet, int cutline_pos, Axis cutl return reduced_sides; } +/** Reduce only one side if vnet has source */ +bool get_reduction_isinks_vnet_with_source(const VirtualNet& vnet, int cutline_pos, Axis cutline_axis, std::set& out){ + const auto& route_ctx = g_vpr_ctx.routing(); + + const RouteTree& tree = route_ctx.route_trees[vnet.net_id].value(); + int num_sinks = tree.num_sinks(); + std::vector sinks; + int all_sinks = 0; + + Side source_side = which_side(tree.root().inode, cutline_pos, cutline_axis); + const t_bb& net_bb = vnet.clipped_bb; + t_bb sink_side_bb = clip_to_side(net_bb, cutline_axis, cutline_pos, !source_side); + auto& is_isink_reached = tree.get_is_isink_reached(); + /* Get sinks on the sink side */ + for(int isink=1; isink get_decomposition_isinks_vnet(const VirtualNet& vnet, int cutline_pos, Axis cutline_axis) { const auto& route_ctx = g_vpr_ctx.routing(); @@ -1364,9 +1469,17 @@ std::vector get_decomposition_isinks_vnet(const VirtualNet& vnet, int cutli /* Sometimes cutlines divide a net very unevenly. In that case, just route to all * sinks in the small side and unblock. Add convex hull since we are in a vnet which * may not have a source at all */ - int reduced_sides = get_reduction_isinks_vnet(vnet, cutline_pos, cutline_axis, sampled_set); - if(reduced_sides < 2) - convex_hull_downsample_vnet(vnet, sampled_set); + if(inside_bb(tree.root().inode, vnet.clipped_bb)){ /* We have source, no need to sample after reduction in most cases */ + bool is_reduced = get_reduction_isinks_vnet_with_source(vnet, cutline_pos, cutline_axis, sampled_set); + bool source_on_cutline = is_close_to_cutline(tree.root().inode, cutline_pos, cutline_axis, 1); + if(!is_reduced || source_on_cutline) + convex_hull_downsample_vnet(vnet, sampled_set); + }else{ + int reduced_sides = get_reduction_isinks_vnet(vnet, cutline_pos, cutline_axis, sampled_set); + if(reduced_sides < 2){ + convex_hull_downsample_vnet(vnet, sampled_set); + } + } std::vector isinks = get_vnet_isinks(vnet); auto& is_isink_reached = tree.get_is_isink_reached(); @@ -1376,9 +1489,12 @@ std::vector get_decomposition_isinks_vnet(const VirtualNet& vnet, int cutli for(int isink: isinks){ if(is_isink_reached[isink]) continue; - RRNodeId rr_sink = route_ctx.net_rr_terminals[vnet.net_id][isink]; - if(is_close_to_cutline(rr_sink, cutline_pos, cutline_axis, 1)) + if(is_close_to_cutline(rr_sink, cutline_pos, cutline_axis, 1)){ + sampled_set.insert(isink); + continue; + } + if(is_close_to_bb(rr_sink, vnet.clipped_bb, 1)) sampled_set.insert(isink); } @@ -1423,12 +1539,12 @@ vtr::optional> route_and_decompose(ParentNetI tree.root()); } - /* Get pin criticalities */ - std::vector pin_criticality(num_sinks + 1); - /* Get the isinks to actually route to */ std::vector isinks_to_route = get_decomposition_isinks(net_id, node.cutline_pos, node.cutline_axis, ctx); + /* Get pin criticalities */ + std::vector pin_criticality(num_sinks + 1); + for (int isink : isinks_to_route) { if (ctx.timing_info) { auto pin = ctx.net_list.net_pin(net_id, isink); @@ -1487,14 +1603,13 @@ vtr::optional> route_and_decompose(ParentNetI cost_params, ctx.router_opts, tree, - spatial_route_tree_lookup, + (high_fanout ? &spatial_route_tree_lookup : nullptr), ctx.router_stats.local(), ctx.budgeting_inf, ctx.routing_predictor, ctx.choking_spots[net_id], ctx.is_flat, - route_ctx.route_bb[net_id], - num_sinks); + route_ctx.route_bb[net_id]); if (!sink_flags.success) /* Couldn't route. It's too much work to backtrack from here, just fail. */ return vtr::nullopt; @@ -1553,19 +1668,6 @@ vtr::optional> route_and_decompose_vnet(const if(isinks_to_route.size() == 0) /* All the sinks we were going to route are already reached -- just break down the net */ return make_decomposed_pair_from_vnet(vnet, node.cutline_pos, node.cutline_axis); - bool high_fanout = is_high_fanout(num_sinks, ctx.router_opts.high_fanout_threshold); - - /* I think it's OK to build the full high fanout lookup for both sides of the net. - * The work required to get the right bounding box and nodes into the lookup may - * be more than to just build it twice. */ - SpatialRouteTreeLookup spatial_route_tree_lookup; - if (high_fanout) { - spatial_route_tree_lookup = build_route_tree_spatial_lookup(ctx.net_list, - route_ctx.route_bb, - vnet.net_id, - tree.root()); - } - /* Get pin criticalities */ std::vector pin_criticality(tree.num_sinks() + 1); @@ -1590,6 +1692,19 @@ vtr::optional> route_and_decompose_vnet(const return pin_criticality[a] > pin_criticality[b]; }); + bool high_fanout = is_high_fanout(tree.num_sinks(), ctx.router_opts.high_fanout_threshold); + + /* I think it's OK to build the full high fanout lookup for both sides of the net. + * The work required to get the right bounding box and nodes into the lookup may + * be more than to just build it twice. */ + SpatialRouteTreeLookup spatial_route_tree_lookup; + if (high_fanout) { + spatial_route_tree_lookup = build_route_tree_spatial_lookup(ctx.net_list, + route_ctx.route_bb, + vnet.net_id, + tree.root()); + } + /* Update base costs according to fanout and criticality rules * TODO: Not sure what this does and if it's safe to call in parallel */ update_rr_base_costs(num_sinks); @@ -1627,14 +1742,13 @@ vtr::optional> route_and_decompose_vnet(const cost_params, ctx.router_opts, tree, - spatial_route_tree_lookup, + (high_fanout ? &spatial_route_tree_lookup : nullptr), ctx.router_stats.local(), ctx.budgeting_inf, ctx.routing_predictor, ctx.choking_spots[vnet.net_id], ctx.is_flat, - vnet.clipped_bb, - num_sinks); + vnet.clipped_bb); if (!sink_flags.success) /* Couldn't route. It's too much work to backtrack from here, just fail. */ return vtr::nullopt; @@ -1771,7 +1885,7 @@ NetResultFlags route_virtual_net(const VirtualNet& vnet, RouteIterCtx sample_both_epicenters(ParentNetId net_id, Axis cutline_axis, int cutline_pos) { +inline void sample_both_epicenters(ParentNetId net_id, int cutline_pos, Axis cutline_axis, std::set& out) { const auto& route_ctx = g_vpr_ctx.routing(); const auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; @@ -181,6 +181,7 @@ inline std::vector sample_both_epicenters(ParentNetId net_id, Axis cutline_ const RouteTree& tree = route_ctx.route_trees[net_id].value(); int num_sinks = tree.num_sinks(); + auto& is_isink_reached = tree.get_is_isink_reached(); const t_bb& net_bb = route_ctx.route_bb[net_id]; t_bb left_bb = clip_to_side2(net_bb, cutline_axis, cutline_pos, Side::LEFT); t_bb right_bb = clip_to_side2(net_bb, cutline_axis, cutline_pos, Side::RIGHT); @@ -193,7 +194,7 @@ inline std::vector sample_both_epicenters(ParentNetId net_id, Axis cutline_ int best_left_isink = 0; int best_right_isink = 0; - for(int isink=0; isink sample_both_epicenters(ParentNetId net_id, Axis cutline_ } } - std::vector out; - if(best_left_isink) - out.push_back(best_left_isink); - if(best_right_isink) - out.push_back(best_right_isink); - - return out; + if(best_left_isink && !is_isink_reached[best_left_isink]) + out.insert(best_left_isink); + if(best_right_isink && !is_isink_reached[best_right_isink]) + out.insert(best_right_isink); } /** Sample one sink closest to each bbox's epicenter. The rationale is that the * sinks around the cutline will be sampled by the sink thickness rule anyway. */ -inline std::vector sample_both_epicenters_vnet(const VirtualNet& vnet, Axis cutline_axis, int cutline_pos) { +inline void sample_both_epicenters_vnet(const VirtualNet& vnet, int cutline_pos, Axis cutline_axis, std::set& out) { const auto& route_ctx = g_vpr_ctx.routing(); const auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; @@ -231,6 +229,7 @@ inline std::vector sample_both_epicenters_vnet(const VirtualNet& vnet, Axis const RouteTree& tree = route_ctx.route_trees[vnet.net_id].value(); int num_sinks = tree.num_sinks(); + auto& is_isink_reached = tree.get_is_isink_reached(); const t_bb& net_bb = vnet.clipped_bb; t_bb left_bb = clip_to_side2(net_bb, cutline_axis, cutline_pos, Side::LEFT); t_bb right_bb = clip_to_side2(net_bb, cutline_axis, cutline_pos, Side::RIGHT); @@ -262,13 +261,10 @@ inline std::vector sample_both_epicenters_vnet(const VirtualNet& vnet, Axis } } - std::vector out; - if(best_left_isink) - out.push_back(best_left_isink); - if(best_right_isink) - out.push_back(best_right_isink); - - return out; + if(best_left_isink && !is_isink_reached[best_left_isink]) + out.insert(best_left_isink); + if(best_right_isink && !is_isink_reached[best_right_isink]) + out.insert(best_right_isink); } /** Sample sinks on the convex hull of the set {source + sinks}. Skip sinks if already reached. */ @@ -404,3 +400,104 @@ inline std::vector sample_single_sink(ParentNetId net_id, const std::vector return {}; } + +inline bool is_close_to_cutline2(RRNodeId inode, int cutline_pos, Axis cutline_axis, int thickness){ + const auto& device_ctx = g_vpr_ctx.device(); + const auto& rr_graph = device_ctx.rr_graph; + + /* Cutlines are considered to be at x + 0.5, set a thickness of +1 here by checking for equality */ + if(cutline_axis == Axis::X){ + return rr_graph.node_xlow(inode) - thickness <= cutline_pos && rr_graph.node_xhigh(inode) + thickness >= cutline_pos; + } else { + return rr_graph.node_ylow(inode) - thickness <= cutline_pos && rr_graph.node_yhigh(inode) + thickness >= cutline_pos; + } +} + +/** Is \p inode too close to this bb? (Assuming it's inside) + * We assign some "thickness" to the node and check for collision */ +inline bool is_close_to_bb2(RRNodeId inode, const t_bb& bb, int thickness){ + const auto& device_ctx = g_vpr_ctx.device(); + const auto& rr_graph = device_ctx.rr_graph; + + int xlow = rr_graph.node_xlow(inode) - thickness; + int ylow = rr_graph.node_ylow(inode) - thickness; + int xhigh = rr_graph.node_xhigh(inode) + thickness; + int yhigh = rr_graph.node_yhigh(inode) + thickness; + + return (xlow <= bb.xmin && xhigh >= bb.xmin) + || (ylow <= bb.ymin && yhigh >= bb.ymin) + || (xlow <= bb.xmax && xhigh >= bb.xmax) + || (ylow <= bb.ymax && yhigh >= bb.ymax); +} + +/** Sample the most critical sinks on both sides. Omit reached sinks. */ +inline void sample_two_sinks(ParentNetId net_id, const std::vector& pin_criticality, int cutline_pos, Axis cutline_axis, std::set& out) { + const auto& route_ctx = g_vpr_ctx.routing(); + const RouteTree& tree = route_ctx.route_trees[net_id].value(); + auto& is_isink_reached = tree.get_is_isink_reached(); + + std::vector isinks(tree.num_sinks()); + std::iota(isinks.begin(), isinks.end(), 1); + std::sort(isinks.begin(), isinks.end(), [&](int i, int j){ + return pin_criticality[i] > pin_criticality[j]; + }); + + int left_isink = -1; + int right_isink = -1; + const t_bb& net_bb = route_ctx.route_bb[net_id]; + t_bb left_bb = clip_to_side2(net_bb, cutline_axis, cutline_pos, Side::LEFT); + t_bb right_bb = clip_to_side2(net_bb, cutline_axis, cutline_pos, Side::RIGHT); + + for(int isink: isinks){ + RRNodeId rr_sink = route_ctx.net_rr_terminals[net_id][isink]; + if(is_close_to_cutline2(rr_sink, cutline_pos, cutline_axis, 3)) + continue; + if(inside_bb(rr_sink, left_bb)){ + left_isink = isink; + }else if(inside_bb(rr_sink, right_bb)){ + right_isink = isink; + } + if(left_isink > -1 && right_isink > -1) + break; + } + + if(left_isink > -1 && !is_isink_reached[left_isink]) + out.insert(left_isink); + if(right_isink > -1 && !is_isink_reached[right_isink]) + out.insert(right_isink); +} + +/** Sample the most critical sinks on both sides. Omit reached sinks. */ +inline void sample_two_sinks_vnet(const VirtualNet& vnet, const std::vector& pin_criticality, int cutline_pos, Axis cutline_axis, std::set& out) { + const auto& route_ctx = g_vpr_ctx.routing(); + const RouteTree& tree = route_ctx.route_trees[vnet.net_id].value(); + auto& is_isink_reached = tree.get_is_isink_reached(); + + std::vector isinks(tree.num_sinks()); + std::iota(isinks.begin(), isinks.end(), 1); + std::sort(isinks.begin(), isinks.end(), [&](int i, int j){ + return pin_criticality[i] > pin_criticality[j]; + }); + + int left_isink = -1; + int right_isink = -1; + const t_bb& net_bb = vnet.clipped_bb; + t_bb left_bb = clip_to_side2(net_bb, cutline_axis, cutline_pos, Side::LEFT); + t_bb right_bb = clip_to_side2(net_bb, cutline_axis, cutline_pos, Side::RIGHT); + + for(int isink: isinks){ + RRNodeId rr_sink = route_ctx.net_rr_terminals[vnet.net_id][isink]; + if(inside_bb(rr_sink, left_bb) && !is_close_to_cutline2(rr_sink, cutline_pos, cutline_axis, 3) && !is_close_to_bb2(rr_sink, left_bb, 1)){ + left_isink = isink; + }else if(inside_bb(rr_sink, right_bb) && !is_close_to_cutline2(rr_sink, cutline_pos, cutline_axis, 3) && !is_close_to_bb2(rr_sink, right_bb, 1)){ + right_isink = isink; + } + if(left_isink > -1 && right_isink > -1) + break; + } + + if(left_isink > -1 && !is_isink_reached[left_isink]) + out.insert(left_isink); + if(right_isink > -1 && !is_isink_reached[right_isink]) + out.insert(right_isink); +} \ No newline at end of file diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index 75b2cdcda47..cba3339f6f9 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp @@ -1023,14 +1023,13 @@ NetResultFlags timing_driven_route_net(ConnectionRouter& router, cost_params, router_opts, tree, - spatial_route_tree_lookup, + (high_fanout ? &spatial_route_tree_lookup : nullptr), router_stats, budgeting_inf, routing_predictor, choking_spots, is_flat, - route_ctx.route_bb[net_id], - num_sinks); + route_ctx.route_bb[net_id]); flags.retry_with_full_bb |= sink_flags.retry_with_full_bb; @@ -1171,14 +1170,13 @@ NetResultFlags timing_driven_route_sink(ConnectionRouter& router, const t_conn_cost_params cost_params, const t_router_opts& router_opts, RouteTree& tree, - SpatialRouteTreeLookup& spatial_rt_lookup, + SpatialRouteTreeLookup* spatial_rt_lookup, RouterStats& router_stats, route_budgets& budgeting_inf, const RoutingPredictor& routing_predictor, const std::vector>& choking_spots, bool is_flat, - const t_bb& bounding_box, - size_t num_sinks) { + const t_bb& bounding_box) { const auto& device_ctx = g_vpr_ctx.device(); auto& route_ctx = g_vpr_ctx.mutable_routing(); @@ -1199,7 +1197,7 @@ NetResultFlags timing_driven_route_sink(ConnectionRouter& router, bool can_grow_bb = (router_opts.router_algorithm != PARALLEL); bool net_is_global = net_list.net_is_global(net_id); - bool high_fanout = is_high_fanout(num_sinks, router_opts.high_fanout_threshold); + bool high_fanout = (spatial_rt_lookup != nullptr); constexpr float HIGH_FANOUT_CRITICALITY_THRESHOLD = 0.9; bool sink_critical = (cost_params.criticality > HIGH_FANOUT_CRITICALITY_THRESHOLD); bool net_is_clock = route_ctx.is_clock_net[net_id] != 0; @@ -1217,7 +1215,7 @@ NetResultFlags timing_driven_route_sink(ConnectionRouter& router, sink_node, cost_params, bounding_box, - spatial_rt_lookup, + *spatial_rt_lookup, router_stats, conn_params, can_grow_bb); @@ -1256,9 +1254,9 @@ NetResultFlags timing_driven_route_sink(ConnectionRouter& router, route_ctx.rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */ vtr::optional new_branch, new_sink; - std::tie(new_branch, new_sink) = tree.update_from_heap(&cheapest, target_pin, ((high_fanout) ? &spatial_rt_lookup : nullptr), is_flat); + std::tie(new_branch, new_sink) = tree.update_from_heap(&cheapest, target_pin, spatial_rt_lookup, is_flat); - VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(tree.root(), spatial_rt_lookup)); + VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(tree.root(), *spatial_rt_lookup)); if (f_router_debug) { std::string msg = vtr::string_fmt("Routed Net %zu connection %d to RR node %d successfully", size_t(net_id), itarget, sink_node); @@ -1358,19 +1356,19 @@ void setup_routing_resources(int itry, profiling::net_rebuild_end(num_sinks, tree->get_remaining_isinks().size()); // still need to calculate the tree's time delay - tree.value().reload_timing(); + tree->reload_timing(); // check for R_upstream C_downstream and edge correctness - VTR_ASSERT_SAFE(tree.value().is_valid()); + VTR_ASSERT_SAFE(tree->is_valid()); // congestion should've been pruned away - VTR_ASSERT_SAFE(tree.value().is_uncongested()); + VTR_ASSERT_SAFE(tree->is_uncongested()); // mark remaining ends mark_remaining_ends(net_id); // mark the lookup (rr_node_route_inf) for existing tree elements as NO_PREVIOUS so add_to_path stops when it reaches one of them - update_rr_route_inf_from_tree(tree.value().root()); + update_rr_route_inf_from_tree(tree->root()); } // completed constructing the partial route tree and updated all other data structures to match diff --git a/vpr/src/route/route_timing.h b/vpr/src/route/route_timing.h index 2595bf28074..01d1228cf7a 100644 --- a/vpr/src/route/route_timing.h +++ b/vpr/src/route/route_timing.h @@ -240,14 +240,13 @@ NetResultFlags timing_driven_route_sink(ConnectionRouter& router, const t_conn_cost_params cost_params, const t_router_opts& router_opts, RouteTree& tree, - SpatialRouteTreeLookup& spatial_rt_lookup, + SpatialRouteTreeLookup* spatial_rt_lookup, RouterStats& router_stats, route_budgets& budgeting_inf, const RoutingPredictor& routing_predictor, const std::vector>& choking_spots, bool is_flat, - const t_bb& bounding_box, - size_t num_sinks); + const t_bb& bounding_box); /** Attempt to route a single net. * diff --git a/vpr/src/route/virtual_net.h b/vpr/src/route/virtual_net.h index 453986ff531..4c0cff5e4ba 100644 --- a/vpr/src/route/virtual_net.h +++ b/vpr/src/route/virtual_net.h @@ -15,4 +15,7 @@ class VirtualNet { /** Clipped bounding box. This is needed to enable decomposing a net multiple times. * Otherwise we would need a history of side types and cutlines to compute the bbox. */ t_bb clipped_bb; + /** Times decomposed -- don't decompose vnets too deeply or + * it disturbs net ordering when it's eventually disabled & creates a runtime bump. */ + int times_decomposed = 0; };