Skip to content

Commit

Permalink
add comments to NetlistRouter
Browse files Browse the repository at this point in the history
  • Loading branch information
duck2 committed Nov 10, 2023
1 parent 83d6fc0 commit 79a9c5c
Show file tree
Hide file tree
Showing 15 changed files with 227 additions and 242 deletions.
23 changes: 20 additions & 3 deletions vpr/src/route/ParallelNetlistRouter.h
Original file line number Diff line number Diff line change
@@ -1,12 +1,23 @@
#pragma once

/** @file Parallel case for NetlistRouter. Builds a PartitionTree from the
* netlist according to net bounding boxes and goes over it with a tbb::task_group.
* [!] Is not expected to use more than 2 effective threads on avg. */
/** @file Parallel case for NetlistRouter. Builds a \ref PartitionTree from the
* netlist according to net bounding boxes. Tree nodes are then routed in parallel
* using tbb::task_group. Each task routes the nets inside a node serially and then adds
* its child nodes to the task queue. This approach is serially equivalent & deterministic,
* but it can reduce QoR in congested cases [0].
*
* Note that the parallel router does not support graphical router breakpoints.
*
* [0]: F. Koşar, "A net-decomposing parallel FPGA router", MS thesis, UofT ECE, 2023 */
#include "netlist_routers.h"

#include <tbb/task_group.h>

/** Parallel impl for NetlistRouter.
* Holds enough context members to glue together ConnectionRouter and net routing functions,
* such as \ref route_net. Keeps the members in thread-local storage where needed,
* i.e. the per-thread ConnectionRouter and RouteIterResults instances.
* See \ref route_net. */
template<typename HeapType>
class ParallelNetlistRouter : public NetlistRouter {
public:
Expand Down Expand Up @@ -37,12 +48,17 @@ class ParallelNetlistRouter : public NetlistRouter {
, _is_flat(is_flat) {}
~ParallelNetlistRouter() {}

/** Run a single iteration of netlist routing for this->_net_list. This usually means calling
* \ref route_net for each net, which will handle other global updates.
* \return RouteIterResults for this iteration. */
RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
void set_rcv_enabled(bool x);
void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info);

private:
/** A single task to route nets inside a PartitionTree node and add tasks for its child nodes to task group \p g. */
void route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node, int itry, float pres_fac, float worst_neg_slack);

ConnectionRouter<HeapType> _make_router(const RouterLookahead* router_lookahead, bool is_flat) {
auto& device_ctx = g_vpr_ctx.device();
auto& route_ctx = g_vpr_ctx.mutable_routing();
Expand All @@ -57,6 +73,7 @@ class ParallelNetlistRouter : public NetlistRouter {
route_ctx.rr_node_route_inf,
is_flat);
}

/* Context fields */
tbb::enumerable_thread_specific<ConnectionRouter<HeapType>> _routers_th;
const Netlist<>& _net_list;
Expand Down
15 changes: 10 additions & 5 deletions vpr/src/route/ParallelNetlistRouter.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,23 @@
#include "route_net.h"
#include "vtr_time.h"

/** A parallel netlist router. Builds a PartitionTree from the netlist depending on bounding boxes
* and goes over it with a tbb::task_group. Is not expected to use more than 2 effective threads on avg. */
template<typename HeapType>
inline RouteIterResults ParallelNetlistRouter<HeapType>::route_netlist(int itry, float pres_fac, float worst_neg_slack) {
/* Reset results for each thread */
for (auto& results : _results_th) {
results = RouteIterResults();
}

/* Organize netlist into a PartitionTree.
* Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */
PartitionTree tree(_net_list);

/* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */
tbb::task_group g;
route_partition_tree_node(g, tree.root(), itry, pres_fac, worst_neg_slack);
g.wait();

/* Combine results from threads */
RouteIterResults out;
for (auto& results : _results_th) {
out.stats.combine(results.stats);
Expand All @@ -40,7 +43,7 @@ void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group&

vtr::Timer t;
for (auto net_id : node.nets) {
auto flags = try_timing_driven_route_net(
auto flags = route_net(
_routers_th.local(),
_net_list,
net_id,
Expand All @@ -59,11 +62,13 @@ void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group&
_choking_spots[net_id],
_is_flat);

if (!flags.success && !flags.retry_with_full_bb) { /* Disconnected RRG */
if (!flags.success && !flags.retry_with_full_bb) {
/* Disconnected RRG and ConnectionRouter doesn't think growing the BB will work */
_results_th.local().is_routable = false;
return;
}
if (flags.retry_with_full_bb) { /* Grow the BB and leave it to the next iteration */
if (flags.retry_with_full_bb) {
/* ConnectionRouter thinks we should grow the BB. Do that and leave this net unrouted for now */
route_ctx.route_bb[net_id] = full_device_bb();
continue;
}
Expand Down
2 changes: 1 addition & 1 deletion vpr/src/route/SerialNetlistRouter.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once

/** @file Serial case for NetlistRouter: just loop through nets */
/** @file Serial case for \ref NetlistRouter: just loop through nets */

#include "netlist_routers.h"

Expand Down
14 changes: 8 additions & 6 deletions vpr/src/route/SerialNetlistRouter.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size();
});

for (size_t i = 0; i < sorted_nets.size(); i++) {
ParentNetId net_id = sorted_nets[i];
NetResultFlags flags = try_timing_driven_route_net(
for (size_t inet = 0; inet < sorted_nets.size(); inet++) {
ParentNetId net_id = sorted_nets[inet];
NetResultFlags flags = route_net(
_router,
_net_list,
net_id,
Expand All @@ -37,14 +37,16 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
_choking_spots[net_id],
_is_flat);

if (!flags.success && !flags.retry_with_full_bb) { /* Disconnected RRG */
if (!flags.success && !flags.retry_with_full_bb) {
/* Disconnected RRG and ConnectionRouter doesn't think growing the BB will work */
out.is_routable = false;
return out;
}

if (flags.retry_with_full_bb) { /* Grow the BB and retry this net */
if (flags.retry_with_full_bb) {
/* Grow the BB and retry this net right away. */
route_ctx.route_bb[net_id] = full_device_bb();
i--;
inet--;
continue;
}

Expand Down
13 changes: 12 additions & 1 deletion vpr/src/route/netlist_routers.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,23 @@ struct RouteIterResults {
RouterStats stats;
};

/** Route a given netlist. Takes a big context and passes it around to net & sink routing fns. */
/** Route a given netlist. Takes a big context and passes it around to net & sink routing fns.
* route_netlist only needs to call the functions in route_net.h/tpp: they handle the global
* bookkeeping. */
class NetlistRouter {
  public:
    /* Virtual so that concrete routers can be destroyed through a NetlistRouter pointer. */
    virtual ~NetlistRouter() = default;

    /** Perform one iteration of netlist routing over this->_net_list. Implementations
     * typically call route_net on every net and leave the remaining global updates to it.
     * \return the RouteIterResults gathered during this iteration. */
    virtual RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack) = 0;

    /** Turn RCV on or off for every ConnectionRouter owned by this NetlistRouter. */
    virtual void set_rcv_enabled(bool x) = 0;

    /** Replace the timing_info pointer used by this NetlistRouter. timing_info does change
     * between iterations at times, but too rarely to justify exposing it as a public member. */
    virtual void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info) = 0;
};

Expand Down
18 changes: 5 additions & 13 deletions vpr/src/route/route.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,6 @@
#include "route_utils.h"
#include "vtr_time.h"

/** Attempts a routing via the AIR algorithm. \p width_fac
* specifies the relative width of the channels, while the members of
* \p router_opts determine the value of the costs assigned to routing
* resource node, etc. \p det_routing_arch describes the detailed routing
* architecture (connection and switch boxes) of the FPGA; it is used
* only if a DETAILED routing has been selected.
*
* \return Success status. */
bool route(const Netlist<>& net_list,
int width_fac,
const t_router_opts& router_opts,
Expand Down Expand Up @@ -163,7 +155,7 @@ bool route(const Netlist<>& net_list,
VTR_ASSERT(router_lookahead != nullptr);

/* Routing parameters */
float pres_fac = update_pres_fac(router_opts.first_iter_pres_fac); /* Typically 0 -> ignore cong. */
float pres_fac = update_draw_pres_fac(router_opts.first_iter_pres_fac); /* Typically 0 -> ignore cong. */
int bb_fac = router_opts.bb_factor;

/* When routing conflicts are detected the bounding boxes are scaled
Expand Down Expand Up @@ -365,7 +357,7 @@ bool route(const Netlist<>& net_list,
//Decrease pres_fac so that critical connections will take more direct routes
//Note that we use first_iter_pres_fac here (typically zero), and switch to
//use initial_pres_fac on the next iteration.
pres_fac = update_pres_fac(router_opts.first_iter_pres_fac);
pres_fac = update_draw_pres_fac(router_opts.first_iter_pres_fac);

//Reduce timing tolerances to re-route more delay-suboptimal signals
connections_inf.set_connection_criticality_tolerance(0.7);
Expand All @@ -382,7 +374,7 @@ bool route(const Netlist<>& net_list,
//after the first routing convergence. Since that is often zero,
//we want to set pres_fac to a reasonable (i.e. typically non-zero)
//value afterwards -- so it grows when multiplied by pres_fac_mult
pres_fac = update_pres_fac(router_opts.initial_pres_fac);
pres_fac = update_draw_pres_fac(router_opts.initial_pres_fac);
}

//Have we converged the maximum number of times, did not make any changes, or does it seem
Expand Down Expand Up @@ -445,12 +437,12 @@ bool route(const Netlist<>& net_list,

//Update pres_fac
if (itry == 1) {
pres_fac = update_pres_fac(router_opts.initial_pres_fac);
pres_fac = update_draw_pres_fac(router_opts.initial_pres_fac);
} else {
pres_fac *= router_opts.pres_fac_mult;

/* Avoid overflow for high iteration counts, even if acc_cost is big */
pres_fac = update_pres_fac(std::min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5)));
pres_fac = update_draw_pres_fac(std::min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5)));

// Increase short path criticality if it's having a hard time resolving hold violations due to congestion
if (budgeting_inf.if_set()) {
Expand Down
7 changes: 5 additions & 2 deletions vpr/src/route/route.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@
#include "vpr_types.h"
#include "netlist.h"

/** Attempts a routing via the AIR algorithm. \p width_fac
* specifies the relative width of the channels, while the members of
/** Attempts a routing via the AIR algorithm [0].
*
* \p width_fac specifies the relative width of the channels, while the members of
* \p router_opts determine the value of the costs assigned to routing
* resource node, etc. \p det_routing_arch describes the detailed routing
* architecture (connection and switch boxes) of the FPGA; it is used
* only if a DETAILED routing has been selected.
*
* [0]: K. E. Murray, S. Zhong, and V. Betz, "AIR: A fast but lazy timing-driven FPGA router", in ASPDAC 2020
*
* \return Success status. */
bool route(const Netlist<>& net_list,
int width_fac,
Expand Down
3 changes: 0 additions & 3 deletions vpr/src/route/route_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,6 @@ void get_serial_num(const Netlist<>& net_list) {
VTR_LOG("Serial number (magic cookie) for the routing is: %d\n", serial_num);
}

/** This routine checks to see if this is a resource-feasible routing.
* That is, are all rr_node capacity limitations respected? It assumes
* that the occupancy arrays are up to date when it is called. */
bool feasible_routing() {
auto& device_ctx = g_vpr_ctx.device();
const auto& rr_graph = device_ctx.rr_graph;
Expand Down
7 changes: 5 additions & 2 deletions vpr/src/route/route_common.h
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
#pragma once

/** @file More router utils: some used by the connection router, some by other
* router files and some used globally */
/** @file Misc. router utils: some used by the connection router, some by other
* router files and some used globally. */

#include <vector>
#include "clustered_netlist.h"
#include "rr_node_fwd.h"
#include "router_stats.h"
#include "globals.h"

/** This routine checks to see if this is a resource-feasible routing.
* That is, are all rr_node capacity limitations respected? It assumes
* that the occupancy arrays are up to date when it is called. */
bool feasible_routing();

vtr::vector<ParentNetId, t_bb> load_route_bb(const Netlist<>& net_list,
Expand Down
40 changes: 0 additions & 40 deletions vpr/src/route/route_net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
#include "route_net.h"
#include "stats.h"

/** When RCV is enabled, it's necessary to be able to completely rip up high-fanout nets if there is still negative hold slack.
* Normally the router only prunes the illegal branches of high-fanout nets; this check bypasses that pruning. */
bool check_hold(const t_router_opts& router_opts, float worst_neg_slack) {
if (router_opts.routing_budgets_algorithm != YOYO) {
return false;
Expand All @@ -14,10 +12,6 @@ bool check_hold(const t_router_opts& router_opts, float worst_neg_slack) {
return false;
}

/** Build and return a partial route tree from the legal connections from last iteration.
* along the way do:
* update pathfinder costs to be accurate to the partial route tree
* mark the rr_node sinks as targets to be reached. */
void setup_routing_resources(int itry,
ParentNetId net_id,
const Netlist<>& net_list,
Expand Down Expand Up @@ -110,7 +104,6 @@ void setup_routing_resources(int itry,
// completed constructing the partial route tree and updated all other data structures to match
}

/** Change the base costs of rr_nodes according to # of fanouts */
void update_rr_base_costs(int fanout) {
auto& device_ctx = g_vpr_ctx.mutable_device();

Expand Down Expand Up @@ -143,7 +136,6 @@ void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node) {
}
}

/** Detect if net should be routed or not */
bool should_route_net(ParentNetId net_id,
CBRR& connections_inf,
bool if_force_reroute) {
Expand Down Expand Up @@ -268,13 +260,6 @@ WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t avail
return WirelengthInfo(available_wirelength, used_wirelength);
}

/** Tells whether a net with \p fanout sinks is classified as high fanout.
 * A negative \p fanout_threshold disables the classification (the result is always false);
 * otherwise the net is high fanout once \p fanout reaches the threshold. */
bool is_high_fanout(int fanout, int fanout_threshold) {
    const bool threshold_enabled = fanout_threshold >= 0;
    return threshold_enabled && fanout >= fanout_threshold;
}

/** Returns the bounding box of a net's used routing resources */
t_bb calc_current_bb(const RouteTree& tree) {
auto& device_ctx = g_vpr_ctx.device();
const auto& rr_graph = device_ctx.rr_graph;
Expand Down Expand Up @@ -303,29 +288,6 @@ t_bb calc_current_bb(const RouteTree& tree) {
return bb;
}

/** Decide whether to stop attempting further routing reconvergences.
 * Only evaluated on the first iteration after a convergence: the current setup worst
 * negative slack is divided by the best sWNS stored in \p best_routing_metrics, and
 * routing is abandoned when that ratio reaches router_opts.reconvergence_cpd_threshold.
 * \return true if routing should give up, false if it should continue. */
bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts,
                                        int itry_since_last_convergence,
                                        std::shared_ptr<const SetupHoldTimingInfo> timing_info,
                                        const RoutingMetrics& best_routing_metrics) {
    // The heuristic only applies to the first iteration after a convergence
    if (itry_since_last_convergence != 1) {
        return false; // Don't give up
    }

    float cpd_ratio = timing_info->setup_worst_negative_slack() / best_routing_metrics.sWNS;

    // Larger initial improvements (after reducing pres_fac) are typically needed
    // to see an actual improvement in final legal routing quality.
    // Kept as a negated '>=' so the outcome is the same for every possible ratio value.
    const bool improvement_is_small = cpd_ratio >= router_opts.reconvergence_cpd_threshold;
    if (!improvement_is_small) {
        return false; // Don't give up
    }

    // Potential CPD improvement is small, don't spend run-time trying to improve it
    VTR_LOG("Giving up routing since additional routing convergences seem unlikely to improve quality (CPD ratio: %g)\n", cpd_ratio);
    return true;
}

// Initializes net_delay based on best-case delay estimates from the router lookahead
void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead,
const Netlist<>& net_list,
Expand Down Expand Up @@ -358,8 +320,6 @@ void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead,
}
}

/** Goes through all the sinks of this net and copies their delay values from
* the route_tree to the net_delay array. */
void update_net_delays_from_route_tree(float* net_delay,
const Netlist<>& net_list,
ParentNetId inet,
Expand Down
Loading

0 comments on commit 79a9c5c

Please sign in to comment.