diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp index 33392a6e8d1..d67e7c7afae 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp @@ -200,7 +200,7 @@ NodeId TimingGraph::add_node(const NodeType type) { EdgeId TimingGraph::add_edge(const EdgeType type, const NodeId src_node, const NodeId sink_node) { //We require that the source/sink node must already be in the graph, - // so we can update them with thier edge references + // so we can update them with their edge references TATUM_ASSERT(valid_node_id(src_node)); TATUM_ASSERT(valid_node_id(sink_node)); @@ -211,7 +211,7 @@ EdgeId TimingGraph::add_edge(const EdgeType type, const NodeId src_node, const N EdgeId edge_id = EdgeId(edge_ids_.size()); edge_ids_.push_back(edge_id); - //Create the edgge + //Create the edge edge_types_.push_back(type); edge_src_nodes_.push_back(src_node); edge_sink_nodes_.push_back(sink_node); @@ -318,7 +318,7 @@ GraphIdMaps TimingGraph::compress() { levelize(); validate(); - return {node_id_map, edge_id_map}; + return {std::move(node_id_map), std::move(edge_id_map)}; } void TimingGraph::levelize() { @@ -474,21 +474,20 @@ GraphIdMaps TimingGraph::optimize_layout() { levelize(); - return {node_id_map, edge_id_map}; + return {std::move(node_id_map), std::move(edge_id_map)}; } tatum::util::linear_map TimingGraph::optimize_edge_layout() const { //Make all edges in a level be contiguous in memory //Determine the edges driven by each level of the graph - std::vector> edge_levels; + std::vector> edge_levels(levels().size()); for(LevelId level_id : levels()) { - edge_levels.push_back(std::vector()); - for(auto node_id : level_nodes(level_id)) { + for(NodeId node_id : level_nodes(level_id)) { //We walk the nodes according to the input-edge order. 
//This is the same order used by the arrival-time traversal (which is responsible - //for most of the analyzer run-time), so matching it's order exactly results in + //for most of the analyzer run-time), so matching its order exactly results in //better cache locality for(EdgeId edge_id : node_in_edges(node_id)) { @@ -498,7 +497,7 @@ tatum::util::linear_map TimingGraph::optimize_edge_layout() const } } - //Maps from from original to new edge id, used to update node to edge refs + //Maps from original to new edge id, used to update node to edge refs tatum::util::linear_map orig_to_new_edge_id(edges().size()); //Determine the new order @@ -874,7 +873,7 @@ std::vector> identify_combinational_loops(const TimingGraph& } std::vector find_transitively_connected_nodes(const TimingGraph& tg, - const std::vector through_nodes, + const std::vector& through_nodes, size_t max_depth) { std::vector nodes; @@ -890,7 +889,7 @@ std::vector find_transitively_connected_nodes(const TimingGraph& tg, } std::vector find_transitive_fanin_nodes(const TimingGraph& tg, - const std::vector sinks, + const std::vector& sinks, size_t max_depth) { std::vector nodes; @@ -905,7 +904,7 @@ std::vector find_transitive_fanin_nodes(const TimingGraph& tg, } std::vector find_transitive_fanout_nodes(const TimingGraph& tg, - const std::vector sources, + const std::vector& sources, size_t max_depth) { std::vector nodes; diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp index 72a05cad9da..f4cd54ad8d9 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp @@ -11,8 +11,8 @@ * store all edges as bi-directional edges. * * NOTE: We store only the static connectivity and node information in the 'TimingGraph' class. - * Other dynamic information (edge delays, node arrival/required times) is stored seperately. 
- * This means that most actions opearting on the timing graph (e.g. TimingAnalyzers) only + * Other dynamic information (edge delays, node arrival/required times) is stored separately. + * This means that most actions operating on the timing graph (e.g. TimingAnalyzers) only * require read-only access to the timing graph. * * Accessing Graph Data @@ -28,9 +28,9 @@ * rather than the more typical "Array of Structs (AoS)" data layout. * * By using a SoA layout we keep all data for a particular field (e.g. node types) in contiguous - * memory. Using an AoS layout the various fields accross nodes would *not* be contiguous + * memory. Using an AoS layout the various fields across nodes would *not* be contiguous * (although the different fields within each object (e.g. a TimingNode class) would be contiguous. - * Since we typically perform operations on particular fields accross nodes the SoA layout performs + * Since we typically perform operations on particular fields across nodes the SoA layout performs * better (and enables memory ordering optimizations). The edges are also stored in a SOA format. * * The SoA layout also motivates the ID based approach, which allows direct indexing into the required @@ -48,11 +48,12 @@ * and ensures that each cache line pulled into the cache will (likely) be accessed multiple times * before being evicted. * - * Note that performing these optimizations is currently done explicity by calling the optimize_edge_layout() - * and optimize_node_layout() member functions. In the future (particularily if incremental modification + * Note that performing these optimizations is currently done explicitly by calling the optimize_edge_layout() + * and optimize_node_layout() member functions. In the future (particularly if incremental modification * support is added), it may be a good idea apply these modifications automatically as needed. 
* */ +#include #include #include #include @@ -149,7 +150,7 @@ class TimingGraph { ///\pre The graph must be levelized. ///\returns A range containing the nodes which are primary inputs (i.e. SOURCE's with no fanin, corresponding to top level design inputs pins) - ///\warning Not all SOURCE nodes in the graph are primary inputs (e.g. FF Q pins are SOURCE's but have incomming edges from the clock network) + ///\warning Not all SOURCE nodes in the graph are primary inputs (e.g. FF Q pins are SOURCE's but have incoming edges from the clock network) ///\see levelize() node_range primary_inputs() const { TATUM_ASSERT_MSG(is_levelized_, "Timing graph must be levelized"); @@ -282,7 +283,7 @@ class TimingGraph { //Node data tatum::util::linear_map node_ids_; //The node IDs in the graph tatum::util::linear_map node_types_; //Type of node - tatum::util::linear_map> node_in_edges_; //Incomiing edge IDs for node + tatum::util::linear_map> node_in_edges_; //Incoming edge IDs for node tatum::util::linear_map> node_out_edges_; //Out going edge IDs for node tatum::util::linear_map node_levels_; //Out going edge IDs for node @@ -293,12 +294,12 @@ class TimingGraph { tatum::util::linear_map edge_src_nodes_; //Source node for each edge tatum::util::linear_map edges_disabled_; - //Auxilary graph-level info, filled in by levelize() + //Auxiliary graph-level info, filled in by levelize() tatum::util::linear_map level_ids_; //The level IDs in the graph tatum::util::linear_map> level_nodes_; //Nodes in each level std::vector primary_inputs_; //Primary input nodes of the timing graph. std::vector logical_outputs_; //Logical output nodes of the timing graph. 
- bool is_levelized_ = false; //Inidcates if the current levelization is valid + bool is_levelized_ = false; //Indicates if the current levelization is valid bool allow_dangling_combinational_nodes_ = false; @@ -310,26 +311,31 @@ std::vector> identify_combinational_loops(const TimingGraph& //Returns the set of nodes transitively connected (either fanin or fanout) to nodes in through_nodes //up to max_depth (default infinite) hops away std::vector find_transitively_connected_nodes(const TimingGraph& tg, - const std::vector through_nodes, + const std::vector& through_nodes, size_t max_depth=std::numeric_limits::max()); //Returns the set of nodes in the transitive fanin of nodes in sinks up to max_depth (default infinite) hops away std::vector find_transitive_fanin_nodes(const TimingGraph& tg, - const std::vector sinks, + const std::vector& sinks, size_t max_depth=std::numeric_limits::max()); //Returns the set of nodes in the transitive fanout of nodes in sources up to max_depth (default infinite) hops away std::vector find_transitive_fanout_nodes(const TimingGraph& tg, - const std::vector sources, + const std::vector& sources, size_t max_depth=std::numeric_limits::max()); EdgeType infer_edge_type(const TimingGraph& tg, EdgeId edge); //Mappings from old to new IDs struct GraphIdMaps { - GraphIdMaps(tatum::util::linear_map node_map, - tatum::util::linear_map edge_map) + GraphIdMaps(const tatum::util::linear_map& node_map, + const tatum::util::linear_map& edge_map) : node_id_map(node_map), edge_id_map(edge_map) {} + + GraphIdMaps(tatum::util::linear_map&& node_map, + tatum::util::linear_map&& edge_map) + : node_id_map(std::move(node_map)), edge_id_map(std::move(edge_map)) {} + tatum::util::linear_map node_id_map; tatum::util::linear_map edge_id_map; }; diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp index 9ac444bc61f..db34f59a049 100644 --- 
a/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp @@ -18,7 +18,7 @@ namespace tatum { * This file defines the AnalyzerFactory class used to construct timing analyzers. * * We assume that the user has already defined the timing graph, constraints and - * thier own delay calculator: + * their own delay calculator: * * TimingGraph timing_graph; * TimingConstraints timing_constraints; @@ -33,7 +33,7 @@ namespace tatum { * timing_constraints, * delay_calculator); * - * We can similarily generate analyzers for other types of analysis, for instance Hold: + * We can similarly generate analyzers for other types of analysis, for instance Hold: * * auto hold_analyzer = AnalyzerFactory::make(timing_graph, * timing_constraints, @@ -45,7 +45,7 @@ namespace tatum { * timing_constraints, * delay_calculator); * - * The AnalzyerFactory returns a std::unique_ptr to the appropriate TimingAnalyzer sub-class: + * The AnalyzerFactory returns a std::unique_ptr to the appropriate TimingAnalyzer sub-class: * * SetupAnalysis => SetupTimingAnalyzer * HoldAnalysis => HoldTimingAnalyzer diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp index bfa1f0fa037..9d0a86ec217 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp @@ -8,7 +8,7 @@ namespace tatum { /** - * An exmaple DelayCalculator implementation which takes + * An example DelayCalculator implementation which takes * a vector of fixed pre-calculated edge delays * * \see DelayCalculator diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp index 2672560d155..be9680d20b5 100644 --- 
a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp @@ -21,7 +21,7 @@ class GraphVisitor { virtual void do_reset_node_arrival_tags_from_origin(const NodeId node_id, const NodeId origin) = 0; virtual void do_reset_node_required_tags_from_origin(const NodeId node_id, const NodeId origin) = 0; - //Returns true if the specified source/sink is unconstrainted + //Returns true if the specified source/sink is unconstrained virtual bool do_arrival_pre_traverse_node(const TimingGraph& tg, const TimingConstraints& tc, const NodeId node_id) = 0; virtual bool do_required_pre_traverse_node(const TimingGraph& tg, const TimingConstraints& tc, const NodeId node_id) = 0; diff --git a/libs/libarchfpga/src/device_grid.h b/libs/libarchfpga/src/device_grid.h index a82dd043da5..8e1332559ed 100644 --- a/libs/libarchfpga/src/device_grid.h +++ b/libs/libarchfpga/src/device_grid.h @@ -38,6 +38,10 @@ class DeviceGrid { size_t width() const { return grid_.dim_size(1); } ///@brief Return the height of the grid at the specified layer size_t height() const { return grid_.dim_size(2); } + ///@brief Return the grid dimensions in (# of layers, width, height) format + std::tuple dim_sizes() const { + return {grid_.dim_size(0), grid_.dim_size(1), grid_.dim_size(2)}; + } ///@brief Return the size of the flattened grid on the given layer inline size_t grid_size() const { diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index e780005ada1..6900fa80bd2 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -92,7 +92,6 @@ void SetupVPR(const t_options* options, t_packer_opts* packerOpts, t_placer_opts* placerOpts, t_ap_opts* apOpts, - t_annealing_sched* annealSched, t_router_opts* routerOpts, t_analysis_opts* analysisOpts, t_noc_opts* nocOpts, @@ -140,7 +139,7 @@ void SetupVPR(const t_options* options, SetupNetlistOpts(*options, *netlistOpts); SetupPlacerOpts(*options, 
placerOpts); - SetupAnnealSched(*options, annealSched); + SetupAnnealSched(*options, &placerOpts->anneal_sched); SetupRouterOpts(*options, routerOpts); SetupAnalysisOpts(*options, *analysisOpts); SetupPowerOpts(*options, powerOpts, arch); @@ -395,7 +394,7 @@ static void SetupSwitches(const t_arch& Arch, device_ctx.delayless_switch_idx = RoutingArch->delayless_switch; //Warn about non-zero Cout values for the ipin switch, since these values have no effect. - //VPR do not model the R/C's of block internal routing connectsion. + //VPR does not model the R/C's of block internal routing connections. // //Note that we don't warn about the R value as it may be used to size the buffer (if buf_size_type is AUTO) if (device_ctx.arch_switch_inf[RoutingArch->wire_to_arch_ipin_switch].Cout != 0.) { @@ -531,31 +530,6 @@ static void SetupAnnealSched(const t_options& Options, VPR_FATAL_ERROR(VPR_ERROR_OTHER, "inner_num must be greater than 0.\n"); } - AnnealSched->alpha_min = Options.PlaceAlphaMin; - if (AnnealSched->alpha_min >= 1 || AnnealSched->alpha_min <= 0) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_min must be between 0 and 1 exclusive.\n"); - } - - AnnealSched->alpha_max = Options.PlaceAlphaMax; - if (AnnealSched->alpha_max >= 1 || AnnealSched->alpha_max <= AnnealSched->alpha_min) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_max must be between alpha_min and 1 exclusive.\n"); - } - - AnnealSched->alpha_decay = Options.PlaceAlphaDecay; - if (AnnealSched->alpha_decay >= 1 || AnnealSched->alpha_decay <= 0) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_decay must be between 0 and 1 exclusive.\n"); - } - - AnnealSched->success_min = Options.PlaceSuccessMin; - if (AnnealSched->success_min >= 1 || AnnealSched->success_min <= 0) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "success_min must be between 0 and 1 exclusive.\n"); - } - - AnnealSched->success_target = Options.PlaceSuccessTarget; - if (AnnealSched->success_target >= 1 || AnnealSched->success_target <= 0) { - 
VPR_FATAL_ERROR(VPR_ERROR_OTHER, "success_target must be between 0 and 1 exclusive.\n"); - } - AnnealSched->type = Options.anneal_sched_type; } @@ -600,7 +574,6 @@ void SetupPackerOpts(const t_options& Options, //TODO: document? PackerOpts->inter_cluster_net_delay = 1.0; /* DEFAULT */ PackerOpts->auto_compute_inter_cluster_net_delay = true; - PackerOpts->packer_algorithm = PACK_GREEDY; /* DEFAULT */ PackerOpts->device_layout = Options.device_layout; @@ -782,7 +755,7 @@ static void SetupServerOpts(const t_options& Options, t_server_opts* ServerOpts) } static void find_ipin_cblock_switch_index(const t_arch& Arch, int& wire_to_arch_ipin_switch, int& wire_to_arch_ipin_switch_between_dice) { - for (auto cb_switch_name_index = 0; cb_switch_name_index < (int)Arch.ipin_cblock_switch_name.size(); cb_switch_name_index++) { + for (int cb_switch_name_index = 0; cb_switch_name_index < (int)Arch.ipin_cblock_switch_name.size(); cb_switch_name_index++) { int ipin_cblock_switch_index = UNDEFINED; for (int iswitch = 0; iswitch < (int)Arch.switches.size(); ++iswitch) { if (Arch.switches[iswitch].name == Arch.ipin_cblock_switch_name[cb_switch_name_index]) { diff --git a/vpr/src/base/SetupVPR.h b/vpr/src/base/SetupVPR.h index 451fdc6567a..45bf510c18c 100644 --- a/vpr/src/base/SetupVPR.h +++ b/vpr/src/base/SetupVPR.h @@ -17,7 +17,6 @@ void SetupVPR(const t_options* Options, t_packer_opts* PackerOpts, t_placer_opts* PlacerOpts, t_ap_opts* APOpts, - t_annealing_sched* AnnealSched, t_router_opts* RouterOpts, t_analysis_opts* AnalysisOpts, t_noc_opts* NocOpts, diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index c9d3bb23e1e..ab261e6b7b1 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -3,7 +3,6 @@ #include "vtr_assert.h" #include "vtr_log.h" -#include "vtr_memory.h" #include "vpr_types.h" #include "vpr_error.h" @@ -17,8 +16,7 @@ /******** Function Prototypes ********/ static void ShowPackerOpts(const t_packer_opts& PackerOpts); static void 
ShowNetlistOpts(const t_netlist_opts& NetlistOpts); -static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, - const t_annealing_sched& AnnealSched); +static void ShowPlacerOpts(const t_placer_opts& PlacerOpts); static void ShowAnalyticalPlacerOpts(const t_ap_opts& APOpts); static void ShowRouterOpts(const t_router_opts& RouterOpts); static void ShowAnalysisOpts(const t_analysis_opts& AnalysisOpts); @@ -56,7 +54,7 @@ void ShowSetup(const t_vpr_setup& vpr_setup) { ShowPackerOpts(vpr_setup.PackerOpts); } if (vpr_setup.PlacerOpts.doPlacement) { - ShowPlacerOpts(vpr_setup.PlacerOpts, vpr_setup.AnnealSched); + ShowPlacerOpts(vpr_setup.PlacerOpts); } if (vpr_setup.APOpts.doAP) { ShowAnalyticalPlacerOpts(vpr_setup.APOpts); @@ -127,7 +125,6 @@ ClusteredNetlistStats::ClusteredNetlistStats() { auto& device_ctx = g_vpr_ctx.device(); auto& cluster_ctx = g_vpr_ctx.clustering(); - int j; L_num_p_inputs = 0; L_num_p_outputs = 0; num_blocks_type = std::vector(device_ctx.logical_block_types.size(), 0); @@ -136,12 +133,12 @@ ClusteredNetlistStats::ClusteredNetlistStats() { logical_block_types = device_ctx.logical_block_types; /* Count I/O input and output pads */ - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id); auto physical_tile = pick_physical_type(logical_block); num_blocks_type[logical_block->index]++; if (is_io_type(physical_tile)) { - for (j = 0; j < logical_block->pb_type->num_pins; j++) { + for (int j = 0; j < logical_block->pb_type->num_pins; j++) { int physical_pin = get_physical_pin(physical_tile, logical_block, j); if (cluster_ctx.clb_nlist.block_net(blk_id, j) != ClusterNetId::INVALID()) { @@ -179,7 +176,7 @@ void ClusteredNetlistStats::write(OutputFormat fmt, std::ostream& output) const void writeClusteredNetlistStats(const std::string& block_usage_filename) { const auto stats = ClusteredNetlistStats(); - // Print out the human 
readable version to stdout + // Print out the human-readable version to stdout stats.write(ClusteredNetlistStats::OutputFormat::HumanReadable, std::cout); @@ -207,31 +204,22 @@ void writeClusteredNetlistStats(const std::string& block_usage_filename) { static void ShowAnnealSched(const t_annealing_sched& AnnealSched) { VTR_LOG("AnnealSched.type: "); switch (AnnealSched.type) { - case AUTO_SCHED: + case e_sched_type::AUTO_SCHED: VTR_LOG("AUTO_SCHED\n"); break; - case USER_SCHED: + case e_sched_type::USER_SCHED: VTR_LOG("USER_SCHED\n"); break; - case DUSTY_SCHED: - VTR_LOG("DUSTY_SCHED\n"); - break; default: VTR_LOG_ERROR("Unknown annealing schedule\n"); } VTR_LOG("AnnealSched.inner_num: %f\n", AnnealSched.inner_num); - if (USER_SCHED == AnnealSched.type) { + if (e_sched_type::USER_SCHED == AnnealSched.type) { VTR_LOG("AnnealSched.init_t: %f\n", AnnealSched.init_t); VTR_LOG("AnnealSched.alpha_t: %f\n", AnnealSched.alpha_t); VTR_LOG("AnnealSched.exit_t: %f\n", AnnealSched.exit_t); - } else if (DUSTY_SCHED == AnnealSched.type) { - VTR_LOG("AnnealSched.alpha_min: %f\n", AnnealSched.alpha_min); - VTR_LOG("AnnealSched.alpha_max: %f\n", AnnealSched.alpha_max); - VTR_LOG("AnnealSched.alpha_decay: %f\n", AnnealSched.alpha_decay); - VTR_LOG("AnnealSched.success_min: %f\n", AnnealSched.success_min); - VTR_LOG("AnnealSched.success_target: %f\n", AnnealSched.success_target); } } @@ -495,8 +483,7 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) { VTR_LOG("\n"); } -static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, - const t_annealing_sched& AnnealSched) { +static void ShowPlacerOpts(const t_placer_opts& PlacerOpts) { VTR_LOG("PlacerOpts.place_freq: "); switch (PlacerOpts.place_freq) { case PLACE_ONCE: @@ -515,13 +502,13 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, || (PLACE_ALWAYS == PlacerOpts.place_freq)) { VTR_LOG("PlacerOpts.place_algorithm: "); switch (PlacerOpts.place_algorithm.get()) { - case BOUNDING_BOX_PLACE: + case 
e_place_algorithm::BOUNDING_BOX_PLACE: VTR_LOG("BOUNDING_BOX_PLACE\n"); break; - case CRITICALITY_TIMING_PLACE: + case e_place_algorithm::CRITICALITY_TIMING_PLACE: VTR_LOG("CRITICALITY_TIMING_PLACE\n"); break; - case SLACK_TIMING_PLACE: + case e_place_algorithm::SLACK_TIMING_PLACE: VTR_LOG("SLACK_TIMING_PLACE\n"); break; default: @@ -604,7 +591,7 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, VTR_LOG("PlaceOpts.seed: %d\n", PlacerOpts.seed); - ShowAnnealSched(AnnealSched); + ShowAnnealSched(PlacerOpts.anneal_sched); } VTR_LOG("\n"); } diff --git a/vpr/src/base/atom_lookup.cpp b/vpr/src/base/atom_lookup.cpp index f771b8af154..eb597ff8abd 100644 --- a/vpr/src/base/atom_lookup.cpp +++ b/vpr/src/base/atom_lookup.cpp @@ -173,7 +173,7 @@ AtomLookup::pin_tnode_range AtomLookup::atom_pin_tnodes(BlockTnode block_tnode_t } void AtomLookup::set_atom_pin_tnode(const AtomPinId pin, const tatum::NodeId node, BlockTnode block_tnode_type) { - //A pin always expands to an external tnode (i.e. it's external connectivity in the netlist) + //A pin always expands to an external tnode (i.e. its external connectivity in the netlist) //but some pins may expand to an additional tnode (i.e. 
to SOURCE/SINK to cover internal sequential paths within a block) if (block_tnode_type == BlockTnode::EXTERNAL) { atom_pin_tnode_external_[pin] = node; diff --git a/vpr/src/base/atom_lookup_fwd.h b/vpr/src/base/atom_lookup_fwd.h index 02ab349cb94..1adb2e68bb0 100644 --- a/vpr/src/base/atom_lookup_fwd.h +++ b/vpr/src/base/atom_lookup_fwd.h @@ -5,7 +5,7 @@ class AtomLookup; enum class BlockTnode { INTERNAL, /// find_netlist_physical_clock_nets(const AtomNetlist& netlist) //clock generators // //Since we don't have good information about what pins are clock generators we build a lookup as we go - for (auto blk_id : netlist.blocks()) { + for (AtomBlockId blk_id : netlist.blocks()) { if (!blk_id) continue; + // Ignore I/O blocks AtomBlockType type = netlist.block_type(blk_id); if (type != AtomBlockType::BLOCK) continue; @@ -1352,7 +1353,7 @@ std::set find_netlist_physical_clock_nets(const AtomNetlist& netlist) const t_model* model = netlist.block_model(blk_id); VTR_ASSERT(model); if (clock_gen_ports.find(model) == clock_gen_ports.end()) { - //First time we've seen this model, intialize it + //First time we've seen this model, initialize it clock_gen_ports[model] = {}; //Look at all the ports to find clock generators @@ -1366,7 +1367,7 @@ std::set find_netlist_physical_clock_nets(const AtomNetlist& netlist) } //Look for connected input clocks - for (auto pin_id : netlist.block_clock_pins(blk_id)) { + for (AtomPinId pin_id : netlist.block_clock_pins(blk_id)) { if (!pin_id) continue; AtomNetId clk_net_id = netlist.pin_net(pin_id); @@ -1402,7 +1403,7 @@ std::set find_netlist_physical_clock_nets(const AtomNetlist& netlist) ///@brief Finds all logical clock drivers in the netlist (by back-tracing through logic) std::set find_netlist_logical_clock_drivers(const AtomNetlist& netlist) { - auto clock_nets = find_netlist_physical_clock_nets(netlist); + std::set clock_nets = find_netlist_physical_clock_nets(netlist); //We now have a set of nets which drive clock pins // @@ -1415,7 
+1416,7 @@ std::set find_netlist_logical_clock_drivers(const AtomNetlist& netlis prev_clock_nets = clock_nets; clock_nets.clear(); - for (auto clk_net : prev_clock_nets) { + for (AtomNetId clk_net : prev_clock_nets) { AtomPinId driver_pin = netlist.net_driver(clk_net); AtomPortId driver_port = netlist.pin_port(driver_pin); AtomBlockId driver_blk = netlist.port_block(driver_port); @@ -1467,7 +1468,7 @@ std::set find_netlist_logical_clock_drivers(const AtomNetlist& netlis //Extract the net drivers std::set clock_drivers; - for (auto net : clock_nets) { + for (AtomNetId net : clock_nets) { AtomPinId driver = netlist.net_driver(net); if (netlist.pin_is_constant(driver)) { diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp index df77dae97ca..ba7e20ccd80 100644 --- a/vpr/src/base/place_and_route.cpp +++ b/vpr/src/base/place_and_route.cpp @@ -55,7 +55,6 @@ static float comp_width(t_chan* chan, float x, float separation); int binary_search_place_and_route(const Netlist<>& placement_net_list, const Netlist<>& router_net_list, const t_placer_opts& placer_opts_ref, - const t_annealing_sched& annealing_sched, const t_router_opts& router_opts, const t_analysis_opts& analysis_opts, const t_noc_opts& noc_opts, @@ -183,7 +182,6 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, placer_opts.place_chan_width = current; try_place(placement_net_list, placer_opts, - annealing_sched, router_opts, analysis_opts, noc_opts, @@ -191,7 +189,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, det_routing_arch, segment_inf, arch->directs, - false); + /*is_flat=*/false); } success = route(router_net_list, current, @@ -326,10 +324,10 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, break; if (placer_opts.place_freq == PLACE_ALWAYS) { placer_opts.place_chan_width = current; - try_place(placement_net_list, placer_opts, annealing_sched, router_opts, analysis_opts, noc_opts, + 
try_place(placement_net_list, placer_opts, router_opts, analysis_opts, noc_opts, arch->Chans, det_routing_arch, segment_inf, arch->directs, - false); + /*is_flat=*/false); } success = route(router_net_list, diff --git a/vpr/src/base/place_and_route.h b/vpr/src/base/place_and_route.h index b4735ed8af4..6f191c0ff9e 100644 --- a/vpr/src/base/place_and_route.h +++ b/vpr/src/base/place_and_route.h @@ -25,7 +25,6 @@ struct t_fmap_cell { int binary_search_place_and_route(const Netlist<>& placement_net_list, const Netlist<>& router_net_list, const t_placer_opts& placer_opts_ref, - const t_annealing_sched& annealing_sched, const t_router_opts& router_opts, const t_analysis_opts& analysis_opts, const t_noc_opts& noc_opts, diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 7ddaa08feff..1641e255b89 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -393,11 +393,11 @@ struct ParsePlaceAlgorithm { ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "bounding_box") { - conv_value.set_value(BOUNDING_BOX_PLACE); + conv_value.set_value(e_place_algorithm::BOUNDING_BOX_PLACE); } else if (str == "criticality_timing") { - conv_value.set_value(CRITICALITY_TIMING_PLACE); + conv_value.set_value(e_place_algorithm::CRITICALITY_TIMING_PLACE); } else if (str == "slack_timing") { - conv_value.set_value(SLACK_TIMING_PLACE); + conv_value.set_value(e_place_algorithm::SLACK_TIMING_PLACE); } else { std::stringstream msg; msg << "Invalid conversion from '" << str << "' to e_place_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")"; @@ -415,12 +415,12 @@ struct ParsePlaceAlgorithm { ConvertedValue to_str(e_place_algorithm val) { ConvertedValue conv_value; - if (val == BOUNDING_BOX_PLACE) { + if (val == e_place_algorithm::BOUNDING_BOX_PLACE) { conv_value.set_value("bounding_box"); - } else if (val == CRITICALITY_TIMING_PLACE) { + } else if (val == 
e_place_algorithm::CRITICALITY_TIMING_PLACE) { conv_value.set_value("criticality_timing"); } else { - VTR_ASSERT(val == SLACK_TIMING_PLACE); + VTR_ASSERT(val == e_place_algorithm::SLACK_TIMING_PLACE); conv_value.set_value("slack_timing"); } return conv_value; @@ -435,11 +435,11 @@ struct ParsePlaceBoundingBox { ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "auto_bb") { - conv_value.set_value(AUTO_BB); + conv_value.set_value(e_place_bounding_box_mode::AUTO_BB); } else if (str == "cube_bb") { - conv_value.set_value(CUBE_BB); + conv_value.set_value(e_place_bounding_box_mode::CUBE_BB); } else if (str == "per_layer_bb") { - conv_value.set_value(PER_LAYER_BB); + conv_value.set_value(e_place_bounding_box_mode::PER_LAYER_BB); } else { std::stringstream msg; msg << "Invalid conversion from '" << str << "' to e_place_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")"; @@ -450,12 +450,12 @@ struct ParsePlaceBoundingBox { ConvertedValue to_str(e_place_bounding_box_mode val) { ConvertedValue conv_value; - if (val == AUTO_BB) { + if (val == e_place_bounding_box_mode::AUTO_BB) { conv_value.set_value("auto_bb"); - } else if (val == CUBE_BB) { + } else if (val == e_place_bounding_box_mode::CUBE_BB) { conv_value.set_value("cube_bb"); } else { - VTR_ASSERT(val == PER_LAYER_BB); + VTR_ASSERT(val == e_place_bounding_box_mode::PER_LAYER_BB); conv_value.set_value("per_layer_bb"); } return conv_value; @@ -470,9 +470,9 @@ struct ParsePlaceAgentAlgorithm { ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "e_greedy") - conv_value.set_value(E_GREEDY); + conv_value.set_value(e_agent_algorithm::E_GREEDY); else if (str == "softmax") - conv_value.set_value(SOFTMAX); + conv_value.set_value(e_agent_algorithm::SOFTMAX); else { std::stringstream msg; msg << "Invalid conversion from '" << str << "' to e_agent_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")"; @@ 
-483,10 +483,10 @@ struct ParsePlaceAgentAlgorithm { ConvertedValue to_str(e_agent_algorithm val) { ConvertedValue conv_value; - if (val == E_GREEDY) + if (val == e_agent_algorithm::E_GREEDY) conv_value.set_value("e_greedy"); else { - VTR_ASSERT(val == SOFTMAX); + VTR_ASSERT(val == e_agent_algorithm::SOFTMAX); conv_value.set_value("softmax"); } return conv_value; @@ -1957,36 +1957,6 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("0.8") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceAlphaMin, "--alpha_min") - .help( - "For placement using Dusty's annealing schedule. Minimum (starting) value of alpha.") - .default_value("0.2") - .show_in(argparse::ShowIn::HELP_ONLY); - - place_grp.add_argument(args.PlaceAlphaMax, "--alpha_max") - .help( - "For placement using Dusty's annealing schedule. Maximum (stopping) value of alpha.") - .default_value("0.9") - .show_in(argparse::ShowIn::HELP_ONLY); - - place_grp.add_argument(args.PlaceAlphaDecay, "--alpha_decay") - .help( - "For placement using Dusty's annealing schedule. The value that alpha is scaled by after reset.") - .default_value("0.7") - .show_in(argparse::ShowIn::HELP_ONLY); - - place_grp.add_argument(args.PlaceSuccessMin, "--anneal_success_min") - .help( - "For placement using Dusty's annealing schedule. Minimum success ratio when annealing before resetting the temperature to maintain the target success ratio.") - .default_value("0.1") - .show_in(argparse::ShowIn::HELP_ONLY); - - place_grp.add_argument(args.PlaceSuccessTarget, "--anneal_success_target") - .help( - "For placement using Dusty's annealing schedule. Target success ratio when annealing.") - .default_value("0.25") - .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.pad_loc_type, "--fix_pins") .help( "Fixes I/O pad locations randomly during placement. 
Valid options:\n" @@ -2996,7 +2966,7 @@ void set_conditional_defaults(t_options& args) { * Filenames */ - //We may have recieved the full circuit filepath in the circuit name, + //We may have received the full circuit filepath in the circuit name, //remove the extension and any leading path elements VTR_ASSERT(args.CircuitName.provenance() == Provenance::SPECIFIED); auto name_ext = vtr::split_ext(args.CircuitName); @@ -3085,9 +3055,9 @@ void set_conditional_defaults(t_options& args) { //Which placement algorithm to use? if (args.PlaceAlgorithm.provenance() != Provenance::SPECIFIED) { if (args.timing_analysis) { - args.PlaceAlgorithm.set(CRITICALITY_TIMING_PLACE, Provenance::INFERRED); + args.PlaceAlgorithm.set(e_place_algorithm::CRITICALITY_TIMING_PLACE, Provenance::INFERRED); } else { - args.PlaceAlgorithm.set(BOUNDING_BOX_PLACE, Provenance::INFERRED); + args.PlaceAlgorithm.set(e_place_algorithm::BOUNDING_BOX_PLACE, Provenance::INFERRED); } } @@ -3101,7 +3071,7 @@ void set_conditional_defaults(t_options& args) { // Check for correct options combinations // If you are running WLdriven placement, the RL reward function should be // either basic or nonPenalizing basic - if (args.RL_agent_placement && (args.PlaceAlgorithm == BOUNDING_BOX_PLACE || !args.timing_analysis)) { + if (args.RL_agent_placement && (args.PlaceAlgorithm == e_place_algorithm::BOUNDING_BOX_PLACE || !args.timing_analysis)) { if (args.place_reward_fun.value() != "basic" && args.place_reward_fun.value() != "nonPenalizing_basic") { VTR_LOG_WARN( "To use RLPlace for WLdriven placements, the reward function should be basic or nonPenalizing_basic.\n" @@ -3132,18 +3102,12 @@ void set_conditional_defaults(t_options& args) { } //Which schedule? 
- if (args.PlaceAlphaMin.provenance() == Provenance::SPECIFIED // Any of these flags select Dusty's schedule - || args.PlaceAlphaMax.provenance() == Provenance::SPECIFIED - || args.PlaceAlphaDecay.provenance() == Provenance::SPECIFIED - || args.PlaceSuccessMin.provenance() == Provenance::SPECIFIED - || args.PlaceSuccessTarget.provenance() == Provenance::SPECIFIED) { - args.anneal_sched_type.set(DUSTY_SCHED, Provenance::INFERRED); - } else if (args.PlaceInitT.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule - || args.PlaceExitT.provenance() == Provenance::SPECIFIED - || args.PlaceAlphaT.provenance() == Provenance::SPECIFIED) { - args.anneal_sched_type.set(USER_SCHED, Provenance::INFERRED); + if (args.PlaceInitT.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule + || args.PlaceExitT.provenance() == Provenance::SPECIFIED + || args.PlaceAlphaT.provenance() == Provenance::SPECIFIED) { + args.anneal_sched_type.set(e_sched_type::USER_SCHED, Provenance::INFERRED); } else { - args.anneal_sched_type.set(AUTO_SCHED, Provenance::INFERRED); // Otherwise use the automatic schedule + args.anneal_sched_type.set(e_sched_type::AUTO_SCHED, Provenance::INFERRED); // Otherwise use the automatic schedule } /* diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 12f8e69104a..b43e3734de1 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -119,12 +119,7 @@ struct t_options { argparse::ArgValue PlaceInitT; argparse::ArgValue PlaceExitT; argparse::ArgValue PlaceAlphaT; - argparse::ArgValue PlaceAlphaMin; - argparse::ArgValue PlaceAlphaMax; - argparse::ArgValue PlaceAlphaDecay; - argparse::ArgValue PlaceSuccessMin; - argparse::ArgValue PlaceSuccessTarget; - argparse::ArgValue anneal_sched_type; + argparse::ArgValue anneal_sched_type; argparse::ArgValue PlaceAlgorithm; argparse::ArgValue PlaceQuenchAlgorithm; argparse::ArgValue pad_loc_type; diff --git 
a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 1de10bf25a2..b763c368da5 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -289,7 +289,6 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a &vpr_setup->PackerOpts, &vpr_setup->PlacerOpts, &vpr_setup->APOpts, - &vpr_setup->AnnealSched, &vpr_setup->RouterOpts, &vpr_setup->AnalysisOpts, &vpr_setup->NocOpts, @@ -838,7 +837,6 @@ void vpr_place(const Netlist<>& net_list, t_vpr_setup& vpr_setup, const t_arch& try_place(net_list, vpr_setup.PlacerOpts, - vpr_setup.AnnealSched, vpr_setup.RouterOpts, vpr_setup.AnalysisOpts, vpr_setup.NocOpts, @@ -1078,7 +1076,6 @@ RouteStatus vpr_route_min_W(const Netlist<>& net_list, int min_W = binary_search_place_and_route((const Netlist<>&)g_vpr_ctx.clustering().clb_nlist, net_list, vpr_setup.PlacerOpts, - vpr_setup.AnnealSched, router_opts, vpr_setup.AnalysisOpts, vpr_setup.NocOpts, @@ -1310,8 +1307,9 @@ static void free_complex_block_types() { void free_circuit() { //Free new net structures auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { cluster_ctx.clb_nlist.remove_block(blk_id); + } cluster_ctx.clb_nlist = ClusteredNetlist(); } @@ -1389,7 +1387,6 @@ void vpr_setup_vpr(t_options* Options, t_packer_opts* PackerOpts, t_placer_opts* PlacerOpts, t_ap_opts* APOpts, - t_annealing_sched* AnnealSched, t_router_opts* RouterOpts, t_analysis_opts* AnalysisOpts, t_noc_opts* NocOpts, @@ -1415,7 +1412,6 @@ void vpr_setup_vpr(t_options* Options, PackerOpts, PlacerOpts, APOpts, - AnnealSched, RouterOpts, AnalysisOpts, NocOpts, diff --git a/vpr/src/base/vpr_api.h b/vpr/src/base/vpr_api.h index dca8f7441ad..93cf2d12cc1 100644 --- a/vpr/src/base/vpr_api.h +++ b/vpr/src/base/vpr_api.h @@ -179,7 +179,6 @@ void vpr_setup_vpr(t_options* Options, t_packer_opts* PackerOpts, t_placer_opts* PlacerOpts, t_ap_opts* APOpts, - 
t_annealing_sched* AnnealSched, t_router_opts* RouterOpts, t_analysis_opts* AnalysisOpts, t_noc_opts* NocOpts, diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index d5698e3eadf..df280c52c53 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -86,6 +86,12 @@ enum class ScreenUpdatePriority { MAJOR = 1 }; +#ifdef VTR_ENABLE_DEBUG_LOGGING +constexpr bool VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR = true; +#else +constexpr bool VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR = false; +#endif + #define MAX_SHORT 32767 /* Values large enough to be way out of range for any data, but small enough @@ -126,7 +132,7 @@ enum class e_router_lookahead { enum class e_route_bb_update { STATIC, /// { }; } // namespace std -struct t_place_region { - float capacity; /// sb; std::unique_ptr cb; short arch_wire_switch = 0; @@ -1501,7 +1481,7 @@ struct t_seg_details { short arch_opin_between_dice_switch = 0; float Rmetal = 0; float Cmetal = 0; - bool twisted = 0; + bool twisted = false; enum Direction direction = Direction::NONE; int group_start = 0; int group_size = 0; @@ -1577,16 +1557,6 @@ class t_chan_seg_details { */ typedef vtr::NdMatrix t_chan_details; -/** - * @brief A linked list of float pointers. - * - * Used for keeping track of which pathcosts in the router have been changed. 
- */ -struct t_linked_f_pointer { - t_linked_f_pointer* next; - float* fptr; -}; - constexpr bool is_pin(e_rr_type type) { return (type == IPIN || type == OPIN); } constexpr bool is_chan(e_rr_type type) { return (type == CHANX || type == CHANY); } constexpr bool is_src_sink(e_rr_type type) { return (type == SOURCE || type == SINK); } @@ -1691,8 +1661,6 @@ struct t_non_configurable_rr_sets { std::set> edge_sets; }; -#define NO_PREVIOUS -1 - ///@brief Power estimation options struct t_power_opts { bool do_power; /// -#include #include #include #include -#include #include #include -#include #include "vtr_assert.h" #include "vtr_ndoffsetmatrix.h" @@ -29,7 +26,6 @@ #include "vtr_color_map.h" #include "vtr_path.h" -#include "vpr_utils.h" #include "vpr_error.h" #include "globals.h" @@ -37,15 +33,10 @@ #include "draw.h" #include "draw_basic.h" #include "draw_rr.h" -#include "draw_rr_edges.h" #include "draw_toggle_functions.h" -#include "draw_triangle.h" -#include "draw_mux.h" #include "draw_searchbar.h" -#include "read_xml_arch_file.h" #include "draw_global.h" #include "intra_logic_block.h" -#include "atom_netlist.h" #include "tatum/report/TimingPathCollector.hpp" #include "hsl.h" #include "route_export.h" @@ -53,29 +44,12 @@ #include "save_graphics.h" #include "timing_info.h" #include "physical_types.h" -#include "route_common.h" -#include "breakpoint.h" #include "manual_moves.h" #include "draw_noc.h" #include "draw_floorplanning.h" #include "move_utils.h" #include "ui_setup.h" -#include "buttons.h" - -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. 
* - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. */ -# include -#endif #ifndef NO_GRAPHICS diff --git a/vpr/src/draw/draw_basic.cpp b/vpr/src/draw/draw_basic.cpp index 64b3d49979f..99058b0975c 100644 --- a/vpr/src/draw/draw_basic.cpp +++ b/vpr/src/draw/draw_basic.cpp @@ -2,20 +2,15 @@ * that aren't RR nodes or muxes (they have their own file). * All functions in this file contain the prefix draw_. */ #include -#include -#include #include #include #include #include -#include #include "vtr_assert.h" #include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" #include "vtr_log.h" #include "vtr_color_map.h" -#include "vtr_path.h" #include "vpr_utils.h" #include "vpr_error.h" @@ -26,31 +21,13 @@ #include "draw_rr.h" #include "draw_rr_edges.h" #include "draw_basic.h" -#include "draw_toggle_functions.h" #include "draw_triangle.h" -#include "draw_searchbar.h" -#include "draw_mux.h" #include "read_xml_arch_file.h" #include "draw_global.h" -#include "intra_logic_block.h" #include "move_utils.h" #include "route_export.h" #include "tatum/report/TimingPathCollector.hpp" -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. 
*/ -# include -#endif - #ifndef NO_GRAPHICS //To process key presses we need the X11 keysym definitions, diff --git a/vpr/src/draw/draw_floorplanning.cpp b/vpr/src/draw/draw_floorplanning.cpp index 8e93d0ca7bd..9e56cfda5ac 100644 --- a/vpr/src/draw/draw_floorplanning.cpp +++ b/vpr/src/draw/draw_floorplanning.cpp @@ -1,42 +1,19 @@ #include -#include "vpr_utils.h" #include "vpr_error.h" #include "globals.h" -#include "atom_netlist.h" + #include "draw_floorplanning.h" #include "user_place_constraints.h" #include "draw_color.h" #include "draw.h" -#include "draw_rr.h" -#include "draw_rr_edges.h" -#include "draw_basic.h" -#include "draw_toggle_functions.h" -#include "draw_triangle.h" -#include "draw_searchbar.h" -#include "draw_mux.h" #include "read_xml_arch_file.h" #include "draw_global.h" #include "intra_logic_block.h" -#include "move_utils.h" #include "route_export.h" #include "tatum/report/TimingPathCollector.hpp" -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. 
*/ -# include -#endif - #ifndef NO_GRAPHICS //To process key presses we need the X11 keysym definitions, diff --git a/vpr/src/draw/draw_mux.cpp b/vpr/src/draw/draw_mux.cpp index 746af57d811..e83fde50296 100644 --- a/vpr/src/draw/draw_mux.cpp +++ b/vpr/src/draw/draw_mux.cpp @@ -1,51 +1,14 @@ /*draw_mux.cpp contains all functions that draw muxes.*/ -#include -#include -#include -#include + #include -#include #include -#include #include "vtr_assert.h" -#include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" -#include "vtr_log.h" #include "vtr_color_map.h" -#include "vtr_path.h" - -#include "vpr_utils.h" -#include "vpr_error.h" - -#include "globals.h" #include "draw_color.h" -#include "draw.h" -#include "draw_rr.h" -#include "draw_rr_edges.h" -#include "draw_basic.h" -#include "draw_toggle_functions.h" -#include "draw_triangle.h" -#include "draw_searchbar.h" + #include "draw_mux.h" #include "read_xml_arch_file.h" -#include "draw_global.h" - -#include "move_utils.h" - -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. 
*/ -# include -#endif #ifndef NO_GRAPHICS @@ -64,39 +27,31 @@ ezgl::rectangle draw_mux(ezgl::point2d origin, e_side orientation, float height, switch (orientation) { case TOP: //Clock-wise from bottom left - mux_polygon.push_back({origin.x - height / 2, origin.y - width / 2}); - mux_polygon.push_back( - {origin.x - (scale * height) / 2, origin.y + width / 2}); - mux_polygon.push_back( - {origin.x + (scale * height) / 2, origin.y + width / 2}); - mux_polygon.push_back({origin.x + height / 2, origin.y - width / 2}); + mux_polygon.emplace_back(origin.x - height / 2, origin.y - width / 2); + mux_polygon.emplace_back(origin.x - (scale * height) / 2, origin.y + width / 2); + mux_polygon.emplace_back(origin.x + (scale * height) / 2, origin.y + width / 2); + mux_polygon.emplace_back(origin.x + height / 2, origin.y - width / 2); break; case BOTTOM: //Clock-wise from bottom left - mux_polygon.push_back( - {origin.x - (scale * height) / 2, origin.y - width / 2}); - mux_polygon.push_back({origin.x - height / 2, origin.y + width / 2}); - mux_polygon.push_back({origin.x + height / 2, origin.y + width / 2}); - mux_polygon.push_back( - {origin.x + (scale * height) / 2, origin.y - width / 2}); + mux_polygon.emplace_back(origin.x - (scale * height) / 2, origin.y - width / 2); + mux_polygon.emplace_back(origin.x - height / 2, origin.y + width / 2); + mux_polygon.emplace_back(origin.x + height / 2, origin.y + width / 2); + mux_polygon.emplace_back(origin.x + (scale * height) / 2, origin.y - width / 2); break; case LEFT: //Clock-wise from bottom left - mux_polygon.push_back( - {origin.x - width / 2, origin.y - (scale * height) / 2}); - mux_polygon.push_back( - {origin.x - width / 2, origin.y + (scale * height) / 2}); - mux_polygon.push_back({origin.x + width / 2, origin.y + height / 2}); - mux_polygon.push_back({origin.x + width / 2, origin.y - height / 2}); + mux_polygon.emplace_back(origin.x - width / 2, origin.y - (scale * height) / 2); + mux_polygon.emplace_back(origin.x - width / 
2, origin.y + (scale * height) / 2); + mux_polygon.emplace_back(origin.x + width / 2, origin.y + height / 2); + mux_polygon.emplace_back(origin.x + width / 2, origin.y - height / 2); break; case RIGHT: //Clock-wise from bottom left - mux_polygon.push_back({origin.x - width / 2, origin.y - height / 2}); - mux_polygon.push_back({origin.x - width / 2, origin.y + height / 2}); - mux_polygon.push_back( - {origin.x + width / 2, origin.y + (scale * height) / 2}); - mux_polygon.push_back( - {origin.x + width / 2, origin.y - (scale * height) / 2}); + mux_polygon.emplace_back(origin.x - width / 2, origin.y - height / 2); + mux_polygon.emplace_back(origin.x - width / 2, origin.y + height / 2); + mux_polygon.emplace_back(origin.x + width / 2, origin.y + (scale * height) / 2); + mux_polygon.emplace_back(origin.x + width / 2, origin.y - (scale * height) / 2); break; default: diff --git a/vpr/src/draw/draw_rr.cpp b/vpr/src/draw/draw_rr.cpp index 02645f6baf5..e3c2467cd9e 100644 --- a/vpr/src/draw/draw_rr.cpp +++ b/vpr/src/draw/draw_rr.cpp @@ -1,20 +1,13 @@ /*draw_rr.cpp contains all functions that relate to drawing routing resources.*/ #include -#include -#include #include #include -#include #include -#include #include "rr_graph_fwd.h" #include "vtr_assert.h" #include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" -#include "vtr_log.h" #include "vtr_color_map.h" -#include "vtr_path.h" #include "vpr_utils.h" #include "vpr_error.h" @@ -25,29 +18,12 @@ #include "draw_rr.h" #include "draw_rr_edges.h" #include "draw_basic.h" -#include "draw_toggle_functions.h" #include "draw_triangle.h" #include "draw_searchbar.h" #include "draw_mux.h" #include "read_xml_arch_file.h" #include "draw_global.h" -#include "move_utils.h" - -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. 
The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. */ -# include -#endif - #ifndef NO_GRAPHICS //To process key presses we need the X11 keysym definitions, diff --git a/vpr/src/draw/draw_rr_edges.cpp b/vpr/src/draw/draw_rr_edges.cpp index 274e02e0fb3..0815be661a6 100644 --- a/vpr/src/draw/draw_rr_edges.cpp +++ b/vpr/src/draw/draw_rr_edges.cpp @@ -1,19 +1,8 @@ /*draw_rr_edges.cpp contains all functions that draw lines between RR nodes.*/ -#include -#include -#include -#include #include -#include -#include -#include #include "vtr_assert.h" -#include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" -#include "vtr_log.h" #include "vtr_color_map.h" -#include "vtr_path.h" #include "vpr_utils.h" #include "vpr_error.h" @@ -23,28 +12,11 @@ #include "draw.h" #include "draw_rr.h" #include "draw_rr_edges.h" -#include "draw_toggle_functions.h" #include "draw_triangle.h" #include "draw_searchbar.h" -#include "draw_mux.h" #include "read_xml_arch_file.h" #include "draw_global.h" #include "draw_basic.h" -#include "move_utils.h" - -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. 
*/ -# include -#endif #ifndef NO_GRAPHICS diff --git a/vpr/src/draw/draw_searchbar.cpp b/vpr/src/draw/draw_searchbar.cpp index 00a1208bcba..a90583f42ce 100644 --- a/vpr/src/draw/draw_searchbar.cpp +++ b/vpr/src/draw/draw_searchbar.cpp @@ -1,52 +1,20 @@ /*draw_searchbar.cpp contains all functions related to searchbar actions.*/ #include -#include -#include -#include -#include -#include #include -#include #include "netlist_fwd.h" -#include "vtr_assert.h" -#include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" -#include "vtr_log.h" -#include "vtr_color_map.h" -#include "vtr_path.h" #include "vpr_utils.h" -#include "vpr_error.h" #include "globals.h" #include "draw_color.h" #include "draw.h" #include "draw_rr.h" -#include "draw_rr_edges.h" #include "draw_basic.h" -#include "draw_toggle_functions.h" -#include "draw_triangle.h" #include "draw_searchbar.h" -#include "draw_mux.h" #include "read_xml_arch_file.h" #include "draw_global.h" #include "intra_logic_block.h" -#include "move_utils.h" - -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. */ -# include -#endif #ifndef NO_GRAPHICS @@ -261,7 +229,7 @@ std::set draw_expand_non_configurable_rr_nodes(RRNodeId from_node) { void deselect_all() { // Sets the color of all clbs, nets and rr_nodes to the default. 
- // as well as clearing the highlighed sub-block + // as well as clearing the highlighted sub-block t_draw_state* draw_state = get_draw_state_vars(); const auto& cluster_ctx = g_vpr_ctx.clustering(); diff --git a/vpr/src/draw/draw_toggle_functions.cpp b/vpr/src/draw/draw_toggle_functions.cpp index 0f69b4c6087..968808c2906 100644 --- a/vpr/src/draw/draw_toggle_functions.cpp +++ b/vpr/src/draw/draw_toggle_functions.cpp @@ -1,53 +1,18 @@ - -#include -#include #include -#include -#include -#include #include #include -#include "vtr_assert.h" -#include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" -#include "vtr_log.h" -#include "vtr_color_map.h" -#include "vtr_path.h" - #include "vpr_utils.h" #include "vpr_error.h" #include "globals.h" #include "draw_color.h" #include "draw.h" -#include "draw_rr.h" -#include "draw_rr_edges.h" #include "draw_toggle_functions.h" -#include "draw_triangle.h" -#include "draw_searchbar.h" -#include "draw_mux.h" -#include "read_xml_arch_file.h" + #include "draw_global.h" #include "draw_basic.h" -#include "hsl.h" -#include "move_utils.h" -#include "intra_logic_block.h" - -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. 
*/ -# include -#endif #ifndef NO_GRAPHICS @@ -70,13 +35,10 @@ constexpr float EMPTY_BLOCK_LIGHTEN_FACTOR = 0.20; * @param app ezgl::application */ void toggle_nets_cbk(GtkComboBox* self, ezgl::application* app) { - std::cout << "Nets toggled" << std::endl; enum e_draw_nets new_state; t_draw_state* draw_state = get_draw_state_vars(); - std::cout << draw_state << std::endl; gchar* setting = gtk_combo_box_text_get_active_text( GTK_COMBO_BOX_TEXT(self)); - std::cout << setting << std::endl; // assign corresponding enum value to draw_state->show_nets if (strcmp(setting, "None") == 0) new_state = DRAW_NO_NETS; @@ -467,7 +429,7 @@ void select_layer_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/ // Only iterate through checkboxes with name "Layer ...", skip Cross Layer Connection if (std::string(name).find("Layer") != std::string::npos && std::string(name).find("Cross") == std::string::npos) { - // Change the the boolean of the draw_layer_display vector depending on checkbox + // Change the boolean of the draw_layer_display vector depending on checkbox if (state) { draw_state->draw_layer_display[index].visible = true; } else { @@ -492,7 +454,7 @@ void transparency_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/ int index = 0; // Iterate over transparency layers - for (GList* iter = children; iter != NULL; iter = g_list_next(iter)) { + for (GList* iter = children; iter != nullptr; iter = g_list_next(iter)) { if (GTK_IS_SPIN_BUTTON(iter->data)) { GtkWidget* spin_button = GTK_WIDGET(iter->data); const gchar* name = gtk_widget_get_name(spin_button); diff --git a/vpr/src/draw/draw_triangle.cpp b/vpr/src/draw/draw_triangle.cpp index 370868efbbc..82b5bd45376 100644 --- a/vpr/src/draw/draw_triangle.cpp +++ b/vpr/src/draw/draw_triangle.cpp @@ -1,50 +1,12 @@ -#include -#include -#include + #include -#include -#include -#include -#include #include "vtr_assert.h" -#include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" -#include "vtr_log.h" #include 
"vtr_color_map.h" -#include "vtr_path.h" - -#include "vpr_utils.h" -#include "vpr_error.h" - -#include "globals.h" #include "draw_color.h" -#include "draw.h" -#include "draw_rr.h" -#include "draw_rr_edges.h" -#include "draw_toggle_functions.h" #include "draw_triangle.h" -#include "draw_searchbar.h" -#include "draw_mux.h" -#include "read_xml_arch_file.h" #include "draw_global.h" -#include "draw_basic.h" - -#include "move_utils.h" -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. */ -# include -#endif #ifndef NO_GRAPHICS diff --git a/vpr/src/draw/manual_moves.cpp b/vpr/src/draw/manual_moves.cpp index 81fe89ebcb0..6a98d6ee419 100644 --- a/vpr/src/draw/manual_moves.cpp +++ b/vpr/src/draw/manual_moves.cpp @@ -261,16 +261,16 @@ void manual_move_cost_summary_dialog() { switch (result) { //If the user accepts the manual move case GTK_RESPONSE_ACCEPT: - draw_state->manual_moves_state.manual_move_info.user_move_outcome = ACCEPTED; + draw_state->manual_moves_state.manual_move_info.user_move_outcome = e_move_result::ACCEPTED; application.update_message(msg); break; //If the user rejects the manual move case GTK_RESPONSE_REJECT: - draw_state->manual_moves_state.manual_move_info.user_move_outcome = REJECTED; + draw_state->manual_moves_state.manual_move_info.user_move_outcome = e_move_result::REJECTED; application.update_message("Manual move was rejected"); break; default: - draw_state->manual_moves_state.manual_move_info.user_move_outcome = ABORTED; + 
draw_state->manual_moves_state.manual_move_info.user_move_outcome = e_move_result::ABORTED; break; } diff --git a/vpr/src/draw/manual_moves.h b/vpr/src/draw/manual_moves.h index 9633bf35b89..45739549c85 100644 --- a/vpr/src/draw/manual_moves.h +++ b/vpr/src/draw/manual_moves.h @@ -57,8 +57,8 @@ struct ManualMovesInfo { double delta_bounding_box = 0; bool valid_input = true; t_pl_loc to_location; - e_move_result placer_move_outcome = ABORTED; - e_move_result user_move_outcome = ABORTED; + e_move_result placer_move_outcome = e_move_result::ABORTED; + e_move_result user_move_outcome = e_move_result::ABORTED; }; /** diff --git a/vpr/src/draw/search_bar.cpp b/vpr/src/draw/search_bar.cpp index b557b27d5ca..5e78934841a 100644 --- a/vpr/src/draw/search_bar.cpp +++ b/vpr/src/draw/search_bar.cpp @@ -42,16 +42,6 @@ # include "route_export.h" # include "search_bar.h" -# ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -# else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. 
*/ -# include -# endif - //To process key presses we need the X11 keysym definitions, //which are unavailable when building with MINGW # if defined(X11) && !defined(__MINGW32__) @@ -76,7 +66,7 @@ void search_and_highlight(GtkWidget* /*widget*/, ezgl::application* app) { std::stringstream ss(user_input); auto search_type = get_search_type(app); - if (search_type == "") + if (search_type.empty()) return; // reset @@ -119,7 +109,7 @@ void search_and_highlight(GtkWidget* /*widget*/, ezgl::application* app) { * * If the block does not exist in the atom netlist, we will check the CLB netlist to see if * they searched for a cluster block*/ - std::string block_name = ""; + std::string block_name; ss >> block_name; AtomBlockId atom_blk_id = atom_ctx.nlist.find_block(block_name); @@ -159,7 +149,7 @@ void search_and_highlight(GtkWidget* /*widget*/, ezgl::application* app) { else if (search_type == "Net Name") { //in this case, all nets (clb and non-clb) are contained in the atom netlist //So we only need to search this one - std::string net_name = ""; + std::string net_name; ss >> net_name; AtomNetId atom_net_id = atom_ctx.nlist.find_net(net_name); @@ -376,8 +366,6 @@ void warning_dialog_box(const char* message) { "response", G_CALLBACK(gtk_widget_destroy), dialog); - - return; } /** @@ -411,7 +399,7 @@ void search_type_changed(GtkComboBox* self, ezgl::application* app) { } else if (searchType == "Net Name") { gtk_entry_completion_set_model(completion, netNames); } else { //setting to null if option does not require auto-complete - gtk_entry_completion_set_model(completion, NULL); + gtk_entry_completion_set_model(completion, nullptr); gtk_entry_set_completion(searchBar, nullptr); } } @@ -506,10 +494,10 @@ void enable_autocomplete(ezgl::application* app) { auto draw_state = get_draw_state_vars(); std::string searchType = get_search_type(app); - if (searchType == "") + if (searchType.empty()) return; //Checking to make sure that we are on a mode that uses auto-complete - if 
(gtk_entry_completion_get_model(completion) == NULL) { + if (gtk_entry_completion_get_model(completion) == nullptr) { std::cout << "NO MODEL SELECTED" << std::endl; return; } diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index 5461d1e095f..354135f2097 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -97,8 +97,6 @@ std::map do_clustering(const t_packer_opts& pa /**************************************************************** * Initialization *****************************************************************/ - VTR_ASSERT(packer_opts.packer_algorithm == PACK_GREEDY); - t_cluster_progress_stats cluster_stats; //int num_molecules, num_molecules_processed, mols_since_last_print, blocks_since_last_analysis, diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp index bb662c988d0..b33e05f077a 100644 --- a/vpr/src/place/RL_agent_util.cpp +++ b/vpr/src/place/RL_agent_util.cpp @@ -60,7 +60,7 @@ std::pair, std::unique_ptr> create second_state_avail_moves.push_back(e_move_type::NOC_ATTRACTION_CENTROID); } - if (placer_opts.place_agent_algorithm == E_GREEDY) { + if (placer_opts.place_agent_algorithm == e_agent_algorithm::E_GREEDY) { std::unique_ptr karmed_bandit_agent1, karmed_bandit_agent2; //agent's 1st state if (placer_opts.place_agent_space == e_agent_space::MOVE_BLOCK_TYPE) { @@ -133,40 +133,20 @@ std::pair, std::unique_ptr> create return move_generators; } -void assign_current_move_generator(std::unique_ptr& move_generator, - std::unique_ptr& move_generator2, - e_agent_state agent_state, - const t_placer_opts& placer_opts, - bool in_quench, - std::unique_ptr& current_move_generator) { +MoveGenerator& select_move_generator(std::unique_ptr& move_generator, + std::unique_ptr& move_generator2, + e_agent_state agent_state, + const t_placer_opts& placer_opts, + bool in_quench) { if (in_quench) { if (placer_opts.place_quench_algorithm.is_timing_driven() && placer_opts.place_agent_multistate) - 
current_move_generator = std::move(move_generator2); + return *move_generator2; else - current_move_generator = std::move(move_generator); + return *move_generator; } else { if (agent_state == e_agent_state::EARLY_IN_THE_ANNEAL || !placer_opts.place_agent_multistate) - current_move_generator = std::move(move_generator); + return *move_generator; else - current_move_generator = std::move(move_generator2); - } -} - -void update_move_generator(std::unique_ptr& move_generator, - std::unique_ptr& move_generator2, - e_agent_state agent_state, - const t_placer_opts& placer_opts, - bool in_quench, - std::unique_ptr& current_move_generator) { - if (in_quench) { - if (placer_opts.place_quench_algorithm.is_timing_driven() && placer_opts.place_agent_multistate) - move_generator2 = std::move(current_move_generator); - else - move_generator = std::move(current_move_generator); - } else { - if (agent_state == e_agent_state::EARLY_IN_THE_ANNEAL || !placer_opts.place_agent_multistate) - move_generator = std::move(current_move_generator); - else - move_generator2 = std::move(current_move_generator); + return *move_generator2; } } \ No newline at end of file diff --git a/vpr/src/place/RL_agent_util.h b/vpr/src/place/RL_agent_util.h index 81d09eaa755..0ec801e8000 100644 --- a/vpr/src/place/RL_agent_util.h +++ b/vpr/src/place/RL_agent_util.h @@ -33,22 +33,13 @@ std::pair, std::unique_ptr> create vtr::RngContainer& rng); /** - * @brief copy one of the available move_generators to be the current move_generator that would be used in the placement based on the placer_options and the agent state + * @brief Returns a reference to one of the available move generators to be the current move generator + * that would be used in the placement based on the placer_options and the agent state */ -void assign_current_move_generator(std::unique_ptr& move_generator, - std::unique_ptr& move_generator2, - e_agent_state agent_state, - const t_placer_opts& placer_opts, - bool in_quench, - std::unique_ptr&
current_move_generator); +MoveGenerator& select_move_generator(std::unique_ptr& move_generator, + std::unique_ptr& move_generator2, + e_agent_state agent_state, + const t_placer_opts& placer_opts, + bool in_quench); -/** - * @brief move the updated current_move_generator to its original move_Generator structure based on the placer_options and the agent state - */ -void update_move_generator(std::unique_ptr& move_generator, - std::unique_ptr& move_generator2, - e_agent_state agent_state, - const t_placer_opts& placer_opts, - bool in_quench, - std::unique_ptr& current_move_generator); #endif diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp new file mode 100644 index 00000000000..1d8836956ab --- /dev/null +++ b/vpr/src/place/annealer.cpp @@ -0,0 +1,877 @@ + +#include "annealer.h" + +#include +#include + +#include "globals.h" +#include "draw_global.h" +#include "vpr_types.h" +#include "place_util.h" +#include "placer_state.h" +#include "move_utils.h" +#include "noc_place_utils.h" +#include "NetPinTimingInvalidator.h" +#include "place_timing_update.h" +#include "read_place.h" +#include "placer_breakpoint.h" +#include "RL_agent_util.h" + +/**************************************************************************/ +/*************** Static Function Declarations *****************************/ +/**************************************************************************/ + +/** + * @brief Check if the setup slack has gotten better or worse due to block swap. + * + * Get all the modified slack values via the PlacerSetupSlacks class, and compare + * them with the original values at these connections. Sort them and compare them + * one by one, and return the difference of the first different pair. + * + * If the new slack value is larger (better), then return a negative value so that + * the move will be accepted. If the new slack value is smaller (worse), return a + * positive value so that the move will be rejected.
+ * + * If no slack values have changed, then return an arbitrary positive number. A + * move resulting in no change in the slack values should probably be unnecessary. + * + * The sorting is needed to prevent in the unlikely circumstance that a bad slack + * value suddenly got very good due to the block move, while a good slack value + * got very bad, perhaps even worse than the original worse slack value. + */ +static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, + const PlacerState& placer_state); + +/*************************************************************************/ +/*************** Static Function Definitions *****************************/ +/*************************************************************************/ + +static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, + const PlacerState& placer_state) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& clb_nlist = cluster_ctx.clb_nlist; + + const auto& p_timing_ctx = placer_state.timing(); + const auto& connection_setup_slack = p_timing_ctx.connection_setup_slack; + + //Find the original/proposed setup slacks of pins with modified values + std::vector original_setup_slacks, proposed_setup_slacks; + + auto clb_pins_modified = setup_slacks->pins_with_modified_setup_slack(); + for (ClusterPinId clb_pin : clb_pins_modified) { + ClusterNetId net_id = clb_nlist.pin_net(clb_pin); + size_t ipin = clb_nlist.pin_net_index(clb_pin); + + original_setup_slacks.push_back(connection_setup_slack[net_id][ipin]); + proposed_setup_slacks.push_back(setup_slacks->setup_slack(net_id, ipin)); + } + + //Sort in ascending order, from the worse slack value to the best + std::stable_sort(original_setup_slacks.begin(), original_setup_slacks.end()); + std::stable_sort(proposed_setup_slacks.begin(), proposed_setup_slacks.end()); + + //Check the first pair of slack values that are different + //If found, return their difference + for (size_t idiff = 0; idiff < 
original_setup_slacks.size(); ++idiff) { + float slack_diff = original_setup_slacks[idiff] - proposed_setup_slacks[idiff]; + + if (slack_diff != 0) { + return slack_diff; + } + } + + //If all slack values are identical (or no modified slack values), + //reject this move by returning an arbitrary positive number as cost. + return 1; +} + +/**************************************************************************************/ +/*************** Member Function Definitions for t_annealing_state ********************/ +/**************************************************************************************/ + +///@brief Constructor: Initialize all annealing state variables and macros. +t_annealing_state::t_annealing_state(float first_t, + float first_rlim, + int first_move_lim, + float first_crit_exponent) { + num_temps = 0; + alpha = 1.f; + t = first_t; + rlim = first_rlim; + move_lim_max = first_move_lim; + crit_exponent = first_crit_exponent; + move_lim = move_lim_max; + + /* Store this inverse value for speed when updating crit_exponent. */ + INVERSE_DELTA_RLIM = 1 / (first_rlim - FINAL_RLIM); + + /* The range limit cannot exceed the largest grid size. */ + const auto& grid = g_vpr_ctx.device().grid; + UPPER_RLIM = std::max(grid.width() - 1, grid.height() - 1); +} + +bool t_annealing_state::outer_loop_update(float success_rate, + const t_placer_costs& costs, + const t_placer_opts& placer_opts) { +#ifndef NO_GRAPHICS + t_draw_state* draw_state = get_draw_state_vars(); + if (!draw_state->list_of_breakpoints.empty()) { + // Update temperature in the current information variable. + get_bp_state_globals()->get_glob_breakpoint_state()->temp_count++; + } +#endif + + if (placer_opts.anneal_sched.type == e_sched_type::USER_SCHED) { + // Update t with user specified alpha. + t *= placer_opts.anneal_sched.alpha_t; + + // Check if the exit criterion is met. 
+ bool exit_anneal = t >= placer_opts.anneal_sched.exit_t; + + return exit_anneal; + } + + // Automatically determine exit temperature. + auto& cluster_ctx = g_vpr_ctx.clustering(); + float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size(); + + + VTR_ASSERT_SAFE(placer_opts.anneal_sched.type == e_sched_type::AUTO_SCHED); + // Automatically adjust alpha according to success rate. + if (success_rate > 0.96) { + alpha = 0.5; + } else if (success_rate > 0.8) { + alpha = 0.9; + } else if (success_rate > 0.15 || rlim > 1.) { + alpha = 0.95; + } else { + alpha = 0.8; + } + // Update temp. + t *= alpha; + // Must be duplicated to retain previous behavior. + if (t < t_exit || std::isnan(t_exit)) { + return false; + } + + // Update the range limiter. + update_rlim(success_rate); + + // If using timing driven algorithm, update the crit_exponent. + if (placer_opts.place_algorithm.is_timing_driven()) { + update_crit_exponent(placer_opts); + } + + // Continues the annealing. + return true; +} + +void t_annealing_state::update_rlim(float success_rate) { + rlim *= (1. - 0.44 + success_rate); + rlim = std::min(rlim, UPPER_RLIM); + rlim = std::max(rlim, FINAL_RLIM); +} + +void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) { + // If rlim == FINAL_RLIM, then scale == 0. + float scale = 1 - (rlim - FINAL_RLIM) * INVERSE_DELTA_RLIM; + + // Apply the scaling factor on crit_exponent. 
+ crit_exponent = scale * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first) + + placer_opts.td_place_exp_first; +} + +/**************************************************************************************/ +/*************** Member Function Definitions for PlacementAnnealer ********************/ +/**************************************************************************************/ + +PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, + PlacerState& placer_state, + t_placer_costs& costs, + NetCostHandler& net_cost_handler, + std::optional& noc_cost_handler, + const t_noc_opts& noc_opts, + vtr::RngContainer& rng, + std::unique_ptr&& move_generator_1, + std::unique_ptr&& move_generator_2, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + SetupTimingInfo* timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + int move_lim) + : placer_opts_(placer_opts) + , placer_state_(placer_state) + , costs_(costs) + , net_cost_handler_(net_cost_handler) + , noc_cost_handler_(noc_cost_handler) + , noc_opts_(noc_opts) + , rng_(rng) + , move_generator_1_(std::move(move_generator_1)) + , move_generator_2_(std::move(move_generator_2)) + , manual_move_generator_(placer_state, rng) + , agent_state_(e_agent_state::EARLY_IN_THE_ANNEAL) + , delay_model_(delay_model) + , criticalities_(criticalities) + , setup_slacks_(setup_slacks) + , timing_info_(timing_info) + , pin_timing_invalidator_(pin_timing_invalidator) + , move_stats_file_(nullptr, vtr::fclose) + , outer_crit_iter_count_(1) + , blocks_affected_(placer_state.block_locs().size()) + , quench_started_(false) +{ + const auto& device_ctx = g_vpr_ctx.device(); + + float first_crit_exponent; + if (placer_opts.place_algorithm.is_timing_driven()) { + first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ + } else { + first_crit_exponent = 0.f; + } + + int first_move_lim = 
get_initial_move_lim(placer_opts, placer_opts_.anneal_sched); + + if (placer_opts.inner_loop_recompute_divider != 0) { + inner_recompute_limit_ = static_cast(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider); + } else { + // don't do an inner recompute + inner_recompute_limit_ = first_move_lim + 1; + } + + /* calculate the number of moves in the quench that we should recompute timing after based on the value of * + * the commandline option quench_recompute_divider */ + if (placer_opts.quench_recompute_divider != 0) { + quench_recompute_limit_ = static_cast(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider); + } else { + // don't do an quench recompute + quench_recompute_limit_ = first_move_lim + 1; + } + + moves_since_cost_recompute_ = 0; + tot_iter_ = 0; + + // Get the first range limiter + placer_state_.mutable_move().first_rlim = (float)std::max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); + + annealing_state_ = t_annealing_state(EPSILON, // Set the temperature low to ensure that initial placement quality will be preserved + placer_state_.move().first_rlim, + first_move_lim, + first_crit_exponent); + + if (!placer_opts.move_stats_file.empty()) { + move_stats_file_ = std::unique_ptr( + vtr::fopen(placer_opts.move_stats_file.c_str(), "w"), + vtr::fclose); + LOG_MOVE_STATS_HEADER(); + } + + //allocate move type statistics vectors + move_type_stats_.blk_type_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); + move_type_stats_.accepted_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); + move_type_stats_.rejected_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); + + // Update the starting temperature for placement annealing to a more appropriate value + annealing_state_.t = estimate_starting_temperature_(); +} + +float 
PlacementAnnealer::estimate_starting_temperature_() { + if (placer_opts_.anneal_sched.type == e_sched_type::USER_SCHED) { + return placer_opts_.anneal_sched.init_t; + } + + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + // Used to calculate the average of cost when swap is accepted. + int num_accepted = 0; + + // Use double types to avoid round off. + double av = 0., sum_of_squares = 0.; + + // Determines the block swap loop count. + int move_lim = std::min(annealing_state_.move_lim_max, (int)cluster_ctx.clb_nlist.blocks().size()); + + bool manual_move_enabled = false; + + for (int i = 0; i < move_lim; i++) { +#ifndef NO_GRAPHICS + // Checks manual move flag for manual move feature + t_draw_state* draw_state = get_draw_state_vars(); + if (draw_state->show_graphics) { + manual_move_enabled = manual_move_is_selected(); + } +#endif /*NO_GRAPHICS*/ + + // Will not deploy setup slack analysis, so omit crit_exponent and setup_slack + e_move_result swap_result = try_swap_(*move_generator_1_, placer_opts_.place_algorithm, manual_move_enabled); + + if (swap_result == e_move_result::ACCEPTED) { + num_accepted++; + av += costs_.cost; + sum_of_squares += costs_.cost * costs_.cost; + swap_stats_.num_swap_accepted++; + } else if (swap_result == e_move_result::ABORTED) { + swap_stats_.num_swap_aborted++; + } else { + swap_stats_.num_swap_rejected++; + } + } + + // Take the average of the accepted swaps' cost values. + av = num_accepted > 0 ? (av / num_accepted) : 0.; + + // Get the standard deviation. + double std_dev = get_std_dev(num_accepted, sum_of_squares, av); + + // Print warning if not all swaps are accepted. + if (num_accepted != move_lim) { + VTR_LOG_WARN("Starting t: %d of %d configurations accepted.\n", + num_accepted, move_lim); + } + + // Improved initial placement uses a fast SA for NoC routers and centroid placement + // for other blocks.
The temperature is reduced to prevent SA from destroying the initial placement + float init_temp = std_dev / 64; + + return init_temp; +} + +e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, + const t_place_algorithm& place_algorithm, + bool manual_move_enabled) { + /* Picks some block and moves it to another spot. If this spot is + * occupied, switch the blocks. Assess the change in cost function. + * rlim is the range limiter. + * Returns whether the swap is accepted, rejected or aborted. + * Passes back the new value of the cost functions. + */ + auto& blk_loc_registry = placer_state_.mutable_blk_loc_registry(); + + // increment the call counter + swap_stats_.num_ts_called++; + + PlaceCritParams crit_params{annealing_state_.crit_exponent, + placer_opts_.place_crit_limit}; + + // move type and block type chosen by the agent + t_propose_action proposed_action{e_move_type::UNIFORM, -1}; + + MoveOutcomeStats move_outcome_stats; + + /* I'm using negative values of proposed_net_cost as a flag, + * so DO NOT use cost functions that can go negative. */ + double delta_c = 0; //Change in cost due to this swap. + double bb_delta_c = 0; //Change in the bounding box (wiring) cost. + double timing_delta_c = 0; //Change in the timing cost (delay * criticality). + + + /* Allow some fraction of moves to not be restricted by rlim, + * in the hopes of better escaping local minima. */ + float rlim; + if (placer_opts_.rlim_escape_fraction > 0. && rng_.frand() < placer_opts_.rlim_escape_fraction) { + rlim = std::numeric_limits::infinity(); + } else { + rlim = annealing_state_.rlim; + } + + e_create_move create_move_outcome = e_create_move::ABORT; + + // Determine whether we need to force swap two NoC router blocks + bool router_block_move = false; + if (noc_opts_.noc) { + router_block_move = check_for_router_swap(noc_opts_.noc_swap_percentage, rng_); + } + + //When manual move toggle button is active, the manual move window asks the user for input. 
+ if (manual_move_enabled) { +#ifndef NO_GRAPHICS + create_move_outcome = manual_move_display_and_propose(manual_move_generator_, blocks_affected_, + proposed_action.move_type, rlim, placer_opts_, + criticalities_); +#endif //NO_GRAPHICS + } else if (router_block_move) { + // generate a move where two random router blocks are swapped + create_move_outcome = propose_router_swap(blocks_affected_, rlim, blk_loc_registry, rng_); + proposed_action.move_type = e_move_type::UNIFORM; + } else { + //Generate a new move (perturbation) used to explore the space of possible placements + create_move_outcome = move_generator.propose_move(blocks_affected_, proposed_action, rlim, placer_opts_, criticalities_); + } + + move_type_stats_.incr_blk_type_moves(proposed_action); + + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) LOG_MOVE_STATS_PROPOSED(); + + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, + "\t\tBefore move Place cost %e, bb_cost %e, timing cost %e\n", + costs_.cost, costs_.bb_cost, costs_.timing_cost); + + e_move_result move_outcome = e_move_result::ABORTED; + + if (create_move_outcome == e_create_move::ABORT) { + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + LOG_MOVE_STATS_OUTCOME(std::numeric_limits::quiet_NaN(), + std::numeric_limits::quiet_NaN(), + std::numeric_limits::quiet_NaN(), "ABORTED", + "illegal move"); + } + + move_outcome = e_move_result::ABORTED; + + } else { + VTR_ASSERT(create_move_outcome == e_create_move::VALID); + + /* To make evaluating the move simpler (e.g. calculating changed bounding box), + * we first move the blocks to their new locations (apply the move to + * blk_loc_registry.block_locs) and then compute the change in cost. If the move + * is accepted, the inverse look-up in blk_loc_registry.grid_blocks is updated + * (committing the move). If the move is rejected, the blocks are returned to + * their original positions (reverting blk_loc_registry.block_locs to its original state). 
+ * + * Note that the inverse look-up blk_loc_registry.grid_blocks is only updated after + * move acceptance is determined, so it should not be used when evaluating a move. + */ + + // Update the block positions + blk_loc_registry.apply_move_blocks(blocks_affected_); + + /* Find all the nets affected by this swap and update the wiring costs. + * This cost value doesn't depend on the timing info. + * Also find all the pins affected by the swap, and calculates new connection + * delays and timing costs and store them in proposed_* data structures. + */ + net_cost_handler_.find_affected_nets_and_update_costs(delay_model_, criticalities_, blocks_affected_, + bb_delta_c, timing_delta_c); + + if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { + /* Take delta_c as a combination of timing and wiring cost. In + * addition to `timing_tradeoff`, we normalize the cost values. + * CRITICALITY_TIMING_PLACE algorithm works with somewhat stale + * timing information to save CPU time. + */ + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, + "\t\tMove bb_delta_c %e, bb_cost_norm %e, timing_tradeoff %f, " + "timing_delta_c %e, timing_cost_norm %e\n", + bb_delta_c, + costs_.bb_cost_norm, + placer_opts_.timing_tradeoff, + timing_delta_c, + costs_.timing_cost_norm); + delta_c = (1 - placer_opts_.timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm + + placer_opts_.timing_tradeoff * timing_delta_c * costs_.timing_cost_norm; + } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { + /* For setup slack analysis, we first do a timing analysis to get the newest + * slack values resulted from the proposed block moves. If the move turns out + * to be accepted, we keep the updated slack values and commit the block moves. + * If rejected, we reject the proposed block moves and revert this timing analysis. 
+ * + * It should be noted that when SLACK_TIMING_PLACE algorithm is used, proposed moves + * are evaluated with up-to-date timing information, which is more expensive but more + * accurate. + */ + + // Invalidates timing of modified connections for incremental timing updates. + pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); + + /* Update the connection_timing_cost and connection_delay + * values from the temporary values. */ + placer_state_.mutable_timing().commit_td_cost(blocks_affected_); + + /* Update timing information. Since we are analyzing setup slacks, + * we only update those values and keep the criticalities stale + * so as not to interfere with the original timing driven algorithm. + * + * Note: the timing info must be updated after applying block moves + * and committing the timing driven delays and costs. + * If we wish to revert this timing update due to move rejection, + * we need to revert block moves and restore the timing values. 
*/ + criticalities_->disable_update(); + setup_slacks_->enable_update(); + update_timing_classes(crit_params, timing_info_, criticalities_, + setup_slacks_, pin_timing_invalidator_, placer_state_); + + /* Get the setup slack analysis cost */ + //TODO: calculate a weighted average of the slack cost and wiring cost + delta_c = analyze_setup_slack_cost(setup_slacks_, placer_state_) * costs_.timing_cost_norm; + } else { + VTR_ASSERT_SAFE(place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, + "\t\tMove bb_delta_c %e, bb_cost_norm %e\n", + bb_delta_c, + costs_.bb_cost_norm); + delta_c = bb_delta_c * costs_.bb_cost_norm; + } + + NocCostTerms noc_delta_c; // change in NoC cost + /* Update the NoC data structure and costs*/ + if (noc_opts_.noc) { + VTR_ASSERT_SAFE(noc_cost_handler_.has_value()); + noc_cost_handler_->find_affected_noc_routers_and_update_noc_costs(blocks_affected_, noc_delta_c); + + // Include the NoC delta costs in the total cost change for this swap + delta_c += calculate_noc_cost(noc_delta_c, costs_.noc_cost_norm_factors, noc_opts_); + } + + // determine whether the move is accepted or rejected + move_outcome = assess_swap_(delta_c, annealing_state_.t); + + //Updates the manual_move_state members and displays costs to the user to decide whether to ACCEPT/REJECT manual move. +#ifndef NO_GRAPHICS + if (manual_move_enabled) { + move_outcome = pl_do_manual_move(delta_c, timing_delta_c, bb_delta_c, move_outcome); + } +#endif //NO_GRAPHICS + + if (move_outcome == e_move_result::ACCEPTED) { + costs_.cost += delta_c; + costs_.bb_cost += bb_delta_c; + + if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { + costs_.timing_cost += timing_delta_c; + + /* Invalidates timing of modified connections for incremental + * timing updates. These invalidations are accumulated for a + * big timing update in the outer loop. 
*/ + pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); + + /* Update the connection_timing_cost and connection_delay + * values from the temporary values. */ + placer_state_.mutable_timing().commit_td_cost(blocks_affected_); + + } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { + // Update the timing driven cost as usual + costs_.timing_cost += timing_delta_c; + + // Commit the setup slack information + // The timing delay and cost values should be committed already + commit_setup_slacks(setup_slacks_, placer_state_); + } + + // Update net cost functions and reset flags. + net_cost_handler_.update_move_nets(); + + // Update clb data structures since we kept the move. + blk_loc_registry.commit_move_blocks(blocks_affected_); + + if (noc_opts_.noc){ + noc_cost_handler_->commit_noc_costs(); + costs_ += noc_delta_c; + } + + //Highlights the new block when manual move is selected. +#ifndef NO_GRAPHICS + if (manual_move_enabled) { + manual_move_highlight_new_block_location(); + } +#endif //NO_GRAPHICS + + } else { + VTR_ASSERT_SAFE(move_outcome == e_move_result::REJECTED); + + // Reset the net cost function flags first. + net_cost_handler_.reset_move_nets(); + + // Restore the blk_loc_registry.block_locs data structures to their state before the move. + blk_loc_registry.revert_move_blocks(blocks_affected_); + + if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { + // Un-stage the values stored in proposed_* data structures + placer_state_.mutable_timing().revert_td_cost(blocks_affected_); + } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { + /* Revert the timing delays and costs to pre-update values. + * These routines must be called after reverting the block moves. 
+ */ + //TODO: make this process incremental + comp_td_connection_delays(delay_model_, placer_state_); + comp_td_costs(delay_model_, *criticalities_, placer_state_, &costs_.timing_cost); + + /* Re-invalidate the affected sink pins since the proposed + * move is rejected, and the same blocks are reverted to + * their original positions. */ + pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); + + // Revert the timing update + update_timing_classes(crit_params, timing_info_, criticalities_, + setup_slacks_, pin_timing_invalidator_, placer_state_); + + VTR_ASSERT_SAFE_MSG( + verify_connection_setup_slacks(setup_slacks_, placer_state_), + "The current setup slacks should be identical to the values before the try swap timing info update."); + } + + // Revert the traffic flow routes within the NoC + if (noc_opts_.noc) { + noc_cost_handler_->revert_noc_traffic_flow_routes(blocks_affected_); + } + } + + move_type_stats_.incr_accept_reject(proposed_action, move_outcome); + + move_outcome_stats.delta_cost_norm = delta_c; + move_outcome_stats.delta_bb_cost_norm = bb_delta_c * costs_.bb_cost_norm; + move_outcome_stats.delta_timing_cost_norm = timing_delta_c * costs_.timing_cost_norm; + + move_outcome_stats.delta_bb_cost_abs = bb_delta_c; + move_outcome_stats.delta_timing_cost_abs = timing_delta_c; + + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome == e_move_result::ACCEPTED ? 
"ACCEPTED" : "REJECTED"), ""); + } + } + move_outcome_stats.outcome = move_outcome; + + // If we force a router block move then it was not proposed by the + // move generator, so we should not calculate the reward and update + // the move generators status since this outcome is not a direct + // consequence of the move generator + if (!router_block_move) { + move_generator.calculate_reward_and_process_outcome(move_outcome_stats, delta_c, REWARD_BB_TIMING_RELATIVE_WEIGHT); + } + +#ifndef NO_GRAPHICS + stop_placement_and_check_breakpoints(blocks_affected_, move_outcome, delta_c, bb_delta_c, timing_delta_c); +#endif + + + // Clear the data structure containing block move info + blocks_affected_.clear_move_blocks(); + + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, + "\t\tAfter move Place cost %e, bb_cost %e, timing cost %e\n", + costs_.cost, costs_.bb_cost, costs_.timing_cost); + return move_outcome; +} + +void PlacementAnnealer::outer_loop_update_timing_info() { + if (placer_opts_.place_algorithm.is_timing_driven()) { + /* At each temperature change we update these values to be used + * for normalizing the tradeoff between timing and wirelength (bb) */ + if (outer_crit_iter_count_ >= placer_opts_.recompute_crit_iter || + placer_opts_.inner_loop_recompute_divider != 0) { + + PlaceCritParams crit_params{annealing_state_.crit_exponent, + placer_opts_.place_crit_limit}; + + // Update all timing related classes + perform_full_timing_update(crit_params, delay_model_, criticalities_, setup_slacks_, + pin_timing_invalidator_, timing_info_, &costs_, placer_state_); + + outer_crit_iter_count_ = 0; + } + outer_crit_iter_count_++; + } + + // Update the cost normalization factors + costs_.update_norm_factors(); + + // update the current total placement cost + costs_.cost = costs_.get_total_cost(placer_opts_, noc_opts_); +} + +void PlacementAnnealer::placement_inner_loop() { + // How many times have we dumped placement to a file this temperature? 
+ int inner_placement_save_count = 0; + + placer_stats_.reset(); + + bool manual_move_enabled = false; + + MoveGenerator& move_generator = select_move_generator(move_generator_1_, move_generator_2_, agent_state_, + placer_opts_, quench_started_); + + // Inner loop begins + for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < annealing_state_.move_lim; inner_iter++) { + e_move_result swap_result = try_swap_(move_generator, placer_opts_.place_algorithm, manual_move_enabled); + + if (swap_result == e_move_result::ACCEPTED) { + // Move was accepted. Update statistics that are useful for the annealing schedule. + placer_stats_.single_swap_update(costs_); + swap_stats_.num_swap_accepted++; + } else if (swap_result == e_move_result::ABORTED) { + swap_stats_.num_swap_aborted++; + } else { // swap_result == REJECTED + swap_stats_.num_swap_rejected++; + } + + if (placer_opts_.place_algorithm.is_timing_driven()) { + /* Do we want to re-timing analyze the circuit to get updated slack and criticality values? + * We do this only once in a while, since it is expensive. + */ + const int recompute_limit = quench_started_ ? quench_recompute_limit_ : inner_recompute_limit_; + // on last iteration don't recompute + if (inner_crit_iter_count >= recompute_limit && inner_iter != annealing_state_.move_lim - 1) { + + inner_crit_iter_count = 0; + + PlaceCritParams crit_params{annealing_state_.crit_exponent, + placer_opts_.place_crit_limit}; + + // Update all timing related classes + perform_full_timing_update(crit_params, delay_model_, criticalities_, + setup_slacks_, pin_timing_invalidator_, + timing_info_, &costs_, placer_state_); + } + inner_crit_iter_count++; + } + + /* Lines below prevent too much round-off error from accumulating + * in the cost over many iterations (due to incremental updates). + * This round-off can lead to error checks failing because the cost + * is different from what you get when you recompute from scratch. 
+ */ + moves_since_cost_recompute_++; + if (moves_since_cost_recompute_ > MAX_MOVES_BEFORE_RECOMPUTE) { + net_cost_handler_.recompute_costs_from_scratch(delay_model_, criticalities_, costs_); + + if (noc_cost_handler_.has_value()) { + noc_cost_handler_->recompute_costs_from_scratch(noc_opts_, costs_); + } + + moves_since_cost_recompute_ = 0; + } + + if (placer_opts_.placement_saves_per_temperature >= 1 && inner_iter > 0 + && (inner_iter + 1) % (annealing_state_.move_lim / placer_opts_.placement_saves_per_temperature) == 0) { + std::string filename = vtr::string_fmt("placement_%03d_%03d.place", + annealing_state_.num_temps + 1, inner_placement_save_count); + VTR_LOG("Saving placement to file at temperature move %d / %d: %s\n", + inner_iter, annealing_state_.move_lim, filename.c_str()); + print_place(nullptr, nullptr, filename.c_str(), placer_state_.block_locs()); + ++inner_placement_save_count; + } + } + + // Calculate the success_rate and std_dev of the costs. + placer_stats_.calc_iteration_stats(costs_, annealing_state_.move_lim); + + // update the RL agent's state + if (!quench_started_) { + if (placer_opts_.place_algorithm.is_timing_driven() && + placer_opts_.place_agent_multistate && + agent_state_ == e_agent_state::EARLY_IN_THE_ANNEAL) { + if (annealing_state_.alpha < 0.85 && annealing_state_.alpha > 0.6) { + agent_state_ = e_agent_state::LATE_IN_THE_ANNEAL; + VTR_LOG("Agent's 2nd state: \n"); + } + } + } + + tot_iter_ += annealing_state_.move_lim; + ++annealing_state_.num_temps; +} + + +int PlacementAnnealer::get_total_iteration() const { + return tot_iter_; +} + +e_agent_state PlacementAnnealer::get_agent_state() const { + return agent_state_; +} + +const t_annealing_state& PlacementAnnealer::get_annealing_state() const { + return annealing_state_; +} + +bool PlacementAnnealer::outer_loop_update_state() { + return annealing_state_.outer_loop_update(placer_stats_.success_rate, costs_, placer_opts_); +} + +void PlacementAnnealer::start_quench() { + 
quench_started_ = true; + + // Freeze out: only accept solutions that improve placement. + annealing_state_.t = 0; + + // Revert the move limit to initial value. + annealing_state_.move_lim = annealing_state_.move_lim_max; +} + +std::tuple PlacementAnnealer::get_stats() const { + return {swap_stats_, move_type_stats_, placer_stats_}; +} + +const MoveAbortionLogger& PlacementAnnealer::get_move_abortion_logger() const { + return blocks_affected_.move_abortion_logger; +} + +void PlacementAnnealer::LOG_MOVE_STATS_HEADER() { + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + if (move_stats_file_) { + fprintf(move_stats_file_.get(), + "temp,from_blk,to_blk,from_type,to_type," + "blk_count," + "delta_cost,delta_bb_cost,delta_td_cost," + "outcome,reason\n"); + } + } else { + if (move_stats_file_) { + fprintf(move_stats_file_.get(), + "VTR_ENABLE_DEBUG_LOGGING disabled " + "-- No move stats recorded\n"); + } + } +} + +void PlacementAnnealer::LOG_MOVE_STATS_PROPOSED() { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& grid_blocks = placer_state_.grid_blocks(); + + if (move_stats_file_) { + ClusterBlockId b_from = blocks_affected_.moved_blocks[0].block_num; + + t_pl_loc to = blocks_affected_.moved_blocks[0].new_loc; + ClusterBlockId b_to = grid_blocks.block_at_location(to); + + t_logical_block_type_ptr from_type = cluster_ctx.clb_nlist.block_type(b_from); + t_logical_block_type_ptr to_type = nullptr; + if (b_to) { + to_type = cluster_ctx.clb_nlist.block_type(b_to); + } + + fprintf(move_stats_file_.get(), + "%g," + "%d,%d," + "%s,%s," + "%d,", + annealing_state_.t, + int(b_from), int(b_to), + from_type->name.c_str(), + to_type ? 
to_type->name.c_str() : "EMPTY", + (int)blocks_affected_.moved_blocks.size()); + } +} + +void PlacementAnnealer::LOG_MOVE_STATS_OUTCOME(double delta_cost, double delta_bb_cost, double delta_td_cost, + const char* outcome, const char* reason) { + if (move_stats_file_) { + fprintf(move_stats_file_.get(), + "%g,%g,%g," + "%s,%s\n", + delta_cost, delta_bb_cost, delta_td_cost, + outcome, reason); + } +} + +e_move_result PlacementAnnealer::assess_swap_(double delta_c, double t) { + /* Returns: ACCEPTED -> move accepted, REJECTED -> move rejected. */ + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %e delta_c is %e\n", t, delta_c); + if (delta_c <= 0) { + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(delta_c < 0)\n"); + return e_move_result::ACCEPTED; + } + + if (t == 0.) { + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(t == 0)\n"); + return e_move_result::REJECTED; + } + + float fnum = rng_.frand(); + float prob_fac = std::exp(-delta_c / t); + if (prob_fac > fnum) { + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(hill climbing)\n"); + return e_move_result::ACCEPTED; + } + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(hill climbing)\n"); + return e_move_result::REJECTED; +} diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h new file mode 100644 index 00000000000..fd9b0dbd928 --- /dev/null +++ b/vpr/src/place/annealer.h @@ -0,0 +1,339 @@ + +#pragma once + +#include "vpr_types.h" + +#include "move_generator.h" // movestats +#include "net_cost_handler.h" +#include "manual_move_generator.h" + +#include +#include + +class PlacerState; +class t_placer_costs; +struct t_placer_opts; +enum class e_agent_state; + +class NocCostHandler; +class NetPinTimingInvalidator; + +/** + * These variables keep track of the number of swaps + * rejected, accepted or aborted. The total number of swap attempts + * is the sum of the three numbers.
+ */ +struct t_swap_stats { + int num_swap_rejected = 0; + int num_swap_accepted = 0; + int num_swap_aborted = 0; + int num_ts_called = 0; +}; + +/** + * @brief Stores variables that are used by the annealing process. + * + * This structure is updated by outer_loop_update() on each outer + * loop iteration. It stores various important variables that need to + * be accessed during the placement inner loop. + * + * Public members: + * @param t + * Temperature for simulated annealing. + * @param alpha + * Temperature decay factor (multiplied each outer loop iteration). + * @param num_temps + * The count of how many temperature iterations have passed. + * @param rlim + * Range limit for block swaps. + * Currently only updated by AUTO_SCHED. + * @param crit_exponent + * Used by timing-driven placement to "sharpen" the timing criticality. + * Depends on rlim. Currently only updated by AUTO_SCHED. + * @param move_lim + * Current block move limit. + * @param move_lim_max + * Maximum block move limit. + * + * Private members: + * @param UPPER_RLIM + * The upper limit for the range limiter value. + * @param FINAL_RLIM + * The final rlim (range limit) is 1, which is the smallest value that + * can still make progress, since an rlim of 0 wouldn't allow any swaps. + * @param INVERSE_DELTA_RLIM + * Used to update crit_exponent. See update_rlim() for more. + * + * Mutators: + * @param outer_loop_update() + * Update the annealing state variables in the placement outer loop. + * @param update_rlim(), update_crit_exponent() + * Inline subroutines used by the main routine outer_loop_update().
+ */ +class t_annealing_state { + public: + float t; + float alpha; + int num_temps; + + float rlim; + float crit_exponent; + int move_lim; + int move_lim_max; + + private: + float UPPER_RLIM; + float FINAL_RLIM = 1.; + float INVERSE_DELTA_RLIM; + + public: //Constructor + t_annealing_state() = default; + t_annealing_state(float first_t, + float first_rlim, + int first_move_lim, + float first_crit_exponent); + + public: //Mutator + /** + * @brief Update the annealing state according to the annealing schedule selected. + * + * USER_SCHED: A manual fixed schedule with fixed alpha and exit criteria. + * AUTO_SCHED: A more sophisticated schedule where alpha varies based on success ratio. + * + * @return True->continues the annealing. False->exits the annealing. + */ + bool outer_loop_update(float success_rate, + const t_placer_costs& costs, + const t_placer_opts& placer_opts); + + private: //Mutator + /** + * @brief Update the range limiter to keep acceptance prob. near 0.44. + * + * Use a floating point rlim to allow gradual transitions at low temps. + * The range is bounded by 1 (FINAL_RLIM) and the grid size (UPPER_RLIM). + */ + inline void update_rlim(float success_rate); + + /** + * @brief Update the criticality exponent. + * + * When rlim shrinks towards the FINAL_RLIM value (indicating + * that we are fine-tuning a more optimized placement), we can + * focus more on a smaller number of critical connections. + * To achieve this, we make the crit_exponent sharper, so that + * critical connections would become more critical than before. + * + * We calculate how close rlim is to its final value comparing + * to its initial value. Then, we apply the same scaling factor + * on the crit_exponent so that it lands on the suitable value + * between td_place_exp_first and td_place_exp_last. The scaling + * factor is calculated and applied linearly. 
+ */ + inline void update_crit_exponent(const t_placer_opts& placer_opts); +}; + +/** + * @class PlacementAnnealer + * @brief Simulated annealing optimizer for minimizing placement cost via block swaps. + * + * @details This class implements simulated annealing to optimize placement cost by swapping clustered blocks. + * Swaps that reduce the cost are always accepted, while those that increase the cost are accepted + * with a diminishing probability. + * + * The annealing process consists of two nested loops: + * - The **inner loop** (implemented in `placement_inner_loop()`) performs individual swaps, all evaluated at a fixed temperature. + * - The **outer loop** adjusts the temperature and determines whether further iterations are needed. + * + * Usage workflow: + * 1. Call `outer_loop_update_timing_info()` to update timing information. + * 2. Execute `placement_inner_loop()` for swap evaluations. + * 3. Call `outer_loop_update_state()` to check if more outer loop iterations are needed. + * 4. Optionally, use `start_quench()` to set the temperature to zero for a greedy optimization (quenching stage), + * then repeat steps 1 and 2. 
+ * + * Usage example: + * ************************************** + * PlacementAnnealer annealer(...); + * + * do { + * annealer.outer_loop_update_timing_info(); + * annealer.placement_inner_loop(); + * } while (annealer.outer_loop_update_state()); + * + * annealer.start_quench(); + * annealer.outer_loop_update_timing_info(); + * annealer.placement_inner_loop(); + * ************************************** + */ +class PlacementAnnealer { + public: + PlacementAnnealer(const t_placer_opts& placer_opts, + PlacerState& placer_state, + t_placer_costs& costs, + NetCostHandler& net_cost_handler, + std::optional& noc_cost_handler, + const t_noc_opts& noc_opts, + vtr::RngContainer& rng, + std::unique_ptr&& move_generator_1, + std::unique_ptr&& move_generator_2, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + SetupTimingInfo* timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + int move_lim); + + /** + * @brief Contains the inner loop of the simulated annealing that performs + * a certain number of swaps at a single temperature + */ + void placement_inner_loop(); + + /** + * @brief Updates the setup slacks and criticalities before the inner loop + * of the annealing/quench. It also updates normalization factors for different + * placement cost terms. + */ + void outer_loop_update_timing_info(); + + /** + * @brief Update the annealing state according to the annealing schedule selected. + * @return True->continues the annealing. False->exits the annealing. + */ + bool outer_loop_update_state(); + + /** + * @brief Starts the quench stage in simulated annealing by + * setting the temperature to zero and reverting the move limit + * (move_lim) to its maximum value (move_lim_max). + */ + void start_quench(); + + /// @brief Returns the total number of iterations (attempted swaps).
+ int get_total_iteration() const; + + /// @brief Return the RL agent's state + e_agent_state get_agent_state() const; + + /// @brief Returns a constant reference to the annealing state + const t_annealing_state& get_annealing_state() const; + + /// @brief Returns constant references to different statistics objects + std::tuple get_stats() const; + + /** + * @brief Returns MoveAbortionLogger to report how many moves + * were aborted for each reason. + * @return A constant reference to a MoveAbortionLogger object. + */ + const MoveAbortionLogger& get_move_abortion_logger() const; + + private: + + /** + * @brief Picks some block and moves it to another spot. + * + * If the new location is empty, directly move the block. If the new location + * is occupied, switch the blocks. Due to the different sizes of the blocks, + * this block switching may occur multiple times. It might also cause the + * current swap attempt to abort due to inability to find suitable locations + * for moved blocks. + * + * The move generator will record all the switched blocks in the variable + * `blocks_affected_`. Afterwards, the move will be assessed by the chosen + * cost formulation. Currently, there are three ways to assess move cost, + * which are stored in the enum type `t_place_algorithm`. + * + * @return Whether the block swap is accepted, rejected or aborted. + */ + e_move_result try_swap_(MoveGenerator& move_generator, + const t_place_algorithm& place_algorithm, + bool manual_move_enabled); + + /** + * @brief Determines whether a move should be accepted or not. + * Moves with non-positive delta cost are always accepted, but + * moves that increase the total cost are accepted with a + * probability that diminishes as the temperature decreases. + * @param delta_c The cost difference if the move is accepted. + * @param t The annealer's temperature. + * @return Whether the move is accepted or not.
+ */ + e_move_result assess_swap_(double delta_c, double t); + + /// @brief Find the starting temperature for the annealing loop. + float estimate_starting_temperature_(); + + private: + const t_placer_opts& placer_opts_; + PlacerState& placer_state_; + /// Stores different placement cost terms + t_placer_costs& costs_; + /// Computes bounding box for each cluster net + NetCostHandler& net_cost_handler_; + /// Computes NoC-related cost terms when NoC optimization is enabled + std::optional& noc_cost_handler_; + /// Contains weighting factors for NoC-related cost terms + const t_noc_opts& noc_opts_; + /// Random number generator for selecting random blocks and random locations + vtr::RngContainer& rng_; + + /// The move generator used in the first state of RL agent and initial temperature computation + std::unique_ptr move_generator_1_; + /// The move generator used in the second state of RL agent + std::unique_ptr move_generator_2_; + /// Handles manual swaps proposed by the user through graphical user interface + ManualMoveGenerator manual_move_generator_; + /// RL agent state + e_agent_state agent_state_; + + const PlaceDelayModel* delay_model_; + PlacerCriticalities* criticalities_; + PlacerSetupSlacks* setup_slacks_; + SetupTimingInfo* timing_info_; + NetPinTimingInvalidator* pin_timing_invalidator_; + std::unique_ptr move_stats_file_; + int outer_crit_iter_count_; + + t_annealing_state annealing_state_; + /// Swap statistics keep record of the number of accepted/rejected/aborted swaps. + t_swap_stats swap_stats_; + MoveTypeStat move_type_stats_; + t_placer_statistics placer_stats_; + + /// Keep record of moved blocks and affected pins in a swap + t_pl_blocks_to_be_moved blocks_affected_; + + private: + /** + * @brief The maximum number of swap attempts before invoking the + * once-in-a-while placement legality check as well as floating point + * variables round-offs check.
+ */ + static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000; + + /// Specifies how often (after how many swaps) timing information is recomputed + /// when the annealer isn't in the quench stage + int inner_recompute_limit_; + /// Specifies how often timing information is recomputed when the annealer is in the quench stage + int quench_recompute_limit_; + /// Used to trigger a BB and NoC cost re-computation from scratch + int moves_since_cost_recompute_; + /// Total number of iterations (attempted swaps). + int tot_iter_; + /// Indicates whether the annealer has entered into the quench stage + bool quench_started_; + + void LOG_MOVE_STATS_HEADER(); + void LOG_MOVE_STATS_PROPOSED(); + void LOG_MOVE_STATS_OUTCOME(double delta_cost, double delta_bb_cost, double delta_td_cost, + const char* outcome, const char* reason); + + /** + * @brief Defines the RL agent's reward function factor constant. This factor controls the weight of bb cost + * compared to the timing cost in the agent's reward function. 
The reward is calculated as + * -1*(1.5-REWARD_BB_TIMING_RELATIVE_WEIGHT)*timing_cost + (1+REWARD_BB_TIMING_RELATIVE_WEIGHT)*bb_cost) + */ + static constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4; +}; \ No newline at end of file diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index 8b8a198d451..e6c8f8e09db 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -1051,11 +1051,10 @@ static void place_all_blocks(const t_placer_opts& placer_opts, auto blk_id_type = cluster_ctx.clb_nlist.block_type(blk_id); -#ifdef VTR_ENABLE_DEBUG_LOGGING - enable_placer_debug(placer_opts, blk_id); -#else - (void)placer_opts; -#endif + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + enable_placer_debug(placer_opts, blk_id); + } + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "Popped Block %d\n", size_t(blk_id)); blocks_placed_since_heap_update++; diff --git a/vpr/src/place/move_generator.cpp b/vpr/src/place/move_generator.cpp index 0b68e3dafcc..fd22e8d8b34 100644 --- a/vpr/src/place/move_generator.cpp +++ b/vpr/src/place/move_generator.cpp @@ -51,7 +51,7 @@ void MoveGenerator::calculate_reward_and_process_outcome(const MoveOutcomeStats& } } -void MoveTypeStat::print_placement_move_types_stats() { +void MoveTypeStat::print_placement_move_types_stats() const { VTR_LOG("\n\nPlacement perturbation distribution by block and move type: \n"); VTR_LOG( diff --git a/vpr/src/place/move_generator.h b/vpr/src/place/move_generator.h index 7b05a32651b..e39493e16c6 100644 --- a/vpr/src/place/move_generator.h +++ b/vpr/src/place/move_generator.h @@ -17,7 +17,7 @@ struct MoveOutcomeStats { float delta_bb_cost_abs = std::numeric_limits::quiet_NaN(); float delta_timing_cost_abs = std::numeric_limits::quiet_NaN(); - e_move_result outcome = ABORTED; + e_move_result outcome = e_move_result::ABORTED; float elapsed_time = std::numeric_limits::quiet_NaN(); }; @@ -37,7 +37,28 @@ struct MoveTypeStat { /** * @brief 
Prints placement perturbation distribution by block and move type. */ - void print_placement_move_types_stats(); + void print_placement_move_types_stats() const; + + inline void incr_blk_type_moves(const t_propose_action& proposed_action) { + if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat + ++blk_type_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; + } + } + + inline void incr_accept_reject(const t_propose_action& proposed_action, + e_move_result move_result) { + if (move_result == e_move_result::ACCEPTED) { + // if the agent proposed the block type, then collect the block type stat + if (proposed_action.logical_blk_type_index != -1) { + ++accepted_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; + } + } else { + VTR_ASSERT_SAFE(move_result == e_move_result::REJECTED); + if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat + ++rejected_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; + } + } + } }; /** diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 6414f602fed..4cf2086c277 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -12,10 +12,20 @@ #include "place_constraints.h" #include "placer_state.h" -//f_placer_breakpoint_reached is used to stop the placer when a breakpoint is reached. When this flag is true, it stops the placer after the current perturbation. Thus, when a breakpoint is reached, this flag is set to true. +//f_placer_breakpoint_reached is used to stop the placer when a breakpoint is reached. +// When this flag is true, it stops the placer after the current perturbation. Thus, when a breakpoint is reached, this flag is set to true. 
//Note: The flag is only effective if compiled with VTR_ENABLE_DEBUG_LOGGING bool f_placer_breakpoint_reached = false; +//Accessor for f_placer_breakpoint_reached +bool placer_breakpoint_reached() { + return f_placer_breakpoint_reached; +} + +void set_placer_breakpoint_reached(bool flag) { + f_placer_breakpoint_reached = flag; +} + e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to, @@ -487,7 +497,6 @@ bool is_legal_swap_to_location(ClusterBlockId blk, return true; } -#ifdef VTR_ENABLE_DEBUG_LOGGING void enable_placer_debug(const t_placer_opts& placer_opts, ClusterBlockId blk_id) { if (!blk_id.is_valid()) { @@ -535,7 +544,6 @@ void enable_placer_debug(const t_placer_opts& placer_opts, if (active_blk_debug) f_placer_debug &= match_blk; if (active_net_debug) f_placer_debug &= match_net; } -#endif ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts, int& logical_blk_type_index, @@ -565,11 +573,10 @@ ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts, b_from = pick_from_block(logical_blk_type_index, rng); } } -#ifdef VTR_ENABLE_DEBUG_LOGGING - enable_placer_debug(placer_opts, b_from); -#else - (void)placer_opts; -#endif + + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + enable_placer_debug(placer_opts, b_from); + } return b_from; } @@ -772,15 +779,6 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type, return true; } -//Accessor for f_placer_breakpoint_reached -bool placer_breakpoint_reached() { - return f_placer_breakpoint_reached; -} - -void set_placer_breakpoint_reached(bool flag) { - f_placer_breakpoint_reached = flag; -} - bool find_to_loc_median(t_logical_block_type_ptr blk_type, const t_pl_loc& from_loc, const t_bb* limit_coords, @@ -1267,8 +1265,23 @@ bool intersect_range_limit_with_floorplan_constraints(ClusterBlockId b_from, } std::string e_move_result_to_string(e_move_result move_outcome) { - std::string move_result_to_string[] = {"Rejected", "Accepted", 
"Aborted"}; - return move_result_to_string[move_outcome]; + switch (move_outcome) { + case e_move_result::REJECTED: + return "Rejected"; + break; + + case e_move_result::ACCEPTED: + return "Accepted"; + break; + + case e_move_result::ABORTED: + return "Aborted"; + break; + + default: + return "Unsupported Move Outcome!"; + break; + } } int find_free_layer(t_logical_block_type_ptr logical_block, diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 3c12a9e36d6..e5555648866 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -19,7 +19,7 @@ constexpr size_t SMALL_NET = 4; /* This is for the placement swap routines. A swap attempt could be * * rejected, accepted or aborted (due to the limitations placed on the * * carry chain support at this point). */ -enum e_move_result { +enum class e_move_result { REJECTED, ACCEPTED, ABORTED @@ -94,18 +94,6 @@ struct t_range_limiters { float dm_rlim; }; -/** - * These variables keep track of the number of swaps - * rejected, accepted or aborted. The total number of swap attempts - * is the sum of the three number. 
- */ -struct t_swap_stats { - int num_swap_rejected = 0; - int num_swap_accepted = 0; - int num_swap_aborted = 0; - int num_ts_called = 0; -}; - e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to, @@ -463,7 +451,6 @@ t_bb union_2d_bb(const std::vector& tbb_vec); std::pair union_2d_bb_incr(const std::vector& num_edge_vec, const std::vector& bb_vec); -#ifdef VTR_ENABLE_DEBUG_LOGGING /** * @brief If the block ID passed to the placer_debug_net parameter of the command line is equal to blk_id, or if any of the nets * connected to the block share the same ID as the net ID passed to the placer_debug_net parameter of the command line, @@ -474,6 +461,5 @@ std::pair union_2d_bb_incr(const std::vector& num_edge_vec, */ void enable_placer_debug(const t_placer_opts& placer_opts, ClusterBlockId blk_id); -#endif #endif diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index ff3fd41a78e..ac049995347 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -105,12 +105,12 @@ static double wirelength_crossing_count(size_t fanout); NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, PlacerState& placer_state, - size_t num_nets, bool cube_bb) : cube_bb_(cube_bb) , placer_state_(placer_state) , placer_opts_(placer_opts) { const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size(); is_multi_layer_ = num_layers > 1; @@ -1677,7 +1677,7 @@ void NetCostHandler::recompute_costs_from_scratch(const PlaceDelayModel* delay_m check_and_print_cost(new_timing_cost, costs.timing_cost, "timing_cost"); costs.timing_cost = new_timing_cost; } else { - VTR_ASSERT(placer_opts_.place_algorithm == BOUNDING_BOX_PLACE); + VTR_ASSERT(placer_opts_.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); costs.cost = new_bb_cost * costs.bb_cost_norm; } } diff --git a/vpr/src/place/net_cost_handler.h 
b/vpr/src/place/net_cost_handler.h index e386e95eba4..2b8e59af88f 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -43,11 +43,11 @@ class NetCostHandler { * are affected by a move and data needed per net about where their terminals are in order to quickly (incrementally) update * their wirelength costs. These data structures are (layer_)ts_bb_edge_new, (layer_)ts_bb_coord_new, ts_layer_sink_pin_count, * and ts_nets_to_update. - * @param num_nets Number of nets in the netlist used by the placement engine (currently clustered netlist) + * @param placer_opts Contains some parameters that determine how the bounding box is computed. + * @param placer_state Contains information about block locations and net bounding boxes. * @param cube_bb True if the 3D bounding box should be used, false otherwise. - * @param place_cost_exp It is an exponent to which you take the average inverse channel */ - NetCostHandler(const t_placer_opts& placer_opts, PlacerState& placer_state, size_t num_nets, bool cube_bb); + NetCostHandler(const t_placer_opts& placer_opts, PlacerState& placer_state, bool cube_bb); /** * @brief Finds the bb cost from scratch. 
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index dc87077295d..69617b278a2 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1,9 +1,6 @@ #include #include #include -#include -#include -#include #include #include @@ -14,28 +11,21 @@ #include "vtr_assert.h" #include "vtr_log.h" #include "vtr_util.h" -#include "vtr_random.h" -#include "vtr_geometry.h" #include "vtr_time.h" #include "vtr_math.h" -#include "vtr_ndmatrix.h" #include "vpr_types.h" #include "vpr_error.h" #include "vpr_utils.h" -#include "vpr_net_pins_matrix.h" #include "globals.h" #include "place.h" +#include "annealer.h" #include "read_place.h" #include "draw.h" -#include "place_and_route.h" -#include "net_delay.h" -#include "timing_place_lookup.h" #include "timing_place.h" #include "read_xml_arch_file.h" #include "echo_files.h" -#include "place_macro.h" #include "histogram.h" #include "place_util.h" #include "analytic_placer.h" @@ -44,12 +34,8 @@ #include "place_timing_update.h" #include "move_transactions.h" #include "move_utils.h" -#include "place_constraints.h" -#include "manual_moves.h" #include "buttons.h" -#include "manual_move_generator.h" - #include "PlacementDelayCalculator.h" #include "VprTimingGraphResolver.h" #include "timing_util.h" @@ -58,117 +44,16 @@ #include "tatum/echo_writer.hpp" #include "tatum/TimingReporter.hpp" -#include "placer_breakpoint.h" #include "RL_agent_util.h" #include "place_checkpoint.h" #include "clustered_netlist_utils.h" -#include "cluster_placement.h" - #include "noc_place_utils.h" #include "net_cost_handler.h" #include "placer_state.h" -/* define the RL agent's reward function factor constant. This factor controls the weight of bb cost * - * compared to the timing cost in the agent's reward function. 
The reward is calculated as * - * -1*(1.5-REWARD_BB_TIMING_RELATIVE_WEIGHT)*timing_cost + (1+REWARD_BB_TIMING_RELATIVE_WEIGHT)*bb_cost) - */ -static constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4; - -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "draw_types.h" -# include "draw_global.h" -# include "draw_color.h" -#endif - -/************** Types and defines local to place.c ***************************/ -/* This defines the maximum number of swap attempts before invoking the * - * once-in-a-while placement legality check as well as floating point * - * variables round-offs check. */ -static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000; - -constexpr float INVALID_DELAY = std::numeric_limits::quiet_NaN(); -constexpr float INVALID_COST = std::numeric_limits::quiet_NaN(); - -/********************** Variables local to place.c ***************************/ - - -std::unique_ptr f_move_stats_file(nullptr, - vtr::fclose); - -#ifdef VTR_ENABLE_DEBUG_LOGGIING -# define LOG_MOVE_STATS_HEADER() \ - do { \ - if (f_move_stats_file) { \ - fprintf(f_move_stats_file.get(), \ - "temp,from_blk,to_blk,from_type,to_type," \ - "blk_count," \ - "delta_cost,delta_bb_cost,delta_td_cost," \ - "outcome,reason\n"); \ - } \ - } while (false) - -# define LOG_MOVE_STATS_PROPOSED(t, affected_blocks) \ - do { \ - if (f_move_stats_file) { \ - auto& place_ctx = g_vpr_ctx.placement(); \ - auto& cluster_ctx = g_vpr_ctx.clustering(); \ - ClusterBlockId b_from = affected_blocks.moved_blocks[0].block_num; \ - \ - t_pl_loc to = affected_blocks.moved_blocks[0].new_loc; \ - ClusterBlockId b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; \ - \ - t_logical_block_type_ptr from_type = cluster_ctx.clb_nlist.block_type(b_from); \ - t_logical_block_type_ptr to_type = nullptr; \ - if (b_to) { \ - to_type = cluster_ctx.clb_nlist.block_type(b_to); \ - } \ - \ - fprintf(f_move_stats_file.get(), \ - "%g," \ - "%d,%d," \ - "%s,%s," \ - "%d,", \ - t, \ - int(b_from), int(b_to), \ - from_type->name, 
(to_type ? to_type->name : "EMPTY"), \ - affected_blocks.moved_blocks.size()); \ - } \ - } while (false) - -# define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \ - outcome, reason) \ - do { \ - if (f_move_stats_file) { \ - fprintf(f_move_stats_file.get(), \ - "%g,%g,%g," \ - "%s,%s\n", \ - delta_cost, delta_bb_cost, delta_td_cost, \ - outcome, reason); \ - } \ - } while (false) - -#else - -# define LOG_MOVE_STATS_HEADER() \ - do { \ - fprintf(f_move_stats_file.get(), \ - "VTR_ENABLE_DEBUG_LOGGING disabled " \ - "-- No move stats recorded\n"); \ - } while (false) - -# define LOG_MOVE_STATS_PROPOSED(t, blocks_affected) \ - do { \ - } while (false) - -# define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \ - outcome, reason) \ - do { \ - } while (false) - -#endif /********************* Static subroutines local to place.c *******************/ #ifdef VERBOSE @@ -192,29 +77,6 @@ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac static void free_placement_structs(); -static e_move_result try_swap(const t_annealing_state* state, - t_placer_costs* costs, - MoveGenerator& move_generator, - ManualMoveGenerator& manual_move_generator, - SetupTimingInfo* timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - t_pl_blocks_to_be_moved& blocks_affected, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - MoveTypeStat& move_type_stat, - const t_place_algorithm& place_algorithm, - float timing_bb_factor, - bool manual_move_enabled, - t_swap_stats& swap_stats, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - std::optional& noc_cost_handler, - vtr::RngContainer& rng); - - static void check_place(const t_placer_costs& costs, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, @@ -231,89 +93,8 @@ static int check_placement_costs(const 
t_placer_costs& costs, PlacerState& placer_state, NetCostHandler& net_cost_handler); -static float starting_t(const t_annealing_state* state, - t_placer_costs* costs, - t_annealing_sched annealing_sched, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - SetupTimingInfo* timing_info, - MoveGenerator& move_generator, - ManualMoveGenerator& manual_move_generator, - NetPinTimingInvalidator* pin_timing_invalidator, - t_pl_blocks_to_be_moved& blocks_affected, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - MoveTypeStat& move_type_stat, - t_swap_stats& swap_stats, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - std::optional& noc_cost_handler, - vtr::RngContainer& rng); - static int count_connections(); -static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, - PlacerState& placer_state); - -static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, - PlacerTimingContext& p_timing_ctx); - -static void invalidate_affected_connections( - const t_pl_blocks_to_be_moved& blocks_affected, - NetPinTimingInvalidator* pin_tedges_invalidator, - TimingInfo* timing_info); - -static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, - const PlacerState& placer_state); - -static e_move_result assess_swap(double delta_c, double t, vtr::RngContainer& rng); - -static void update_placement_cost_normalization_factors(t_placer_costs* costs, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - const std::optional& noc_cost_handler); - -static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts); - -static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - t_placer_costs* costs, - int num_connections, - float crit_exponent, - int* outer_crit_iter_count, - const PlaceDelayModel* delay_model, - PlacerCriticalities* 
criticalities, - PlacerSetupSlacks* setup_slacks, - NetPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info, - PlacerState& placer_state, - const std::optional& noc_cost_handler); - -static void placement_inner_loop(const t_annealing_state* state, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - int inner_recompute_limit, - t_placer_statistics* stats, - t_placer_costs* costs, - int* moves_since_cost_recompute, - NetPinTimingInvalidator* pin_timing_invalidator, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - MoveGenerator& move_generator, - ManualMoveGenerator& manual_move_generator, - t_pl_blocks_to_be_moved& blocks_affected, - SetupTimingInfo* timing_info, - const t_place_algorithm& place_algorithm, - MoveTypeStat& move_type_stat, - float timing_bb_factor, - t_swap_stats& swap_stats, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - std::optional& noc_cost_handler, - vtr::RngContainer& rng); - static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, const t_analysis_opts& analysis_opts, const SetupTimingInfo& timing_info, @@ -346,7 +127,6 @@ static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry); /*****************************************************************************/ void try_place(const Netlist<>& net_list, const t_placer_opts& placer_opts, - t_annealing_sched annealing_sched, const t_router_opts& router_opts, const t_analysis_opts& analysis_opts, const t_noc_opts& noc_opts, @@ -371,18 +151,14 @@ void try_place(const Netlist<>& net_list, auto& timing_ctx = g_vpr_ctx.timing(); auto pre_place_timing_stats = timing_ctx.stats; - int tot_iter, moves_since_cost_recompute, num_connections, outer_crit_iter_count; - float first_crit_exponent; - - t_placer_costs costs(placer_opts.place_algorithm); + t_placer_costs costs(placer_opts.place_algorithm, noc_opts.noc); tatum::TimingPathInfo 
critical_path; float sTNS = NAN; float sWNS = NAN; char msg[vtr::bufsize]; - t_placer_statistics stats; t_placement_checkpoint placement_checkpoint; @@ -393,11 +169,6 @@ void try_place(const Netlist<>& net_list, std::unique_ptr placer_criticalities; std::unique_ptr pin_timing_invalidator; - t_pl_blocks_to_be_moved blocks_affected(net_list.blocks().size()); - - // Swap statistics keep record of the number accepted/rejected/aborted swaps. - t_swap_stats swap_stats; - if (placer_opts.place_algorithm.is_timing_driven()) { /*do this before the initial placement to avoid messing up the initial placement */ place_delay_model = alloc_lookups_and_delay_model(net_list, @@ -410,8 +181,7 @@ void try_place(const Netlist<>& net_list, is_flat); if (isEchoFileEnabled(E_ECHO_PLACEMENT_DELTA_DELAY_MODEL)) { - place_delay_model->dump_echo( - getEchoFileName(E_ECHO_PLACEMENT_DELTA_DELAY_MODEL)); + place_delay_model->dump_echo(getEchoFileName(E_ECHO_PLACEMENT_DELTA_DELAY_MODEL)); } } @@ -422,10 +192,9 @@ void try_place(const Netlist<>& net_list, VTR_LOG("Bounding box mode is %s\n", (cube_bb ? 
"Cube" : "Per-layer")); VTR_LOG("\n"); - int move_lim = (int)(annealing_sched.inner_num * pow(net_list.blocks().size(), 1.3333)); + int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333)); - PlacerState placer_state; - auto& place_move_ctx = placer_state.mutable_move(); + PlacerState placer_state(placer_opts.place_algorithm.is_timing_driven(), cube_bb); auto& blk_loc_registry = placer_state.mutable_blk_loc_registry(); const auto& p_timing_ctx = placer_state.timing(); const auto& p_runtime_ctx = placer_state.runtime(); @@ -443,8 +212,6 @@ void try_place(const Netlist<>& net_list, } #endif - ManualMoveGenerator manual_move_generator(placer_state, rng); - vtr::ScopedStartFinishTimer timer("Placement"); if (noc_opts.noc) { @@ -492,9 +259,7 @@ void try_place(const Netlist<>& net_list, if (placer_opts.place_algorithm.is_timing_driven()) { costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL); - first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ - - num_connections = count_connections(); + int num_connections = count_connections(); VTR_LOG("\n"); VTR_LOG("There are %d point to point connections in this circuit.\n", num_connections); @@ -514,8 +279,7 @@ void try_place(const Netlist<>& net_list, placement_delay_calc->set_tsu_margin_relative(placer_opts.tsu_rel_margin); placement_delay_calc->set_tsu_margin_absolute(placer_opts.tsu_abs_margin); - timing_info = make_setup_timing_info(placement_delay_calc, - placer_opts.timing_update_type); + timing_info = make_setup_timing_info(placement_delay_calc, placer_opts.timing_update_type); placer_setup_slacks = std::make_unique(cluster_ctx.clb_nlist, netlist_pin_lookup); @@ -532,7 +296,7 @@ void try_place(const Netlist<>& net_list, //First time compute timing and costs, compute from scratch PlaceCritParams crit_params; - crit_params.crit_exponent = first_crit_exponent; + crit_params.crit_exponent = 
placer_opts.td_place_exp_first; crit_params.crit_limit = placer_opts.place_crit_limit; initialize_timing_info(crit_params, place_delay_model.get(), placer_criticalities.get(), @@ -556,27 +320,21 @@ void try_place(const Netlist<>& net_list, *timing_info, debug_tnode); } - outer_crit_iter_count = 1; - /* Initialize the normalization factors. Calling costs.update_norm_factors() * * here would fail the golden results of strong_sdc benchmark */ costs.timing_cost_norm = 1 / costs.timing_cost; costs.bb_cost_norm = 1 / costs.bb_cost; } else { - VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE); + VTR_ASSERT(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); /* Total cost is the same as wirelength cost normalized*/ costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL); costs.bb_cost_norm = 1 / costs.bb_cost; /* Timing cost and normalization factors are not used */ + constexpr double INVALID_COST = std::numeric_limits::quiet_NaN(); costs.timing_cost = INVALID_COST; costs.timing_cost_norm = INVALID_COST; - - /* Other initializations */ - outer_crit_iter_count = 0; - num_connections = 0; - first_crit_exponent = 0; } if (noc_opts.noc) { @@ -592,7 +350,7 @@ void try_place(const Netlist<>& net_list, } // set the starting total placement cost - costs.cost = get_total_cost(&costs, placer_opts, noc_opts); + costs.cost = costs.get_total_cost(placer_opts, noc_opts); //Sanity check that initial placement is legal check_place(costs, @@ -652,66 +410,6 @@ void try_place(const Netlist<>& net_list, print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); } - int first_move_lim = get_initial_move_lim(placer_opts, annealing_sched); - - int inner_recompute_limit; - if (placer_opts.inner_loop_recompute_divider != 0) { - inner_recompute_limit = static_cast(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider); - } else { - /*don't do an inner recompute */ - inner_recompute_limit = first_move_lim + 1; 
- } - - /* calculate the number of moves in the quench that we should recompute timing after based on the value of * - * the commandline option quench_recompute_divider */ - int quench_recompute_limit; - if (placer_opts.quench_recompute_divider != 0) { - quench_recompute_limit = static_cast(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider); - } else { - /*don't do an quench recompute */ - quench_recompute_limit = first_move_lim + 1; - } - - //allocate helper vectors that are used by many move generators - place_move_ctx.X_coord.resize(10, 0); - place_move_ctx.Y_coord.resize(10, 0); - place_move_ctx.layer_coord.resize(10, 0); - - //allocate move type statistics vectors - MoveTypeStat move_type_stat; - move_type_stat.blk_type_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); - move_type_stat.accepted_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); - move_type_stat.rejected_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); - - /* Get the first range limiter */ - float first_rlim = (float)std::max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); - place_move_ctx.first_rlim = first_rlim; - - t_annealing_state state(annealing_sched, - EPSILON, // Set the temperature low to ensure that initial placement quality will be preserved - first_rlim, - first_move_lim, - first_crit_exponent, - device_ctx.grid.get_num_layers()); - - /* Update the starting temperature for placement annealing to a more appropriate value */ - state.t = starting_t(&state, &costs, annealing_sched, - place_delay_model.get(), placer_criticalities.get(), - placer_setup_slacks.get(), timing_info.get(), *move_generator, - manual_move_generator, pin_timing_invalidator.get(), - blocks_affected, placer_opts, noc_opts, move_type_stat, - swap_stats, placer_state, net_cost_handler, noc_cost_handler, rng); - - if 
(!placer_opts.move_stats_file.empty()) { - f_move_stats_file = std::unique_ptr( - vtr::fopen(placer_opts.move_stats_file.c_str(), "w"), - vtr::fclose); - LOG_MOVE_STATS_HEADER(); - } - - tot_iter = 0; - moves_since_cost_recompute = 0; - bool skip_anneal = false; #ifdef ENABLE_ANALYTIC_PLACE @@ -722,13 +420,12 @@ void try_place(const Netlist<>& net_list, } #endif /* ENABLE_ANALYTIC_PLACE */ - //RL agent state definition - e_agent_state agent_state = e_agent_state::EARLY_IN_THE_ANNEAL; - - std::unique_ptr current_move_generator; + PlacementAnnealer annealer(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler, + noc_opts, rng, std::move(move_generator), std::move(move_generator2), place_delay_model.get(), + placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(), move_lim); - //Define the timing bb weight factor for the agent's reward function - float timing_bb_factor = REWARD_BB_TIMING_RELATIVE_WEIGHT; + const t_annealing_state& annealing_state = annealer.get_annealing_state(); + const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); if (!skip_anneal) { //Table header @@ -739,118 +436,54 @@ void try_place(const Netlist<>& net_list, do { vtr::Timer temperature_timer; - outer_loop_update_timing_info(placer_opts, noc_opts, &costs, num_connections, - state.crit_exponent, &outer_crit_iter_count, - place_delay_model.get(), placer_criticalities.get(), - placer_setup_slacks.get(), pin_timing_invalidator.get(), - timing_info.get(), placer_state, noc_cost_handler); + annealer.outer_loop_update_timing_info(); if (placer_opts.place_algorithm.is_timing_driven()) { critical_path = timing_info->least_slack_critical_path(); sTNS = timing_info->setup_total_negative_slack(); sWNS = timing_info->setup_worst_negative_slack(); - //see if we should save the current placement solution as a checkpoint - - if (placer_opts.place_checkpointing - && agent_state == e_agent_state::LATE_IN_THE_ANNEAL) { + // 
see if we should save the current placement solution as a checkpoint + if (placer_opts.place_checkpointing && annealer.get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) { save_placement_checkpoint_if_needed(blk_loc_registry.block_locs(), placement_checkpoint, timing_info, costs, critical_path.delay()); } } - //move the appropriate move_generator to be the current used move generator - assign_current_move_generator(move_generator, move_generator2, - agent_state, placer_opts, false, current_move_generator); - - //do a complete inner loop iteration - placement_inner_loop(&state, placer_opts, noc_opts, - inner_recompute_limit, - &stats, &costs, &moves_since_cost_recompute, - pin_timing_invalidator.get(), place_delay_model.get(), - placer_criticalities.get(), placer_setup_slacks.get(), - *current_move_generator, manual_move_generator, - blocks_affected, timing_info.get(), - placer_opts.place_algorithm, move_type_stat, - timing_bb_factor, swap_stats, placer_state, - net_cost_handler, noc_cost_handler, rng); - - - //move the update used move_generator to its original variable - update_move_generator(move_generator, move_generator2, agent_state, - placer_opts, false, current_move_generator); - - tot_iter += state.move_lim; - ++state.num_temps; + // do a complete inner loop iteration + annealer.placement_inner_loop(); - print_place_status(state, stats, temperature_timer.elapsed_sec(), - critical_path.delay(), sTNS, sWNS, tot_iter, + print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), + critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(), noc_opts.noc, costs.noc_cost_terms); - if (placer_opts.place_algorithm.is_timing_driven() - && placer_opts.place_agent_multistate - && agent_state == e_agent_state::EARLY_IN_THE_ANNEAL) { - if (state.alpha < 0.85 && state.alpha > 0.6) { - agent_state = e_agent_state::LATE_IN_THE_ANNEAL; - VTR_LOG("Agent's 2nd state: \n"); - } - } - sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: 
%g", - costs.cost, costs.bb_cost, costs.timing_cost, state.t); - update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, - timing_info); + costs.cost, costs.bb_cost, costs.timing_cost, annealing_state.t); + update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info); //#ifdef VERBOSE // if (getEchoEnabled()) { // print_clb_placement("first_iteration_clb_placement.echo"); // } //#endif - } while (state.outer_loop_update(stats.success_rate, costs, placer_opts, - annealing_sched)); + } while (annealer.outer_loop_update_state()); /* Outer loop of the simulated annealing ends */ } //skip_anneal ends - /* Start Quench */ - state.t = 0; //Freeze out: only accept solutions that improve placement. - state.move_lim = state.move_lim_max; //Revert the move limit to initial value. + // Start Quench + annealer.start_quench(); auto pre_quench_timing_stats = timing_ctx.stats; { /* Quench */ vtr::ScopedFinishTimer temperature_timer("Placement Quench"); - outer_loop_update_timing_info(placer_opts, noc_opts, &costs, num_connections, - state.crit_exponent, &outer_crit_iter_count, - place_delay_model.get(), placer_criticalities.get(), - placer_setup_slacks.get(), pin_timing_invalidator.get(), - timing_info.get(), placer_state, noc_cost_handler); - - //move the appropriate move_generator to be the current used move generator - assign_current_move_generator(move_generator, move_generator2, - agent_state, placer_opts, true, current_move_generator); + annealer.outer_loop_update_timing_info(); /* Run inner loop again with temperature = 0 so as to accept only swaps * which reduce the cost of the placement */ - placement_inner_loop(&state, placer_opts, noc_opts, - quench_recompute_limit, - &stats, &costs, &moves_since_cost_recompute, - pin_timing_invalidator.get(), place_delay_model.get(), - placer_criticalities.get(), placer_setup_slacks.get(), - *current_move_generator, manual_move_generator, - blocks_affected, timing_info.get(), - placer_opts.place_quench_algorithm, 
move_type_stat, - timing_bb_factor, swap_stats, placer_state, - net_cost_handler, noc_cost_handler, rng); - - - //move the update used move_generator to its original variable - update_move_generator(move_generator, move_generator2, agent_state, - placer_opts, true, current_move_generator); - - tot_iter += state.move_lim; - ++state.num_temps; + annealer.placement_inner_loop(); if (placer_opts.place_quench_algorithm.is_timing_driven()) { critical_path = timing_info->least_slack_critical_path(); @@ -858,15 +491,15 @@ void try_place(const Netlist<>& net_list, sWNS = timing_info->setup_worst_negative_slack(); } - print_place_status(state, stats, temperature_timer.elapsed_sec(), - critical_path.delay(), sTNS, sWNS, tot_iter, + print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), + critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(), noc_opts.noc, costs.noc_cost_terms); } auto post_quench_timing_stats = timing_ctx.stats; //Final timing analysis PlaceCritParams crit_params; - crit_params.crit_exponent = state.crit_exponent; + crit_params.crit_exponent = annealing_state.crit_exponent; crit_params.crit_limit = placer_opts.place_crit_limit; if (placer_opts.place_algorithm.is_timing_driven()) { @@ -886,7 +519,7 @@ void try_place(const Netlist<>& net_list, if (placer_opts.placement_saves_per_temperature >= 1) { std::string filename = vtr::string_fmt("placement_%03d_%03d.place", - state.num_temps + 1, 0); + annealing_state.num_temps + 1, 0); VTR_LOG("Saving final placement to file: %s\n", filename.c_str()); print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); } @@ -917,7 +550,7 @@ void try_place(const Netlist<>& net_list, //Some stats VTR_LOG("\n"); VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called); - blocks_affected.move_abortion_logger.report_aborted_moves(); + annealer.get_move_abortion_logger().report_aborted_moves(); if (placer_opts.place_algorithm.is_timing_driven()) { //Final timing estimate @@ 
-970,9 +603,9 @@ void try_place(const Netlist<>& net_list, // Print out swap statistics print_resources_utilization(blk_loc_registry); - print_placement_swaps_stats(state, swap_stats); + print_placement_swaps_stats(annealing_state, swap_stats); - move_type_stat.print_placement_move_types_stats(); + move_type_stats.print_placement_move_types_stats(); if (noc_opts.noc) { write_noc_placement_file(noc_opts.noc_placement_file_name, blk_loc_registry.block_locs()); @@ -980,10 +613,8 @@ void try_place(const Netlist<>& net_list, free_placement_structs(); - print_timing_stats("Placement Quench", post_quench_timing_stats, - pre_quench_timing_stats); - print_timing_stats("Placement Total ", timing_ctx.stats, - pre_place_timing_stats); + print_timing_stats("Placement Quench", post_quench_timing_stats, pre_quench_timing_stats); + print_timing_stats("Placement Total ", timing_ctx.stats, pre_place_timing_stats); VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n", p_runtime_ctx.f_update_td_costs_connections_elapsed_sec, @@ -994,153 +625,6 @@ void try_place(const Netlist<>& net_list, copy_locs_to_global_state(blk_loc_registry); } -/* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */ -static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - t_placer_costs* costs, - int num_connections, - float crit_exponent, - int* outer_crit_iter_count, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - NetPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info, - PlacerState& placer_state, - const std::optional& noc_cost_handler) { - if (placer_opts.place_algorithm.is_timing_driven()) { - /*at each temperature change we update these values to be used */ - /*for normalizing the tradeoff between timing and wirelength (bb) */ - if (*outer_crit_iter_count >= placer_opts.recompute_crit_iter - || 
placer_opts.inner_loop_recompute_divider != 0) { -#ifdef VERBOSE - VTR_LOG("Outer loop recompute criticalities\n"); -#endif - num_connections = std::max(num_connections, 1); //Avoid division by zero - VTR_ASSERT(num_connections > 0); - - PlaceCritParams crit_params; - crit_params.crit_exponent = crit_exponent; - crit_params.crit_limit = placer_opts.place_crit_limit; - - //Update all timing related classes - perform_full_timing_update(crit_params, delay_model, criticalities, setup_slacks, - pin_timing_invalidator, timing_info, costs, placer_state); - - *outer_crit_iter_count = 0; - } - (*outer_crit_iter_count)++; - } - - /* Update the cost normalization factors */ - update_placement_cost_normalization_factors(costs, placer_opts, noc_opts, noc_cost_handler); -} - -/* Function which contains the inner loop of the simulated annealing */ -static void placement_inner_loop(const t_annealing_state* state, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - int inner_recompute_limit, - t_placer_statistics* stats, - t_placer_costs* costs, - int* moves_since_cost_recompute, - NetPinTimingInvalidator* pin_timing_invalidator, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - MoveGenerator& move_generator, - ManualMoveGenerator& manual_move_generator, - t_pl_blocks_to_be_moved& blocks_affected, - SetupTimingInfo* timing_info, - const t_place_algorithm& place_algorithm, - MoveTypeStat& move_type_stat, - float timing_bb_factor, - t_swap_stats& swap_stats, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - std::optional& noc_cost_handler, - vtr::RngContainer& rng) { - //How many times have we dumped placement to a file this temperature? 
- int inner_placement_save_count = 0; - - stats->reset(); - - bool manual_move_enabled = false; - - /* Inner loop begins */ - for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < state->move_lim; inner_iter++) { - e_move_result swap_result = try_swap(state, costs, move_generator, - manual_move_generator, timing_info, pin_timing_invalidator, - blocks_affected, delay_model, criticalities, setup_slacks, - placer_opts, noc_opts, move_type_stat, place_algorithm, - timing_bb_factor, manual_move_enabled, swap_stats, - placer_state, net_cost_handler, noc_cost_handler, rng); - - if (swap_result == ACCEPTED) { - /* Move was accepted. Update statistics that are useful for the annealing schedule. */ - stats->single_swap_update(*costs); - swap_stats.num_swap_accepted++; - } else if (swap_result == ABORTED) { - swap_stats.num_swap_aborted++; - } else { // swap_result == REJECTED - swap_stats.num_swap_rejected++; - } - - if (place_algorithm.is_timing_driven()) { - /* Do we want to re-timing analyze the circuit to get updated slack and criticality values? - * We do this only once in a while, since it is expensive. - */ - if (inner_crit_iter_count >= inner_recompute_limit - && inner_iter != state->move_lim - 1) { /*on last iteration don't recompute */ - - inner_crit_iter_count = 0; -#ifdef VERBOSE - VTR_LOG("Inner loop recompute criticalities\n"); -#endif - - PlaceCritParams crit_params; - crit_params.crit_exponent = state->crit_exponent; - crit_params.crit_limit = placer_opts.place_crit_limit; - - //Update all timing related classes - perform_full_timing_update(crit_params, delay_model, criticalities, - setup_slacks, pin_timing_invalidator, - timing_info, costs, placer_state); - } - inner_crit_iter_count++; - } - - /* Lines below prevent too much round-off error from accumulating - * in the cost over many iterations (due to incremental updates). 
- * This round-off can lead to error checks failing because the cost - * is different from what you get when you recompute from scratch. - */ - ++(*moves_since_cost_recompute); - if (*moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) { - net_cost_handler.recompute_costs_from_scratch(delay_model, criticalities, *costs); - - if (noc_cost_handler.has_value()) { - noc_cost_handler->recompute_costs_from_scratch(noc_opts, *costs); - } - - *moves_since_cost_recompute = 0; - } - - if (placer_opts.placement_saves_per_temperature >= 1 && inner_iter > 0 - && (inner_iter + 1) % (state->move_lim / placer_opts.placement_saves_per_temperature) == 0) { - std::string filename = vtr::string_fmt("placement_%03d_%03d.place", - state->num_temps + 1, inner_placement_save_count); - VTR_LOG("Saving placement to file at temperature move %d / %d: %s\n", - inner_iter, state->move_lim, filename.c_str()); - print_place(nullptr, nullptr, filename.c_str(), placer_state.block_locs()); - ++inner_placement_save_count; - } - } - - /* Calculate the success_rate and std_dev of the costs. */ - stats->calc_iteration_stats(*costs, state->move_lim); -} - /*only count non-global connections */ static int count_connections() { auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -1155,448 +639,7 @@ static int count_connections() { count += cluster_ctx.clb_nlist.net_sinks(net_id).size(); } - return (count); -} - -///@brief Find the starting temperature for the annealing loop. 
-static float starting_t(const t_annealing_state* state, - t_placer_costs* costs, - t_annealing_sched annealing_sched, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - SetupTimingInfo* timing_info, - MoveGenerator& move_generator, - ManualMoveGenerator& manual_move_generator, - NetPinTimingInvalidator* pin_timing_invalidator, - t_pl_blocks_to_be_moved& blocks_affected, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - MoveTypeStat& move_type_stat, - t_swap_stats& swap_stats, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - std::optional& noc_cost_handler, - vtr::RngContainer& rng) { - if (annealing_sched.type == USER_SCHED) { - return (annealing_sched.init_t); - } - - auto& cluster_ctx = g_vpr_ctx.clustering(); - - /* Use to calculate the average of cost when swap is accepted. */ - int num_accepted = 0; - - /* Use double types to avoid round off. */ - double av = 0., sum_of_squares = 0.; - - /* Determines the block swap loop count. 
*/ - int move_lim = std::min(state->move_lim_max, - (int)cluster_ctx.clb_nlist.blocks().size()); - - bool manual_move_enabled = false; - - for (int i = 0; i < move_lim; i++) { -#ifndef NO_GRAPHICS - //Checks manual move flag for manual move feature - t_draw_state* draw_state = get_draw_state_vars(); - if (draw_state->show_graphics) { - manual_move_enabled = manual_move_is_selected(); - } -#endif /*NO_GRAPHICS*/ - - //Will not deploy setup slack analysis, so omit crit_exponenet and setup_slack - e_move_result swap_result = try_swap(state, costs, move_generator, - manual_move_generator, timing_info, pin_timing_invalidator, - blocks_affected, delay_model, criticalities, setup_slacks, - placer_opts, noc_opts, move_type_stat, placer_opts.place_algorithm, - REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled, swap_stats, - placer_state, net_cost_handler, noc_cost_handler, rng); - - - if (swap_result == ACCEPTED) { - num_accepted++; - av += costs->cost; - sum_of_squares += costs->cost * costs->cost; - swap_stats.num_swap_accepted++; - } else if (swap_result == ABORTED) { - swap_stats.num_swap_aborted++; - } else { - swap_stats.num_swap_rejected++; - } - } - - /* Take the average of the accepted swaps' cost values. */ - av = num_accepted > 0 ? (av / num_accepted) : 0.; - - /* Get the standard deviation. */ - double std_dev = get_std_dev(num_accepted, sum_of_squares, av); - - /* Print warning if not all swaps are accepted. */ - if (num_accepted != move_lim) { - VTR_LOG_WARN("Starting t: %d of %d configurations accepted.\n", - num_accepted, move_lim); - } - -#ifdef VERBOSE - /* Print stats related to finding the initital temp. */ - VTR_LOG("std_dev: %g, average cost: %g, starting temp: %g\n", std_dev, av, 20. * std_dev); -#endif - - // Improved initial placement uses a fast SA for NoC routers and centroid placement - // for other blocks. 
The temperature is reduced to prevent SA from destroying the initial placement - float init_temp = std_dev / 64; - - return init_temp; -} - -/** - * @brief Pick some block and moves it to another spot. - * - * If the new location is empty, directly move the block. If the new location - * is occupied, switch the blocks. Due to the different sizes of the blocks, - * this block switching may occur for multiple times. It might also cause the - * current swap attempt to abort due to inability to find suitable locations - * for moved blocks. - * - * The move generator will record all the switched blocks in the variable - * `blocks_affected`. Afterwards, the move will be assessed by the chosen - * cost formulation. Currently, there are three ways to assess move cost, - * which are stored in the enum type `t_place_algorithm`. - * - * @return Whether the block swap is accepted, rejected or aborted. - */ -static e_move_result try_swap(const t_annealing_state* state, - t_placer_costs* costs, - MoveGenerator& move_generator, - ManualMoveGenerator& manual_move_generator, - SetupTimingInfo* timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - t_pl_blocks_to_be_moved& blocks_affected, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - MoveTypeStat& move_type_stat, - const t_place_algorithm& place_algorithm, - float timing_bb_factor, - bool manual_move_enabled, - t_swap_stats& swap_stats, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - std::optional& noc_cost_handler, - vtr::RngContainer& rng) { - /* Picks some block and moves it to another spot. If this spot is * - * occupied, switch the blocks. Assess the change in cost function. * - * rlim is the range limiter. * - * Returns whether the swap is accepted, rejected or aborted. * - * Passes back the new value of the cost functions. 
*/ - auto& blk_loc_registry = placer_state.mutable_blk_loc_registry(); - - float rlim_escape_fraction = placer_opts.rlim_escape_fraction; - float timing_tradeoff = placer_opts.timing_tradeoff; - - PlaceCritParams crit_params; - crit_params.crit_exponent = state->crit_exponent; - crit_params.crit_limit = placer_opts.place_crit_limit; - - // move type and block type chosen by the agent - t_propose_action proposed_action{e_move_type::UNIFORM, -1}; - - swap_stats.num_ts_called++; - - MoveOutcomeStats move_outcome_stats; - - /* I'm using negative values of proposed_net_cost as a flag, * - * so DO NOT use cost functions that can go negative. */ - - double delta_c = 0; //Change in cost due to this swap. - double bb_delta_c = 0; //Change in the bounding box (wiring) cost. - double timing_delta_c = 0; //Change in the timing cost (delay * criticality). - - // Determine whether we need to force swap two router blocks - bool router_block_move = false; - if (noc_opts.noc) { - router_block_move = check_for_router_swap(noc_opts.noc_swap_percentage, rng); - } - - /* Allow some fraction of moves to not be restricted by rlim, */ - /* in the hopes of better escaping local minima. */ - float rlim; - if (rlim_escape_fraction > 0. && rng.frand() < rlim_escape_fraction) { - rlim = std::numeric_limits::infinity(); - } else { - rlim = state->rlim; - } - - e_create_move create_move_outcome = e_create_move::ABORT; - - //When manual move toggle button is active, the manual move window asks the user for input. - if (manual_move_enabled) { -#ifndef NO_GRAPHICS - create_move_outcome = manual_move_display_and_propose(manual_move_generator, blocks_affected, - proposed_action.move_type, rlim, placer_opts, - criticalities); -#else //NO_GRAPHICS - //Cast to void to explicitly avoid warning. 
- (void)manual_move_generator; -#endif //NO_GRAPHICS - } else if (router_block_move) { - // generate a move where two random router blocks are swapped - create_move_outcome = propose_router_swap(blocks_affected, rlim, placer_state.blk_loc_registry(), rng); - proposed_action.move_type = e_move_type::UNIFORM; - } else { - //Generate a new move (perturbation) used to explore the space of possible placements - create_move_outcome = move_generator.propose_move(blocks_affected, proposed_action, rlim, placer_opts, criticalities); - } - - if (proposed_action.logical_blk_type_index != -1 && !manual_move_enabled) { //if the agent proposed the block type, then collect the block type stat - ++move_type_stat.blk_type_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; - } - LOG_MOVE_STATS_PROPOSED(t, blocks_affected); - - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, - "\t\tBefore move Place cost %e, bb_cost %e, timing cost %e\n", - costs->cost, costs->bb_cost, costs->timing_cost); - - e_move_result move_outcome = e_move_result::ABORTED; - - if (create_move_outcome == e_create_move::ABORT) { - LOG_MOVE_STATS_OUTCOME(std::numeric_limits::quiet_NaN(), - std::numeric_limits::quiet_NaN(), - std::numeric_limits::quiet_NaN(), "ABORTED", - "illegal move"); - - move_outcome = ABORTED; - - } else { - VTR_ASSERT(create_move_outcome == e_create_move::VALID); - - /* - * To make evaluating the move simpler (e.g. calculating changed bounding box), - * we first move the blocks to their new locations (apply the move to - * blk_loc_registry.block_locs) and then compute the change in cost. If the move - * is accepted, the inverse look-up in place_ctx.grid_blocks is updated - * (committing the move). If the move is rejected, the blocks are returned to - * their original positions (reverting blk_loc_registry.block_locs to its original state). 
- * - * Note that the inverse look-up place_ctx.grid_blocks is only updated after - * move acceptance is determined, so it should not be used when evaluating a move. - */ - - /* Update the block positions */ - blk_loc_registry.apply_move_blocks(blocks_affected); - - //Find all the nets affected by this swap and update the wiring costs. - //This cost value doesn't depend on the timing info. - // - //Also find all the pins affected by the swap, and calculates new connection - //delays and timing costs and store them in proposed_* data structures. - net_cost_handler.find_affected_nets_and_update_costs(delay_model, criticalities, blocks_affected, - bb_delta_c, timing_delta_c); - - //For setup slack analysis, we first do a timing analysis to get the newest - //slack values resulted from the proposed block moves. If the move turns out - //to be accepted, we keep the updated slack values and commit the block moves. - //If rejected, we reject the proposed block moves and revert this timing analysis. - if (place_algorithm == SLACK_TIMING_PLACE) { - /* Invalidates timing of modified connections for incremental timing updates. */ - invalidate_affected_connections(blocks_affected, - pin_timing_invalidator, timing_info); - - /* Update the connection_timing_cost and connection_delay * - * values from the temporary values. */ - commit_td_cost(blocks_affected, placer_state); - - /* Update timing information. Since we are analyzing setup slacks, * - * we only update those values and keep the criticalities stale * - * so as not to interfere with the original timing driven algorithm. * - * - * Note: the timing info must be updated after applying block moves * - * and committing the timing driven delays and costs. * - * If we wish to revert this timing update due to move rejection, * - * we need to revert block moves and restore the timing values. 
*/ - criticalities->disable_update(); - setup_slacks->enable_update(); - update_timing_classes(crit_params, timing_info, criticalities, - setup_slacks, pin_timing_invalidator, placer_state); - - /* Get the setup slack analysis cost */ - //TODO: calculate a weighted average of the slack cost and wiring cost - delta_c = analyze_setup_slack_cost(setup_slacks, placer_state) * costs->timing_cost_norm; - } else if (place_algorithm == CRITICALITY_TIMING_PLACE) { - /* Take delta_c as a combination of timing and wiring cost. In - * addition to `timing_tradeoff`, we normalize the cost values */ - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, - "\t\tMove bb_delta_c %e, bb_cost_norm %e, timing_tradeoff %f, " - "timing_delta_c %e, timing_cost_norm %e\n", - bb_delta_c, - costs->bb_cost_norm, - timing_tradeoff, - timing_delta_c, - costs->timing_cost_norm); - delta_c = (1 - timing_tradeoff) * bb_delta_c * costs->bb_cost_norm - + timing_tradeoff * timing_delta_c * costs->timing_cost_norm; - } else { - VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE); - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, - "\t\tMove bb_delta_c %e, bb_cost_norm %e\n", - bb_delta_c, - costs->bb_cost_norm); - delta_c = bb_delta_c * costs->bb_cost_norm; - } - - NocCostTerms noc_delta_c; // change in NoC cost - /* Update the NoC data structure and costs*/ - if (noc_opts.noc) { - VTR_ASSERT_SAFE(noc_cost_handler.has_value()); - noc_cost_handler->find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c); - - // Include the NoC delta costs in the total cost change for this swap - delta_c += calculate_noc_cost(noc_delta_c, costs->noc_cost_norm_factors, noc_opts); - } - - /* 1 -> move accepted, 0 -> rejected. */ - move_outcome = assess_swap(delta_c, state->t, rng); - - //Updates the manual_move_state members and displays costs to the user to decide whether to ACCEPT/REJECT manual move. 
-#ifndef NO_GRAPHICS - if (manual_move_enabled) { - move_outcome = pl_do_manual_move(delta_c, timing_delta_c, bb_delta_c, move_outcome); - } -#endif //NO_GRAPHICS - - if (move_outcome == ACCEPTED) { - costs->cost += delta_c; - costs->bb_cost += bb_delta_c; - - if (place_algorithm == SLACK_TIMING_PLACE) { - /* Update the timing driven cost as usual */ - costs->timing_cost += timing_delta_c; - - //Commit the setup slack information - //The timing delay and cost values should be committed already - commit_setup_slacks(setup_slacks, placer_state); - } - - if (place_algorithm == CRITICALITY_TIMING_PLACE) { - costs->timing_cost += timing_delta_c; - - /* Invalidates timing of modified connections for incremental * - * timing updates. These invalidations are accumulated for a * - * big timing update in the outer loop. */ - invalidate_affected_connections(blocks_affected, - pin_timing_invalidator, timing_info); - - /* Update the connection_timing_cost and connection_delay * - * values from the temporary values. */ - commit_td_cost(blocks_affected, placer_state); - } - - /* Update net cost functions and reset flags. */ - net_cost_handler.update_move_nets(); - - /* Update clb data structures since we kept the move. */ - blk_loc_registry.commit_move_blocks(blocks_affected); - - if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat - ++move_type_stat.accepted_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; - } - if (noc_opts.noc){ - noc_cost_handler->commit_noc_costs(); - *costs += noc_delta_c; - } - - //Highlights the new block when manual move is selected. -#ifndef NO_GRAPHICS - if (manual_move_enabled) { - manual_move_highlight_new_block_location(); - } -#endif //NO_GRAPHICS - - } else { - VTR_ASSERT_SAFE(move_outcome == REJECTED); - - /* Reset the net cost function flags first. 
*/ - net_cost_handler.reset_move_nets(); - - /* Restore the blk_loc_registry.block_locs data structures to their state before the move. */ - blk_loc_registry.revert_move_blocks(blocks_affected); - - if (place_algorithm == SLACK_TIMING_PLACE) { - /* Revert the timing delays and costs to pre-update values. */ - /* These routines must be called after reverting the block moves. */ - //TODO: make this process incremental - comp_td_connection_delays(delay_model, placer_state); - comp_td_costs(delay_model, *criticalities, placer_state, &costs->timing_cost); - - /* Re-invalidate the affected sink pins since the proposed * - * move is rejected, and the same blocks are reverted to * - * their original positions. */ - invalidate_affected_connections(blocks_affected, - pin_timing_invalidator, timing_info); - - /* Revert the timing update */ - update_timing_classes(crit_params, timing_info, criticalities, - setup_slacks, pin_timing_invalidator, placer_state); - - VTR_ASSERT_SAFE_MSG( - verify_connection_setup_slacks(setup_slacks, placer_state), - "The current setup slacks should be identical to the values before the try swap timing info update."); - } - - if (place_algorithm == CRITICALITY_TIMING_PLACE) { - /* Unstage the values stored in proposed_* data structures */ - revert_td_cost(blocks_affected, placer_state.mutable_timing()); - } - - if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat - ++move_type_stat.rejected_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; - } - /* Revert the traffic flow routes within the NoC*/ - if (noc_opts.noc) { - noc_cost_handler->revert_noc_traffic_flow_routes(blocks_affected); - } - } - - move_outcome_stats.delta_cost_norm = delta_c; - move_outcome_stats.delta_bb_cost_norm = bb_delta_c * costs->bb_cost_norm; - move_outcome_stats.delta_timing_cost_norm = timing_delta_c * costs->timing_cost_norm; - - move_outcome_stats.delta_bb_cost_abs = 
bb_delta_c; - move_outcome_stats.delta_timing_cost_abs = timing_delta_c; - - LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome ? "ACCEPTED" : "REJECTED"), ""); - } - move_outcome_stats.outcome = move_outcome; - - // If we force a router block move then it was not proposed by the - // move generator, so we should not calculate the reward and update - // the move generators status since this outcome is not a direct - // consequence of the move generator - if (!manual_move_enabled && !router_block_move) { - move_generator.calculate_reward_and_process_outcome(move_outcome_stats, delta_c, timing_bb_factor); - } - -#ifdef VTR_ENABLE_DEBUG_LOGGING -# ifndef NO_GRAPHICS - stop_placement_and_check_breakpoints(blocks_affected, move_outcome, delta_c, bb_delta_c, timing_delta_c); -# endif -#endif - - /* Clear the data structure containing block move info */ - blocks_affected.clear_move_blocks(); - -#if 0 - // Check that each accepted swap yields a valid placement. This will - // greatly slow the placer, but can debug some issues. - check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts); -#endif - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, - "\t\tAfter move Place cost %e, bb_cost %e, timing cost %e\n", - costs->cost, costs->bb_cost, costs->timing_cost); - return move_outcome; + return count; } static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, @@ -1604,7 +647,7 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, bool cube_bb; const int number_layers = g_vpr_ctx.device().grid.get_num_layers(); - if (place_bb_mode == AUTO_BB) { + if (place_bb_mode == e_place_bounding_box_mode::AUTO_BB) { // If the auto_bb is used, we analyze the RR graph to see whether is there any inter-layer connection that is not // originated from OPIN. If there is any, cube BB is chosen, otherwise, per-layer bb is chosen. 
if (number_layers > 1 && inter_layer_connections_limited_to_opin(rr_graph)) { @@ -1612,235 +655,18 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, } else { cube_bb = true; } - } else if (place_bb_mode == CUBE_BB) { + } else if (place_bb_mode == e_place_bounding_box_mode::CUBE_BB) { // The user has specifically asked for CUBE_BB cube_bb = true; } else { // The user has specifically asked for PER_LAYER_BB - VTR_ASSERT_SAFE(place_bb_mode == PER_LAYER_BB); + VTR_ASSERT_SAFE(place_bb_mode == e_place_bounding_box_mode::PER_LAYER_BB); cube_bb = false; } return cube_bb; } -/** - * @brief Updates all the cost normalization factors during the outer - * loop iteration of the placement. At each temperature change, these - * values are updated so that we can balance the tradeoff between the - * different placement cost components (timing, wirelength and NoC). - * Depending on the placement mode the corresponding normalization factors are - * updated. - * - * @param costs Contains the normalization factors which need to be updated - * @param placer_opts Determines the placement mode - * @param noc_opts Determines if placement includes the NoC - * @param noc_cost_handler Computes normalization factors for NoC-related cost terms - */ -static void update_placement_cost_normalization_factors(t_placer_costs* costs, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - const std::optional& noc_cost_handler) { - /* Update the cost normalization factors */ - costs->update_norm_factors(); - - // update the noc normalization factors if the placement includes the NoC - if (noc_opts.noc) { - noc_cost_handler->update_noc_normalization_factors(*costs); - } - - // update the current total placement cost - costs->cost = get_total_cost(costs, placer_opts, noc_opts); -} - -/** - * @brief Compute the total normalized cost for a given placement. This - * computation will vary depending on the placement modes. 
- * - * @param costs The current placement cost components and their normalization - * factors - * @param placer_opts Determines the placement mode - * @param noc_opts Determines if placement includes the NoC - * @return double The computed total cost of the current placement - */ -static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) { - double total_cost = 0.0; - - if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) { - // in bounding box mode we only care about wirelength - total_cost = costs->bb_cost * costs->bb_cost_norm; - } else if (placer_opts.place_algorithm.is_timing_driven()) { - // in timing mode we include both wirelength and timing costs - total_cost = (1 - placer_opts.timing_tradeoff) * (costs->bb_cost * costs->bb_cost_norm) + (placer_opts.timing_tradeoff) * (costs->timing_cost * costs->timing_cost_norm); - } - - if (noc_opts.noc) { - // in noc mode we include noc aggregate bandwidth and noc latency - total_cost += calculate_noc_cost(costs->noc_cost_terms, costs->noc_cost_norm_factors, noc_opts); - } - - return total_cost; -} - -/** - * @brief Check if the setup slack has gotten better or worse due to block swap. - * - * Get all the modified slack values via the PlacerSetupSlacks class, and compare - * then with the original values at these connections. Sort them and compare them - * one by one, and return the difference of the first different pair. - * - * If the new slack value is larger(better), than return a negative value so that - * the move will be accepted. If the new slack value is smaller(worse), return a - * positive value so that the move will be rejected. - * - * If no slack values have changed, then return an arbitrary positive number. A - * move resulting in no change in the slack values should probably be unnecessary. 
- * - * The sorting is need to prevent in the unlikely circumstances that a bad slack - * value suddenly got very good due to the block move, while a good slack value - * got very bad, perhaps even worse than the original worse slack value. - */ -static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, - const PlacerState& placer_state) { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& clb_nlist = cluster_ctx.clb_nlist; - - const auto& p_timing_ctx = placer_state.timing(); - const auto& connection_setup_slack = p_timing_ctx.connection_setup_slack; - - //Find the original/proposed setup slacks of pins with modified values - std::vector original_setup_slacks, proposed_setup_slacks; - - auto clb_pins_modified = setup_slacks->pins_with_modified_setup_slack(); - for (ClusterPinId clb_pin : clb_pins_modified) { - ClusterNetId net_id = clb_nlist.pin_net(clb_pin); - size_t ipin = clb_nlist.pin_net_index(clb_pin); - - original_setup_slacks.push_back(connection_setup_slack[net_id][ipin]); - proposed_setup_slacks.push_back( - setup_slacks->setup_slack(net_id, ipin)); - } - - //Sort in ascending order, from the worse slack value to the best - std::stable_sort(original_setup_slacks.begin(), original_setup_slacks.end()); - std::stable_sort(proposed_setup_slacks.begin(), proposed_setup_slacks.end()); - - //Check the first pair of slack values that are different - //If found, return their difference - for (size_t idiff = 0; idiff < original_setup_slacks.size(); ++idiff) { - float slack_diff = original_setup_slacks[idiff] - - proposed_setup_slacks[idiff]; - - if (slack_diff != 0) { - return slack_diff; - } - } - - //If all slack values are identical (or no modified slack values), - //reject this move by returning an arbitrary positive number as cost. - return 1; -} - -static e_move_result assess_swap(double delta_c, double t, vtr::RngContainer& rng) { - /* Returns: 1 -> move accepted, 0 -> rejected. 
*/ - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %e delta_c is %e\n", t, delta_c); - if (delta_c <= 0) { - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(delta_c < 0)\n"); - return ACCEPTED; - } - - if (t == 0.) { - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(t == 0)\n"); - return REJECTED; - } - - float fnum = rng.frand(); - float prob_fac = std::exp(-delta_c / t); - if (prob_fac > fnum) { - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(hill climbing)\n"); - return ACCEPTED; - } - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(hill climbing)\n"); - return REJECTED; -} - -/** - * @brief Update the connection_timing_cost values from the temporary - * values for all connections that have/haven't changed. - * - * All the connections have already been gathered by blocks_affected.affected_pins - * after running the routine find_affected_nets_and_update_costs() in try_swap(). 
- */ -static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, - PlacerState& placer_state) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& clb_nlist = cluster_ctx.clb_nlist; - - auto& p_timing_ctx = placer_state.mutable_timing(); - auto& connection_delay = p_timing_ctx.connection_delay; - auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay; - auto& connection_timing_cost = p_timing_ctx.connection_timing_cost; - auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost; - - //Go through all the sink pins affected - for (ClusterPinId pin_id : blocks_affected.affected_pins) { - ClusterNetId net_id = clb_nlist.pin_net(pin_id); - int ipin = clb_nlist.pin_net_index(pin_id); - - //Commit the timing delay and cost values - connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin]; - proposed_connection_delay[net_id][ipin] = INVALID_DELAY; - connection_timing_cost[net_id][ipin] = proposed_connection_timing_cost[net_id][ipin]; - proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; - } -} - -//Reverts modifications to proposed_connection_delay and proposed_connection_timing_cost based on -//the move proposed in blocks_affected -static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, - PlacerTimingContext& p_timing_ctx) { -#ifndef VTR_ASSERT_SAFE_ENABLED - (void)blocks_affected; - (void)p_timing_ctx; -#else - //Invalidate temp delay & timing cost values to match sanity checks in - //comp_td_connection_cost() - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& clb_nlist = cluster_ctx.clb_nlist; - - auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay; - auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost; - - for (ClusterPinId pin : blocks_affected.affected_pins) { - ClusterNetId net = clb_nlist.pin_net(pin); - int ipin = clb_nlist.pin_net_index(pin); - proposed_connection_delay[net][ipin] = INVALID_DELAY; 
- proposed_connection_timing_cost[net][ipin] = INVALID_DELAY; - } -#endif -} - -/** - * @brief Invalidates the connections affected by the specified block moves. - * - * All the connections recorded in blocks_affected.affected_pins have different - * values for `proposed_connection_delay` and `connection_delay`. - * - * Invalidate all the timing graph edges associated with these connections via - * the NetPinTimingInvalidator class. - */ -static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, - NetPinTimingInvalidator* pin_tedges_invalidator, - TimingInfo* timing_info) { - VTR_ASSERT_SAFE(timing_info); - VTR_ASSERT_SAFE(pin_tedges_invalidator); - - /* Invalidate timing graph edges affected by the move */ - for (ClusterPinId pin : blocks_affected.affected_pins) { - pin_tedges_invalidator->invalidate_connection(pin, timing_info); - } -} - /* Allocates the major structures needed only by the placer, primarily for * * computing costs quickly and such. */ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& placer_opts, @@ -1848,77 +674,19 @@ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac const std::vector& directs, PlacerState& placer_state, std::optional& noc_cost_handler) { - const auto& device_ctx = g_vpr_ctx.device(); - const auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.mutable_placement(); place_ctx.lock_loc_vars(); - size_t num_nets = cluster_ctx.clb_nlist.nets().size(); - - const int num_layers = device_ctx.grid.get_num_layers(); - init_placement_context(placer_state.mutable_blk_loc_registry(), directs); - int max_pins_per_clb = 0; - for (const t_physical_tile_type& type : device_ctx.physical_tile_types) { - max_pins_per_clb = std::max(max_pins_per_clb, type.num_pins); - } - - if (placer_opts.place_algorithm.is_timing_driven()) { - /* Allocate structures associated with timing driven placement */ - /* 
[0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] */ - - auto& p_timing_ctx = placer_state.mutable_timing(); - - p_timing_ctx.connection_delay = make_net_pins_matrix((const Netlist<>&)cluster_ctx.clb_nlist, 0.f); - p_timing_ctx.proposed_connection_delay = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.f); - - p_timing_ctx.connection_setup_slack = make_net_pins_matrix(cluster_ctx.clb_nlist, std::numeric_limits::infinity()); - - p_timing_ctx.connection_timing_cost = PlacerTimingCosts(cluster_ctx.clb_nlist); - p_timing_ctx.proposed_connection_timing_cost = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.); - p_timing_ctx.net_timing_cost.resize(num_nets, 0.); - - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { - for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { - p_timing_ctx.connection_delay[net_id][ipin] = 0; - p_timing_ctx.proposed_connection_delay[net_id][ipin] = INVALID_DELAY; - - p_timing_ctx.proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; - - if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) - continue; - - p_timing_ctx.connection_timing_cost[net_id][ipin] = INVALID_DELAY; - } - } - } - - auto& place_move_ctx = placer_state.mutable_move(); - - if (place_ctx.cube_bb) { - place_move_ctx.bb_coords.resize(num_nets, t_bb()); - place_move_ctx.bb_num_on_edges.resize(num_nets, t_bb()); - } else { - VTR_ASSERT_SAFE(!place_ctx.cube_bb); - place_move_ctx.layer_bb_num_on_edges.resize(num_nets, std::vector(num_layers, t_2D_bb())); - place_move_ctx.layer_bb_coords.resize(num_nets, std::vector(num_layers, t_2D_bb())); - } - - place_move_ctx.num_sink_pin_layer.resize({num_nets, size_t(num_layers)}); - for (size_t flat_idx = 0; flat_idx < place_move_ctx.num_sink_pin_layer.size(); flat_idx++) { - auto& elem = place_move_ctx.num_sink_pin_layer.get(flat_idx); - elem = OPEN; - } - place_ctx.compressed_block_grids = create_compressed_block_grids(); if (noc_opts.noc) { 
noc_cost_handler.emplace(placer_state.block_locs()); } - return NetCostHandler{placer_opts, placer_state, num_nets, place_ctx.cube_bb}; + return NetCostHandler{placer_opts, placer_state, place_ctx.cube_bb}; } /* Frees the major structures needed by the placer (and not needed * @@ -2030,8 +798,8 @@ static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, const PlacementDelayCalculator& delay_calc, bool is_flat, const BlkLocRegistry& blk_loc_registry) { - auto& timing_ctx = g_vpr_ctx.timing(); - auto& atom_ctx = g_vpr_ctx.atom(); + const auto& timing_ctx = g_vpr_ctx.timing(); + const auto& atom_ctx = g_vpr_ctx.atom(); VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, delay_calc, is_flat, blk_loc_registry); @@ -2183,4 +951,4 @@ static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry) { // update the graphics' reference to placement location variables get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry); #endif -} +} \ No newline at end of file diff --git a/vpr/src/place/place.h b/vpr/src/place/place.h index 138c6cdd05d..210663823a8 100644 --- a/vpr/src/place/place.h +++ b/vpr/src/place/place.h @@ -5,7 +5,6 @@ void try_place(const Netlist<>& net_list, const t_placer_opts& placer_opts, - t_annealing_sched annealing_sched, const t_router_opts& router_opts, const t_analysis_opts& analysis_opts, const t_noc_opts& noc_opts, diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index ce24914b7f2..e3f3d9da567 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -8,6 +8,7 @@ #include "globals.h" #include "draw_global.h" #include "place_constraints.h" +#include "noc_place_utils.h" /** * @brief Initialize `grid_blocks`, the inverse structure of `block_locs`. 
@@ -60,47 +61,38 @@ void t_placer_costs::update_norm_factors() { //Prevent the norm factor from going to infinity timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST); } else { - VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE); + VTR_ASSERT_SAFE(place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); bb_cost_norm = 1 / bb_cost; //Updating the normalization factor in bounding box mode since the cost in this mode is determined after normalizing the wirelength cost } + + if (noc_enabled) { + NocCostHandler::update_noc_normalization_factors(*this); + } } -t_placer_costs& t_placer_costs::operator+=(const NocCostTerms& noc_delta_cost) { - noc_cost_terms += noc_delta_cost; +double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) { + double total_cost = 0.0; - return *this; -} + if (placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE) { + // in bounding box mode we only care about wirelength + total_cost = bb_cost * bb_cost_norm; + } else if (placer_opts.place_algorithm.is_timing_driven()) { + // in timing mode we include both wirelength and timing costs + total_cost = (1 - placer_opts.timing_tradeoff) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * timing_cost_norm); + } -///@brief Constructor: Initialize all annealing state variables and macros. 
-t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched, - float first_t, - float first_rlim, - int first_move_lim, - float first_crit_exponent, - int num_laters) { - num_temps = 0; - alpha = annealing_sched.alpha_min; - t = first_t; - restart_t = first_t; - rlim = first_rlim; - move_lim_max = first_move_lim; - crit_exponent = first_crit_exponent; - - /* Determine the current move_lim based on the schedule type */ - if (annealing_sched.type == DUSTY_SCHED) { - move_lim = std::max(1, (int)(move_lim_max * annealing_sched.success_target)); - } else { - move_lim = move_lim_max; + if (noc_opts.noc) { + // in noc mode we include noc aggregate bandwidth, noc latency, and noc congestion + total_cost += calculate_noc_cost(noc_cost_terms, noc_cost_norm_factors, noc_opts); } - NUM_LAYERS = num_laters; + return total_cost; +} - /* Store this inverse value for speed when updating crit_exponent. */ - INVERSE_DELTA_RLIM = 1 / (first_rlim - FINAL_RLIM); +t_placer_costs& t_placer_costs::operator+=(const NocCostTerms& noc_delta_cost) { + noc_cost_terms += noc_delta_cost; - /* The range limit cannot exceed the largest grid size. */ - auto& grid = g_vpr_ctx.device().grid; - UPPER_RLIM = std::max(grid.width() - 1, grid.height() - 1); + return *this; } int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched) { @@ -126,112 +118,6 @@ int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sch return move_lim; } -bool t_annealing_state::outer_loop_update(float success_rate, - const t_placer_costs& costs, - const t_placer_opts& placer_opts, - const t_annealing_sched& annealing_sched) { -#ifndef NO_GRAPHICS - t_draw_state* draw_state = get_draw_state_vars(); - if (!draw_state->list_of_breakpoints.empty()) { - /* Update temperature in the current information variable. 
*/ - get_bp_state_globals()->get_glob_breakpoint_state()->temp_count++; - } -#endif - - if (annealing_sched.type == USER_SCHED) { - /* Update t with user specified alpha. */ - t *= annealing_sched.alpha_t; - - /* Check if the exit criterion is met. */ - bool exit_anneal = t >= annealing_sched.exit_t; - - return exit_anneal; - } - - /* Automatically determine exit temperature. */ - auto& cluster_ctx = g_vpr_ctx.clustering(); - float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size(); - - if (annealing_sched.type == DUSTY_SCHED) { - /* May get nan if there are no nets */ - bool restart_temp = t < t_exit || std::isnan(t_exit); - - /* If the success rate or the temperature is * - * too low, reset the temperature and alpha. */ - if (success_rate < annealing_sched.success_min || restart_temp) { - /* Only exit anneal when alpha gets too large. */ - if (alpha > annealing_sched.alpha_max) { - return false; - } - /* Take a half step from the restart temperature. */ - t = restart_t / sqrt(alpha); - /* Update alpha. */ - alpha = 1.0 - ((1.0 - alpha) * annealing_sched.alpha_decay); - } else { - /* If the success rate is promising, next time * - * reset t to the current annealing temperature. */ - if (success_rate > annealing_sched.success_target) { - restart_t = t; - } - /* Update t. */ - t *= alpha; - } - - /* Update move lim. */ - update_move_lim(annealing_sched.success_target, success_rate); - } else { - VTR_ASSERT_SAFE(annealing_sched.type == AUTO_SCHED); - /* Automatically adjust alpha according to success rate. */ - if (success_rate > 0.96) { - alpha = 0.5; - } else if (success_rate > 0.8) { - alpha = 0.9; - } else if (success_rate > 0.15 || rlim > 1.) { - alpha = 0.95; - } else { - alpha = 0.8; - } - /* Update temp. */ - t *= alpha; - /* Must be duplicated to retain previous behavior. */ - if (t < t_exit || std::isnan(t_exit)) { - return false; - } - } - - /* Update the range limiter. 
*/ - update_rlim(success_rate); - - /* If using timing driven algorithm, update the crit_exponent. */ - if (placer_opts.place_algorithm.is_timing_driven()) { - update_crit_exponent(placer_opts); - } - - /* Continues the annealing. */ - return true; -} - -void t_annealing_state::update_rlim(float success_rate) { - rlim *= (1. - 0.44 + success_rate); - rlim = std::min(rlim, UPPER_RLIM); - rlim = std::max(rlim, FINAL_RLIM); -} - -void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) { - /* If rlim == FINAL_RLIM, then scale == 0. */ - float scale = 1 - (rlim - FINAL_RLIM) * INVERSE_DELTA_RLIM; - - /* Apply the scaling factor on crit_exponent. */ - crit_exponent = scale * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first) - + placer_opts.td_place_exp_first; -} - -void t_annealing_state::update_move_lim(float success_target, float success_rate) { - move_lim = move_lim_max * (success_target / success_rate); - move_lim = std::min(move_lim, move_lim_max); - move_lim = std::max(move_lim, 1); -} - ///@brief Clear all data fields. 
void t_placer_statistics::reset() { av_cost = 0.; @@ -390,7 +276,7 @@ bool macro_can_be_placed(const t_pl_macro& pl_macro, } } - return (mac_can_be_placed); + return mac_can_be_placed; } NocCostTerms::NocCostTerms(double agg_bw, double lat, double lat_overrun, double congest) diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index 60d4a86b1c5..2a816e01350 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -101,8 +101,9 @@ class t_placer_costs { NocCostTerms noc_cost_norm_factors; public: //Constructor - explicit t_placer_costs(t_place_algorithm algo) - : place_algorithm(algo) {} + explicit t_placer_costs(t_place_algorithm algo, bool noc) + : place_algorithm(algo) + , noc_enabled(noc) {} t_placer_costs() = default; public: //Mutator @@ -114,6 +115,18 @@ class t_placer_costs { */ void update_norm_factors(); + /** + * @brief Compute the total normalized cost for a given placement. This + * computation will vary depending on the placement modes. + * + * @param costs The current placement cost components and their normalization + * factors + * @param placer_opts Determines the placement mode + * @param noc_opts Determines if placement includes the NoC + * @return double The computed total cost of the current placement + */ + double get_total_cost(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts); + /** * @brief Accumulates NoC cost difference terms * @@ -122,132 +135,9 @@ class t_placer_costs { t_placer_costs& operator+=(const NocCostTerms& noc_delta_cost); private: - double MAX_INV_TIMING_COST = 1.e12; + static constexpr double MAX_INV_TIMING_COST = 1.e12; t_place_algorithm place_algorithm; -}; - -/** - * @brief Stores variables that are used by the annealing process. - * - * This structure is updated by update_annealing_state() on each outer - * loop iteration. It stores various important variables that need to - * be accessed during the placement inner loop. 
- * - * Private variables are not given accessor functions. They serve as - * macros originally defined in place.cpp as global scope variables. - * - * Public members: - * @param t - * Temperature for simulated annealing. - * @param restart_t - * Temperature used after restart due to minimum success ratio. - * Currently only used and updated by DUSTY_SCHED. - * @param alpha - * Temperature decays factor (multiplied each outer loop iteration). - * @param num_temps - * The count of how many temperature iterations have passed. - * - * @param rlim - * Range limit for block swaps. - * Currently only updated by DUSTY_SCHED and AUTO_SCHED. - * @param crit_exponent - * Used by timing-driven placement to "sharpen" the timing criticality. - * Depends on rlim. Currently only updated by DUSTY_SCHED and AUTO_SCHED. - * @param move_lim - * Current block move limit. - * Currently only updated by DUSTY_SCHED. - * @param move_lim_max - * Maximum block move limit. - * - * Private members: - * @param UPPER_RLIM - * The upper limit for the range limiter value. - * @param FINAL_RLIM - * The final rlim (range limit) is 1, which is the smallest value that - * can still make progress, since an rlim of 0 wouldn't allow any swaps. - * @param INVERSE_DELTA_RLIM - * Used to update crit_exponent. See update_rlim() for more. - * - * Mutators: - * @param outer_loop_update() - * Update the annealing state variables in the placement outer loop. - * @param update_rlim(), update_crit_exponent(), update_move_lim() - * Inline subroutines used by the main routine outer_loop_update(). 
- */ -class t_annealing_state { - public: - float t; - float restart_t; - float alpha; - int num_temps; - - float rlim; - float crit_exponent; - int move_lim; - int move_lim_max; - - private: - float UPPER_RLIM; - float FINAL_RLIM = 1.; - float INVERSE_DELTA_RLIM; - int NUM_LAYERS = 1; - - public: //Constructor - t_annealing_state(const t_annealing_sched& annealing_sched, - float first_t, - float first_rlim, - int first_move_lim, - float first_crit_exponent, - int num_layers); - - public: //Mutator - /** - * @brief Update the annealing state according to the annealing schedule selected. - * - * USER_SCHED: A manual fixed schedule with fixed alpha and exit criteria. - * AUTO_SCHED: A more sophisticated schedule where alpha varies based on success ratio. - * DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio. - * See doc/src/vpr/dusty_sa.rst for more details. - * - * @return True->continues the annealing. False->exits the annealing. - */ - bool outer_loop_update(float success_rate, - const t_placer_costs& costs, - const t_placer_opts& placer_opts, - const t_annealing_sched& annealing_sched); - - private: //Mutator - /** - * @brief Update the range limiter to keep acceptance prob. near 0.44. - * - * Use a floating point rlim to allow gradual transitions at low temps. - * The range is bounded by 1 (FINAL_RLIM) and the grid size (UPPER_RLIM). - */ - inline void update_rlim(float success_rate); - - /** - * @brief Update the criticality exponent. - * - * When rlim shrinks towards the FINAL_RLIM value (indicating - * that we are fine-tuning a more optimized placement), we can - * focus more on a smaller number of critical connections. - * To achieve this, we make the crit_exponent sharper, so that - * critical connections would become more critical than before. - * - * We calculate how close rlim is to its final value comparing - * to its initial value. 
Then, we apply the same scaling factor - * on the crit_exponent so that it lands on the suitable value - * between td_place_exp_first and td_place_exp_last. The scaling - * factor is calculated and applied linearly. - */ - inline void update_crit_exponent(const t_placer_opts& placer_opts); - - /** - * @brief Update the move limit based on the success rate. - * - * The value is bounded between 1 and move_lim_max. - */ - inline void update_move_lim(float success_target, float success_rate); + bool noc_enabled; }; /** diff --git a/vpr/src/place/placer_breakpoint.cpp b/vpr/src/place/placer_breakpoint.cpp index b576bc64f04..a31a0add053 100644 --- a/vpr/src/place/placer_breakpoint.cpp +++ b/vpr/src/place/placer_breakpoint.cpp @@ -1,45 +1,51 @@ #include "placer_breakpoint.h" -#ifdef VTR_ENABLE_DEBUG_LOGGING //map of the available move types and their corresponding type number std::map available_move_types = { {0, "Uniform"}}; -# ifndef NO_GRAPHICS +#ifndef NO_GRAPHICS //transforms the vector moved_blocks to a vector of ints and adds it in glob_breakpoint_state void transform_blocks_affected(const t_pl_blocks_to_be_moved& blocksAffected) { - get_bp_state_globals()->get_glob_breakpoint_state()->blocks_affected_by_move.clear(); - for (size_t i = 0; i < blocksAffected.moved_blocks.size(); i++) { - //size_t conversion is required since block_num is of type ClusterBlockId and can't be cast to an int. And this vector has to be of type int to be recognized in expr_eval class + BreakpointState* bp_state = get_bp_state_globals()->get_glob_breakpoint_state(); - get_bp_state_globals()->get_glob_breakpoint_state()->blocks_affected_by_move.push_back(size_t(blocksAffected.moved_blocks[i].block_num)); + bp_state->blocks_affected_by_move.clear(); + for (const t_pl_moved_block& moved_block : blocksAffected.moved_blocks) { + //size_t conversion is required since block_num is of type ClusterBlockId and can't be cast to an int. 
And this vector has to be of type int to be recognized in expr_eval class + bp_state->blocks_affected_by_move.push_back(size_t(moved_block.block_num)); } } -void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affected, e_move_result move_outcome, double delta_c, double bb_delta_c, double timing_delta_c) { +void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affected, e_move_result move_outcome, + double delta_c, double bb_delta_c, double timing_delta_c) { t_draw_state* draw_state = get_draw_state_vars(); - if (draw_state->list_of_breakpoints.size() != 0) { + BreakpointState* bp_state = get_bp_state_globals()->get_glob_breakpoint_state(); + + if (!draw_state->list_of_breakpoints.empty()) { //update current information transform_blocks_affected(blocks_affected); - get_bp_state_globals()->get_glob_breakpoint_state()->move_num++; - get_bp_state_globals()->get_glob_breakpoint_state()->from_block = size_t(blocks_affected.moved_blocks[0].block_num); + bp_state->move_num++; + bp_state->from_block = size_t(blocks_affected.moved_blocks[0].block_num); //check for breakpoints set_placer_breakpoint_reached(check_for_breakpoints(true)); // the passed flag is true as we are in the placer - if (placer_breakpoint_reached()) - breakpoint_info_window(get_bp_state_globals()->get_glob_breakpoint_state()->bp_description, *get_bp_state_globals()->get_glob_breakpoint_state(), true); - } else + if (placer_breakpoint_reached()) { + breakpoint_info_window(bp_state->bp_description, *bp_state, true); + } + } else { set_placer_breakpoint_reached(false); + } if (placer_breakpoint_reached() && draw_state->show_graphics) { std::string msg = available_move_types[0]; - if (move_outcome == 0) + if (move_outcome == e_move_result::REJECTED) { msg += vtr::string_fmt(", Rejected"); - else if (move_outcome == 1) + } else if (move_outcome == e_move_result::ACCEPTED) { msg += vtr::string_fmt(", Accepted"); - else + } else { msg += vtr::string_fmt(", Aborted"); + } 
msg += vtr::string_fmt(", Delta_cost: %1.6f (bb_delta_cost= %1.5f , timing_delta_c= %6.1e)", delta_c, bb_delta_c, timing_delta_c); @@ -48,6 +54,4 @@ void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affect } } -# endif //NO_GRAPHICS - -#endif //VTR_ENABLE_DEBUG_LOGGING +#endif //NO_GRAPHICS diff --git a/vpr/src/place/placer_breakpoint.h b/vpr/src/place/placer_breakpoint.h index c01ef77450c..510b7071e0d 100644 --- a/vpr/src/place/placer_breakpoint.h +++ b/vpr/src/place/placer_breakpoint.h @@ -7,14 +7,13 @@ #include "breakpoint.h" #include "draw.h" -#ifdef VTR_ENABLE_DEBUG_LOGGING //transforms the vector moved_blocks to a vector of ints and adds it in glob_breakpoint_state void transform_blocks_affected(const t_pl_blocks_to_be_moved& blocksAffected); //checks the breakpoint and see whether one of them was reached and pause place,emt accordingly -void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affected, e_move_result move_outcome, double delta_c, double bb_delta_c, double timing_delta_c); +void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affected, e_move_result move_outcome, + double delta_c, double bb_delta_c, double timing_delta_c); -#endif #endif diff --git a/vpr/src/place/placer_state.cpp b/vpr/src/place/placer_state.cpp new file mode 100644 index 00000000000..998c097b4ca --- /dev/null +++ b/vpr/src/place/placer_state.cpp @@ -0,0 +1,101 @@ + +#include "placer_state.h" + +#include "globals.h" +#include "move_transactions.h" + +PlacerMoveContext::PlacerMoveContext(bool cube_bb) { + const auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + const size_t num_nets = cluster_ctx.clb_nlist.nets().size(); + + const int num_layers = device_ctx.grid.get_num_layers(); + + if (cube_bb) { + bb_coords.resize(num_nets, t_bb()); + bb_num_on_edges.resize(num_nets, t_bb()); + } else { + layer_bb_num_on_edges.resize(num_nets, std::vector(num_layers, t_2D_bb())); + 
layer_bb_coords.resize(num_nets, std::vector(num_layers, t_2D_bb())); + } + + num_sink_pin_layer.resize({num_nets, size_t(num_layers)}); + for (size_t flat_idx = 0; flat_idx < num_sink_pin_layer.size(); flat_idx++) { + int& elem = num_sink_pin_layer.get(flat_idx); + elem = OPEN; + } +} + +PlacerTimingContext::PlacerTimingContext(bool placement_is_timing_driven) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + const size_t num_nets = cluster_ctx.clb_nlist.nets().size(); + + if (placement_is_timing_driven) { + connection_delay = make_net_pins_matrix((const Netlist<>&)cluster_ctx.clb_nlist, 0.f); + proposed_connection_delay = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.f); + + connection_setup_slack = make_net_pins_matrix(cluster_ctx.clb_nlist, std::numeric_limits::infinity()); + + connection_timing_cost = PlacerTimingCosts(cluster_ctx.clb_nlist); + proposed_connection_timing_cost = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.); + net_timing_cost.resize(num_nets, 0.); + + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { + connection_delay[net_id][ipin] = 0; + proposed_connection_delay[net_id][ipin] = INVALID_DELAY; + + proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; + + if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + continue; + } + + connection_timing_cost[net_id][ipin] = INVALID_DELAY; + } + } + } +} + +void PlacerTimingContext::commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& clb_nlist = cluster_ctx.clb_nlist; + + // Go through all the sink pins affected + for (ClusterPinId pin_id : blocks_affected.affected_pins) { + ClusterNetId net_id = clb_nlist.pin_net(pin_id); + int ipin = clb_nlist.pin_net_index(pin_id); + + // Commit the timing delay and cost values + connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin]; + 
proposed_connection_delay[net_id][ipin] = INVALID_DELAY; + connection_timing_cost[net_id][ipin] = proposed_connection_timing_cost[net_id][ipin]; + proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; + } +} + +void PlacerTimingContext::revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { +#ifndef VTR_ASSERT_SAFE_ENABLED + (void)blocks_affected; +#else + //Invalidate temp delay & timing cost values to match sanity checks in + //comp_td_connection_cost() + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& clb_nlist = cluster_ctx.clb_nlist; + + + for (ClusterPinId pin : blocks_affected.affected_pins) { + ClusterNetId net = clb_nlist.pin_net(pin); + int ipin = clb_nlist.pin_net_index(pin); + proposed_connection_delay[net][ipin] = INVALID_DELAY; + proposed_connection_timing_cost[net][ipin] = INVALID_DELAY; + } +#endif +} + +PlacerState::PlacerState(bool placement_is_timing_driven, bool cube_bb) + : timing_(placement_is_timing_driven) + , move_(cube_bb) {} + diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h index 97941f639b1..8f3b966a56d 100644 --- a/vpr/src/place/placer_state.h +++ b/vpr/src/place/placer_state.h @@ -24,6 +24,29 @@ * use mutable_timing() to access it. For more, see PlacerTimingCosts. */ struct PlacerTimingContext : public Context { + PlacerTimingContext() = delete; + + /** + * @brief Allocate structures associated with timing driven placement + * @param placement_is_timing_driven Specifies whether the placement is timing driven. + */ + PlacerTimingContext(bool placement_is_timing_driven); + + /** + * @brief Update the connection_timing_cost values from the temporary + * values for all connections that have/haven't changed. + * + * All the connections have already been gathered by blocks_affected.affected_pins + * after running the routine find_affected_nets_and_update_costs() in try_swap(). 
+ */ + void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); + + /** + * @brief Reverts modifications to proposed_connection_delay and proposed_connection_timing_cost + * based on the move proposed in blocks_affected + */ + void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); + /** * @brief Net connection delays based on the committed block positions. * @@ -74,6 +97,8 @@ struct PlacerTimingContext : public Context { * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1] */ vtr::vector net_timing_cost; + + static constexpr float INVALID_DELAY = std::numeric_limits::quiet_NaN(); }; /** @@ -90,6 +115,10 @@ struct PlacerRuntimeContext : public Context { * @brief Placement Move generators data */ struct PlacerMoveContext : public Context { + public: + PlacerMoveContext() = delete; + explicit PlacerMoveContext(bool cube_bb); + public: // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates) vtr::vector bb_num_on_edges; @@ -109,7 +138,7 @@ struct PlacerMoveContext : public Context { // The first range limit calculated by the annealer float first_rlim; - // Scratch vectors that are used by different directed moves for temporary calculations (allocated here to save runtime) + // Scratch vectors that are used by different directed moves for temporary calculations // These vectors will grow up with the net size as it is mostly used to save coords of the net pins or net bb edges // Given that placement moves involve operations on each coordinate independently, we chose to // utilize a Struct of Arrays (SoA) rather than an Array of Struct (AoS). @@ -121,6 +150,8 @@ struct PlacerMoveContext : public Context { std::vector> highly_crit_pins; }; + + /** * @brief This object encapsulates VPR placer's state. * @@ -135,6 +166,9 @@ struct PlacerMoveContext : public Context { * how to use this class due to similar implementation style. 
*/ class PlacerState : public Context { + public: + PlacerState(bool placement_is_timing_driven, bool cube_bb); + public: inline const PlacerTimingContext& timing() const { return timing_; } inline PlacerTimingContext& mutable_timing() { return timing_; } diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp index 1e4985b7852..021bb6211fb 100644 --- a/vpr/src/place/timing_place.cpp +++ b/vpr/src/place/timing_place.cpp @@ -63,7 +63,7 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); // Routing for placement is not flat (at least for the time being) - float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info, pin_lookup_, ParentPinId(size_t(clb_pin)), false); + float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info, pin_lookup_, ParentPinId(size_t(clb_pin)), /*is_flat=*/false); float new_crit = pow(clb_pin_crit, crit_params.crit_exponent); /* @@ -74,13 +74,15 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf */ if (!first_time_update_criticality) { if (new_crit > crit_params.crit_limit && timing_place_crit_[clb_net][pin_index_in_net] < crit_params.crit_limit) { - place_move_ctx.highly_crit_pins.push_back(std::make_pair(clb_net, pin_index_in_net)); + place_move_ctx.highly_crit_pins.emplace_back(clb_net, pin_index_in_net); } else if (new_crit < crit_params.crit_limit && timing_place_crit_[clb_net][pin_index_in_net] > crit_params.crit_limit) { - place_move_ctx.highly_crit_pins.erase(std::remove(place_move_ctx.highly_crit_pins.begin(), place_move_ctx.highly_crit_pins.end(), std::make_pair(clb_net, pin_index_in_net)), place_move_ctx.highly_crit_pins.end()); + place_move_ctx.highly_crit_pins.erase(std::remove(place_move_ctx.highly_crit_pins.begin(), place_move_ctx.highly_crit_pins.end(), std::make_pair(clb_net, pin_index_in_net)), + 
place_move_ctx.highly_crit_pins.end()); } } else { - if (new_crit > crit_params.crit_limit) - place_move_ctx.highly_crit_pins.push_back(std::make_pair(clb_net, pin_index_in_net)); + if (new_crit > crit_params.crit_limit) { + place_move_ctx.highly_crit_pins.emplace_back(clb_net, pin_index_in_net); + } } /* The placer likes a great deal of contrast between criticalities. diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h index 7ccf73c12f4..852c1aa6297 100644 --- a/vpr/src/place/timing_place.h +++ b/vpr/src/place/timing_place.h @@ -102,8 +102,8 @@ class PlacerCriticalities { public: //Lifetime PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup); - PlacerCriticalities(const PlacerCriticalities& clb_nlist) = delete; - PlacerCriticalities& operator=(const PlacerCriticalities& clb_nlist) = delete; + PlacerCriticalities(const PlacerCriticalities&) = delete; + PlacerCriticalities& operator=(const PlacerCriticalities&) = delete; public: //Accessors ///@brief Returns the criticality of the specified connection. 
diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp index 23fedf6c262..7fd0f0d1680 100644 --- a/vpr/src/route/connection_router.cpp +++ b/vpr/src/route/connection_router.cpp @@ -8,17 +8,10 @@ static bool relevant_node_to_target(const RRGraphView* rr_graph, RRNodeId node_to_add, RRNodeId target_node); -#ifdef VTR_ENABLE_DEBUG_LOGGING static void update_router_stats(RouterStats* router_stats, bool is_push, RRNodeId rr_node_id, const RRGraphView* rr_graph); -#else -static void update_router_stats(RouterStats* router_stats, - bool is_push, - RRNodeId /*rr_node_id*/, - const RRGraphView* /*rr_graph*/); -#endif /** return tuple */ template @@ -225,10 +218,10 @@ void ConnectionRouter::timing_driven_route_connection_from_heap(RRNodeId s HeapNode cheapest; while (heap_.try_pop(cheapest)) { - // inode with cheapest total cost in current route tree to be expanded on + // inode with the cheapest total cost in current route tree to be expanded on const auto& [ new_total_cost, inode ] = cheapest; update_router_stats(router_stats_, - false, + /*is_push=*/false, inode, rr_graph_); @@ -307,10 +300,10 @@ vtr::vector ConnectionRouter::timing_driven_find HeapNode cheapest; while (heap_.try_pop(cheapest)) { - // inode with cheapest total cost in current route tree to be expanded on + // inode with the cheapest total cost in current route tree to be expanded on const auto& [ new_total_cost, inode ] = cheapest; update_router_stats(router_stats_, - false, + /*is_push=*/false, inode, rr_graph_); @@ -598,7 +591,7 @@ void ConnectionRouter::timing_driven_add_to_heap(const t_conn_cost_params& heap_.add_to_heap({new_total_cost, to_node}); update_router_stats(router_stats_, - true, + /*is_push=*/true, to_node, rr_graph_); @@ -912,13 +905,13 @@ void ConnectionRouter::add_route_tree_node_to_heap( } update_router_stats(router_stats_, - true, + /*is_push=*/true, inode, rr_graph_); -#ifdef VTR_ENABLE_DEBUG_LOGGING - 
router_stats_->rt_node_pushes[rr_graph_->node_type(inode)]++; -#endif + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + router_stats_->rt_node_pushes[rr_graph_->node_type(inode)]++; + } } /* Expand bb by inode's extents and clip against net_bb */ @@ -1060,45 +1053,38 @@ static inline bool relevant_node_to_target(const RRGraphView* rr_graph, return false; } -#ifdef VTR_ENABLE_DEBUG_LOGGING static inline void update_router_stats(RouterStats* router_stats, bool is_push, RRNodeId rr_node_id, const RRGraphView* rr_graph) { -#else -static inline void update_router_stats(RouterStats* router_stats, - bool is_push, - RRNodeId /*rr_node_id*/, - const RRGraphView* /*rr_graph*/) { -#endif if (is_push) { router_stats->heap_pushes++; } else { router_stats->heap_pops++; } -#ifdef VTR_ENABLE_DEBUG_LOGGING - auto node_type = rr_graph->node_type(rr_node_id); - VTR_ASSERT(node_type != NUM_RR_TYPES); - - if (is_inter_cluster_node(*rr_graph, rr_node_id)) { - if (is_push) { - router_stats->inter_cluster_node_pushes++; - router_stats->inter_cluster_node_type_cnt_pushes[node_type]++; - } else { - router_stats->inter_cluster_node_pops++; - router_stats->inter_cluster_node_type_cnt_pops[node_type]++; - } - } else { - if (is_push) { - router_stats->intra_cluster_node_pushes++; - router_stats->intra_cluster_node_type_cnt_pushes[node_type]++; + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + auto node_type = rr_graph->node_type(rr_node_id); + VTR_ASSERT(node_type != NUM_RR_TYPES); + + if (is_inter_cluster_node(*rr_graph, rr_node_id)) { + if (is_push) { + router_stats->inter_cluster_node_pushes++; + router_stats->inter_cluster_node_type_cnt_pushes[node_type]++; + } else { + router_stats->inter_cluster_node_pops++; + router_stats->inter_cluster_node_type_cnt_pops[node_type]++; + } } else { - router_stats->intra_cluster_node_pops++; - router_stats->intra_cluster_node_type_cnt_pops[node_type]++; + if (is_push) { + router_stats->intra_cluster_node_pushes++; + 
router_stats->intra_cluster_node_type_cnt_pushes[node_type]++; + } else { + router_stats->intra_cluster_node_pops++; + router_stats->intra_cluster_node_type_cnt_pops[node_type]++; + } } } -#endif } std::unique_ptr make_connection_router(e_heap_type heap_type, diff --git a/vpr/src/route/route.cpp b/vpr/src/route/route.cpp index c2a094b494b..6bbc3449d88 100644 --- a/vpr/src/route/route.cpp +++ b/vpr/src/route/route.cpp @@ -592,10 +592,12 @@ bool route(const Netlist<>& net_list, //If the routing fails, print the overused info print_overused_nodes_status(router_opts, overuse_info); -#ifdef VTR_ENABLE_DEBUG_LOGGING - if (f_router_debug) - print_invalid_routing_info(net_list, is_flat); -#endif + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + if (f_router_debug) { + print_invalid_routing_info(net_list, is_flat); + } + } + } if (router_opts.with_timing_analysis) { @@ -608,19 +610,19 @@ bool route(const Netlist<>& net_list, VTR_LOG( "Router Stats: total_nets_routed: %zu total_connections_routed: %zu total_heap_pushes: %zu total_heap_pops: %zu ", router_stats.nets_routed, router_stats.connections_routed, router_stats.heap_pushes, router_stats.heap_pops); -#ifdef VTR_ENABLE_DEBUG_LOGGING - VTR_LOG( - "total_internal_heap_pushes: %zu total_internal_heap_pops: %zu total_external_heap_pushes: %zu total_external_heap_pops: %zu ", - router_stats.intra_cluster_node_pushes, router_stats.intra_cluster_node_pops, - router_stats.inter_cluster_node_pushes, router_stats.inter_cluster_node_pops); - for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) { - VTR_LOG("total_external_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]); - VTR_LOG("total_external_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pops[node_type_idx]); - VTR_LOG("total_internal_%s_pushes: %zu ", rr_node_typename[node_type_idx], 
router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]); - VTR_LOG("total_internal_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pops[node_type_idx]); - VTR_LOG("rt_node_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_pushes[node_type_idx]); + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + VTR_LOG( + "total_internal_heap_pushes: %zu total_internal_heap_pops: %zu total_external_heap_pushes: %zu total_external_heap_pops: %zu ", + router_stats.intra_cluster_node_pushes, router_stats.intra_cluster_node_pops, + router_stats.inter_cluster_node_pushes, router_stats.inter_cluster_node_pops); + for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) { + VTR_LOG("total_external_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]); + VTR_LOG("total_external_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pops[node_type_idx]); + VTR_LOG("total_internal_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]); + VTR_LOG("total_internal_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pops[node_type_idx]); + VTR_LOG("rt_node_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_pushes[node_type_idx]); + } } -#endif VTR_LOG("\n"); return success; diff --git a/vpr/src/route/route_debug.cpp b/vpr/src/route/route_debug.cpp index 022b8da8071..6745b67013a 100644 --- a/vpr/src/route/route_debug.cpp +++ b/vpr/src/route/route_debug.cpp @@ -24,7 +24,7 @@ void enable_router_debug( router->set_router_debug(f_router_debug); -#ifndef VTR_ENABLE_DEBUG_LOGGING - VTR_LOGV_WARN(f_router_debug, "Limited router debug output provided since compiled without VTR_ENABLE_DEBUG_LOGGING defined\n"); -#endif + if constexpr (!VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + 
VTR_LOGV_WARN(f_router_debug, "Limited router debug output provided since compiled without VTR_ENABLE_DEBUG_LOGGING defined\n"); + } } diff --git a/vpr/src/timing/NetPinTimingInvalidator.h b/vpr/src/timing/NetPinTimingInvalidator.h index f452b95bd7a..754d118aef2 100644 --- a/vpr/src/timing/NetPinTimingInvalidator.h +++ b/vpr/src/timing/NetPinTimingInvalidator.h @@ -4,6 +4,7 @@ #include "tatum/TimingGraphFwd.hpp" #include "timing_info.h" #include "vtr_range.h" +#include "move_transactions.h" #include "vtr_vec_id_set.h" @@ -21,6 +22,24 @@ class NetPinTimingInvalidator { virtual tedge_range pin_timing_edges(ParentPinId /* pin */) const = 0; virtual void invalidate_connection(ParentPinId /* pin */, TimingInfo* /* timing_info */) = 0; virtual void reset() = 0; + + /** + * @brief Invalidates the connections affected by the specified block moves. + * + * All the connections recorded in blocks_affected.affected_pins have different + * values for `proposed_connection_delay` and `connection_delay`. + * + * Invalidate all the timing graph edges associated with these connections via + * the NetPinTimingInvalidator class. 
+ */ + void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, TimingInfo* timing_info) { + VTR_ASSERT_SAFE(timing_info); + + // Invalidate timing graph edges affected by the move + for (ClusterPinId pin : blocks_affected.affected_pins) { + invalidate_connection(pin, timing_info); + } + } }; //Helper class for iterating through the timing edges associated with a particular diff --git a/vpr/src/timing/PostClusterDelayCalculator.tpp b/vpr/src/timing/PostClusterDelayCalculator.tpp index 7f6cda39022..9c989cec03f 100644 --- a/vpr/src/timing/PostClusterDelayCalculator.tpp +++ b/vpr/src/timing/PostClusterDelayCalculator.tpp @@ -247,12 +247,9 @@ inline tatum::Time PostClusterDelayCalculator::atom_net_delay(const tatum::Timin set_cached_pins(edge_id, delay_type, (ParentPinId&)atom_src_pin, (ParentPinId&)atom_sink_pin); } else { - ClusterBlockId clb_src_block; - ClusterBlockId clb_sink_block; - - clb_src_block = netlist_lookup_.atom_clb(atom_src_block); + ClusterBlockId clb_src_block = netlist_lookup_.atom_clb(atom_src_block); VTR_ASSERT(clb_src_block != ClusterBlockId::INVALID()); - clb_sink_block = netlist_lookup_.atom_clb(atom_sink_block); + ClusterBlockId clb_sink_block = netlist_lookup_.atom_clb(atom_sink_block); VTR_ASSERT(clb_sink_block != ClusterBlockId::INVALID()); const t_pb_graph_pin* src_gpin = netlist_lookup_.atom_pin_pb_graph_pin(atom_src_pin); diff --git a/vpr/src/timing/clb_delay_calc.inl b/vpr/src/timing/clb_delay_calc.inl index 67524a82f7b..acfbc92a189 100644 --- a/vpr/src/timing/clb_delay_calc.inl +++ b/vpr/src/timing/clb_delay_calc.inl @@ -22,7 +22,7 @@ inline float ClbDelayCalc::internal_src_to_internal_sink_delay(const ClusterBloc } inline float ClbDelayCalc::trace_delay(ClusterBlockId clb, int src_pb_route_id, int sink_pb_route_id, DelayType delay_type) const { - auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); VTR_ASSERT(src_pb_route_id < 
cluster_ctx.clb_nlist.block_pb(clb)->pb_graph_node->total_pb_pins); VTR_ASSERT(sink_pb_route_id < cluster_ctx.clb_nlist.block_pb(clb)->pb_graph_node->total_pb_pins); diff --git a/vpr/src/timing/timing_graph_builder.cpp b/vpr/src/timing/timing_graph_builder.cpp index c0462429648..49534b9d380 100644 --- a/vpr/src/timing/timing_graph_builder.cpp +++ b/vpr/src/timing/timing_graph_builder.cpp @@ -295,7 +295,7 @@ void TimingGraphBuilder::build(bool allow_dangling_combinational_nodes) { } //Walk through the netlist nets adding the edges representing each net to - //the timiing graph. This connects the timing graph nodes of each netlist + //the timing graph. This connects the timing graph nodes of each netlist //block together. for (AtomNetId net : netlist_.nets()) { add_net_to_timing_graph(net); @@ -359,13 +359,13 @@ void TimingGraphBuilder::add_io_to_timing_graph(const AtomBlockId blk) { //Creates the timing graph nodes and internal edges for a netlist block void TimingGraphBuilder::add_block_to_timing_graph(const AtomBlockId blk) { /* - * How the code builds the primtive timing sub-graph + * How the code builds the primitive timing sub-graph * ------------------------------------------------- * - * The code below builds the timing sub-graph corresponding corresponding to the + * The code below builds the timing sub-graph corresponding to the * current netlist primitive/block. This is accomplished by walking through * the primitive's input, clock and output pins and creating the corresponding - * tnodes (note that if internal sequentail paths exist within the primitive + * tnodes (note that if internal sequential paths exist within the primitive * this also creates the appropriate internal tnodes). 
* * Once all nodes have been created the edges are added between them according @@ -374,12 +374,12 @@ void TimingGraphBuilder::add_block_to_timing_graph(const AtomBlockId blk) { * Note that to minimize the size of the timing graph we only create tnodes and * edges where they actually exist within the netlist. This means we do not create * tnodes or tedges to/from pins which are disconnected in the netlist (even if - * they exist in the archtiecture). + * they exist in the architecture). * * * Clock Generators * ---------------- - * An additional wrinkle in the above process is the presense of clock generators, + * An additional wrinkle in the above process is the presence of clock generators, * such as PLLs, which may define new clocks at their output (in contrast with a * primary input which is always a SOURCE type tnode). * @@ -545,7 +545,7 @@ void TimingGraphBuilder::create_block_internal_clock_timing_edges(const AtomBloc AtomPinId clk_pin = netlist_.port_pin(clk_port, 0); VTR_ASSERT(clk_pin); - //Convert the pin to it's tnode + //Convert the pin to its tnode NodeId clk_tnode = netlist_lookup_.atom_pin_tnode(clk_pin); VTR_ASSERT(clk_tnode); @@ -605,7 +605,7 @@ void TimingGraphBuilder::create_block_internal_data_timing_edges(const AtomBlock //to OPIN), the end of a timing path (i.e. IPIN to SINK), or an internal timing path //(i.e. SOURCE to SINK). 
// - //Note that the creation of these edges is driven by the 'combinationl_sink_ports' specified + //Note that the creation of these edges is driven by the 'combinational_sink_ports' specified //in the architecture primitive model for (AtomPinId src_pin : netlist_.block_input_pins(blk)) { //Note that we have already created all the relevant nodes, and appropriately labelled them as @@ -615,7 +615,7 @@ void TimingGraphBuilder::create_block_internal_data_timing_edges(const AtomBlock if (!src_tnode) continue; - auto src_type = tg_->node_type(src_tnode); + NodeType src_type = tg_->node_type(src_tnode); //Look-up the combinationally connected sink ports name on the port model AtomPortId src_port = netlist_.pin_port(src_pin); diff --git a/vpr/src/timing/timing_info.h b/vpr/src/timing/timing_info.h index 323ac7efbb6..14d3b08f939 100644 --- a/vpr/src/timing/timing_info.h +++ b/vpr/src/timing/timing_info.h @@ -122,7 +122,7 @@ class HoldTimingInfo : public virtual TimingInfo { //Generic interface which provides both setup and hold related timing information // //This is useful for algorithms which require access to both setup and hold timing -//information (e.g. simulatneously optimizing setup and hold) +//information (e.g. 
simultaneously optimizing setup and hold) // //This class supports both the SetupTimingInfo and HoldTimingInfo interfaces and //can be used in place of them in any algorithm requiring setup or hold related diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp index 536667faa51..a210c0dbdcd 100644 --- a/vpr/src/timing/timing_util.cpp +++ b/vpr/src/timing/timing_util.cpp @@ -697,14 +697,14 @@ std::map count_clock_fanouts(const tatum::TimingGraph& */ float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, - const ParentPinId& pin_id, + const ParentPinId pin_id, bool is_flat) { float pin_crit = 0.; if (is_flat) { pin_crit = timing_info.setup_pin_criticality(convert_to_atom_pin_id(pin_id)); } else { //There may be multiple atom netlist pins connected to this CLB pin - for (const auto atom_pin : pin_lookup.connected_atom_pins(convert_to_cluster_pin_id(pin_id))) { + for (const AtomPinId atom_pin : pin_lookup.connected_atom_pins(convert_to_cluster_pin_id(pin_id))) { //Take the maximum of the atom pin criticality as the CLB pin criticality pin_crit = std::max(pin_crit, timing_info.setup_pin_criticality(atom_pin)); } diff --git a/vpr/src/timing/timing_util.h b/vpr/src/timing/timing_util.h index 51fc0491c28..e4d45c84213 100644 --- a/vpr/src/timing/timing_util.h +++ b/vpr/src/timing/timing_util.h @@ -89,7 +89,7 @@ std::map count_clock_fanouts(const tatum::TimingGraph& //Return the criticality of a net's pin in the CLB netlist float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, - const ParentPinId& clb_pin, + const ParentPinId clb_pin, bool is_flat); //Return the setup slack of a net's pin in the CLB netlist