Merge pull request #2800 from verilog-to-routing/temp_annealer_class

PlacementAnnealer class
verilog-to-routing · Nov 18, 2024 · e7f964e · e7f964e
2 parents c8d3111 + 8c0fdfc
commit e7f964e
Show file tree

Hide file tree

Showing 63 changed files with 1,790 additions and 2,307 deletions.
diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp
@@ -200,7 +200,7 @@ NodeId TimingGraph::add_node(const NodeType type) {
 
 EdgeId TimingGraph::add_edge(const EdgeType type, const NodeId src_node, const NodeId sink_node) {
     //We require that the source/sink node must already be in the graph,
-    //  so we can update them with thier edge references
+    //  so we can update them with their edge references
     TATUM_ASSERT(valid_node_id(src_node));
     TATUM_ASSERT(valid_node_id(sink_node));
 
@@ -211,7 +211,7 @@ EdgeId TimingGraph::add_edge(const EdgeType type, const NodeId src_node, const N
     EdgeId edge_id = EdgeId(edge_ids_.size());
     edge_ids_.push_back(edge_id);
 
-    //Create the edgge
+    //Create the edge
     edge_types_.push_back(type);
     edge_src_nodes_.push_back(src_node);
     edge_sink_nodes_.push_back(sink_node);
@@ -318,7 +318,7 @@ GraphIdMaps TimingGraph::compress() {
     levelize();
     validate();
 
-    return {node_id_map, edge_id_map};
+    return {std::move(node_id_map), std::move(edge_id_map)};
 }
 
 void TimingGraph::levelize() {
@@ -474,21 +474,20 @@ GraphIdMaps TimingGraph::optimize_layout() {
 
     levelize();
 
-    return {node_id_map, edge_id_map};
+    return {std::move(node_id_map), std::move(edge_id_map)};
 }
 
 tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const {
     //Make all edges in a level be contiguous in memory
 
     //Determine the edges driven by each level of the graph
-    std::vector<std::vector<EdgeId>> edge_levels;
+    std::vector<std::vector<EdgeId>> edge_levels(levels().size());
     for(LevelId level_id : levels()) {
-        edge_levels.push_back(std::vector<EdgeId>());
-        for(auto node_id : level_nodes(level_id)) {
+        for(NodeId node_id : level_nodes(level_id)) {
 
             //We walk the nodes according to the input-edge order.
             //This is the same order used by the arrival-time traversal (which is responsible
-            //for most of the analyzer run-time), so matching it's order exactly results in
+            //for most of the analyzer run-time), so matching its order exactly results in
             //better cache locality
             for(EdgeId edge_id : node_in_edges(node_id)) {
 
@@ -498,7 +497,7 @@ tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const
         }
     }
 
-    //Maps from from original to new edge id, used to update node to edge refs
+    //Maps from original to new edge id, used to update node to edge refs
     tatum::util::linear_map<EdgeId,EdgeId> orig_to_new_edge_id(edges().size());
 
     //Determine the new order
@@ -874,7 +873,7 @@ std::vector<std::vector<NodeId>> identify_combinational_loops(const TimingGraph&
 }
 
 std::vector<NodeId> find_transitively_connected_nodes(const TimingGraph& tg, 
-                                                      const std::vector<NodeId> through_nodes, 
+                                                      const std::vector<NodeId>& through_nodes,
                                                       size_t max_depth) {
     std::vector<NodeId> nodes;
 
@@ -890,7 +889,7 @@ std::vector<NodeId> find_transitively_connected_nodes(const TimingGraph& tg,
 }
 
 std::vector<NodeId> find_transitive_fanin_nodes(const TimingGraph& tg, 
-                                                const std::vector<NodeId> sinks, 
+                                                const std::vector<NodeId>& sinks,
                                                 size_t max_depth) {
     std::vector<NodeId> nodes;
 
@@ -905,7 +904,7 @@ std::vector<NodeId> find_transitive_fanin_nodes(const TimingGraph& tg,
 }
 
 std::vector<NodeId> find_transitive_fanout_nodes(const TimingGraph& tg, 
-                                                 const std::vector<NodeId> sources, 
+                                                 const std::vector<NodeId>& sources,
                                                  size_t max_depth) {
     std::vector<NodeId> nodes;
 

diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp
@@ -11,8 +11,8 @@
  * store all edges as bi-directional edges.
  *
  * NOTE: We store only the static connectivity and node information in the 'TimingGraph' class.
- *       Other dynamic information (edge delays, node arrival/required times) is stored seperately.
- *       This means that most actions opearting on the timing graph (e.g. TimingAnalyzers) only
+ *       Other dynamic information (edge delays, node arrival/required times) is stored separately.
+ *       This means that most actions operating on the timing graph (e.g. TimingAnalyzers) only
  *       require read-only access to the timing graph.
  *
  * Accessing Graph Data
@@ -28,9 +28,9 @@
  * rather than the more typical "Array of Structs (AoS)" data layout.
  *
  * By using a SoA layout we keep all data for a particular field (e.g. node types) in contiguous
- * memory.  Using an AoS layout the various fields accross nodes would *not* be contiguous
+ * memory.  Using an AoS layout the various fields across nodes would *not* be contiguous
  * (although the different fields within each object (e.g. a TimingNode class) would be contiguous.
- * Since we typically perform operations on particular fields accross nodes the SoA layout performs
+ * Since we typically perform operations on particular fields across nodes the SoA layout performs
  * better (and enables memory ordering optimizations). The edges are also stored in a SOA format.
  *
  * The SoA layout also motivates the ID based approach, which allows direct indexing into the required
@@ -48,11 +48,12 @@
  * and ensures that each cache line pulled into the cache will (likely) be accessed multiple times
  * before being evicted.
  *
- * Note that performing these optimizations is currently done explicity by calling the optimize_edge_layout()
- * and optimize_node_layout() member functions.  In the future (particularily if incremental modification
+ * Note that performing these optimizations is currently done explicitly by calling the optimize_edge_layout()
+ * and optimize_node_layout() member functions.  In the future (particularly if incremental modification
  * support is added), it may be a good idea apply these modifications automatically as needed.
  *
  */
+#include <utility>
 #include <vector>
 #include <set>
 #include <limits>
@@ -149,7 +150,7 @@ class TimingGraph {
 
         ///\pre The graph must be levelized.
         ///\returns A range containing the nodes which are primary inputs (i.e. SOURCE's with no fanin, corresponding to top level design inputs pins)
-        ///\warning Not all SOURCE nodes in the graph are primary inputs (e.g. FF Q pins are SOURCE's but have incomming edges from the clock network)
+        ///\warning Not all SOURCE nodes in the graph are primary inputs (e.g. FF Q pins are SOURCE's but have incoming edges from the clock network)
         ///\see levelize()
         node_range primary_inputs() const { 
             TATUM_ASSERT_MSG(is_levelized_, "Timing graph must be levelized");
@@ -282,7 +283,7 @@ class TimingGraph {
         //Node data
         tatum::util::linear_map<NodeId,NodeId> node_ids_; //The node IDs in the graph
         tatum::util::linear_map<NodeId,NodeType> node_types_; //Type of node
-        tatum::util::linear_map<NodeId,std::vector<EdgeId>> node_in_edges_; //Incomiing edge IDs for node
+        tatum::util::linear_map<NodeId,std::vector<EdgeId>> node_in_edges_; //Incoming edge IDs for node
         tatum::util::linear_map<NodeId,std::vector<EdgeId>> node_out_edges_; //Out going edge IDs for node
         tatum::util::linear_map<NodeId,LevelId> node_levels_; //Out going edge IDs for node
 
@@ -293,12 +294,12 @@ class TimingGraph {
         tatum::util::linear_map<EdgeId,NodeId> edge_src_nodes_; //Source node for each edge
         tatum::util::linear_map<EdgeId,bool>   edges_disabled_;
 
-        //Auxilary graph-level info, filled in by levelize()
+        //Auxiliary graph-level info, filled in by levelize()
         tatum::util::linear_map<LevelId,LevelId> level_ids_; //The level IDs in the graph
         tatum::util::linear_map<LevelId,std::vector<NodeId>> level_nodes_; //Nodes in each level
         std::vector<NodeId> primary_inputs_; //Primary input nodes of the timing graph.
         std::vector<NodeId> logical_outputs_; //Logical output nodes of the timing graph.
-        bool is_levelized_ = false; //Inidcates if the current levelization is valid
+        bool is_levelized_ = false; //Indicates if the current levelization is valid
 
         bool allow_dangling_combinational_nodes_ = false;
 
@@ -310,26 +311,31 @@ std::vector<std::vector<NodeId>> identify_combinational_loops(const TimingGraph&
 //Returns the set of nodes transitively connected (either fanin or fanout) to nodes in through_nodes
 //up to max_depth (default infinite) hops away
 std::vector<NodeId> find_transitively_connected_nodes(const TimingGraph& tg, 
-                                                      const std::vector<NodeId> through_nodes, 
+                                                      const std::vector<NodeId>& through_nodes,
                                                       size_t max_depth=std::numeric_limits<size_t>::max());
 
 //Returns the set of nodes in the transitive fanin of nodes in sinks up to max_depth (default infinite) hops away
 std::vector<NodeId> find_transitive_fanin_nodes(const TimingGraph& tg, 
-                                                const std::vector<NodeId> sinks, 
+                                                const std::vector<NodeId>& sinks,
                                                 size_t max_depth=std::numeric_limits<size_t>::max());
 
 //Returns the set of nodes in the transitive fanout of nodes in sources up to max_depth (default infinite) hops away
 std::vector<NodeId> find_transitive_fanout_nodes(const TimingGraph& tg,
-                                                 const std::vector<NodeId> sources, 
+                                                 const std::vector<NodeId>& sources,
                                                  size_t max_depth=std::numeric_limits<size_t>::max());
 
 EdgeType infer_edge_type(const TimingGraph& tg, EdgeId edge);
 
 //Mappings from old to new IDs
 struct GraphIdMaps {
-    GraphIdMaps(tatum::util::linear_map<NodeId,NodeId> node_map,
-                tatum::util::linear_map<EdgeId,EdgeId> edge_map)
+    GraphIdMaps(const tatum::util::linear_map<NodeId,NodeId>& node_map,
+                const tatum::util::linear_map<EdgeId,EdgeId>& edge_map)
         : node_id_map(node_map), edge_id_map(edge_map) {}
+
+    GraphIdMaps(tatum::util::linear_map<NodeId,NodeId>&& node_map,
+                tatum::util::linear_map<EdgeId,EdgeId>&& edge_map)
+        : node_id_map(std::move(node_map)), edge_id_map(std::move(edge_map)) {}
+
     tatum::util::linear_map<NodeId,NodeId> node_id_map;
     tatum::util::linear_map<EdgeId,EdgeId> edge_id_map;
 };

diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp
@@ -18,7 +18,7 @@ namespace tatum {
  * This file defines the AnalyzerFactory class used to construct timing analyzers.
  *
  * We assume that the user has already defined the timing graph, constraints and
- * thier own delay calculator: 
+ * their own delay calculator:
  *
  *      TimingGraph timing_graph;
  *      TimingConstraints timing_constraints;
@@ -33,7 +33,7 @@ namespace tatum {
  *                                                                 timing_constraints,
  *                                                                 delay_calculator);
  *
- * We can similarily generate analyzers for other types of analysis, for instance Hold:
+ * We can similarly generate analyzers for other types of analysis, for instance Hold:
  *
  *      auto hold_analyzer = AnalyzerFactory<SetupAnalysis>::make(timing_graph,
  *                                                                timing_constraints,
@@ -45,7 +45,7 @@ namespace tatum {
  *                                                                                         timing_constraints,
  *                                                                                         delay_calculator);
  *
- * The AnalzyerFactory returns a std::unique_ptr to the appropriate TimingAnalyzer sub-class:
+ * The AnalyzerFactory returns a std::unique_ptr to the appropriate TimingAnalyzer sub-class:
  *
  *      SetupAnalysis       =>  SetupTimingAnalyzer
  *      HoldAnalysis        =>  HoldTimingAnalyzer

diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp
@@ -8,7 +8,7 @@
 namespace tatum {
 
 /** 
- * An exmaple DelayCalculator implementation which takes 
+ * An example DelayCalculator implementation which takes
  * a vector of fixed pre-calculated edge delays
  *
  * \see DelayCalculator

diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp
@@ -21,7 +21,7 @@ class GraphVisitor {
         virtual void do_reset_node_arrival_tags_from_origin(const NodeId node_id, const NodeId origin) = 0;
         virtual void do_reset_node_required_tags_from_origin(const NodeId node_id, const NodeId origin) = 0;
 
-        //Returns true if the specified source/sink is unconstrainted
+        //Returns true if the specified source/sink is unconstrained
         virtual bool do_arrival_pre_traverse_node(const TimingGraph& tg, const TimingConstraints& tc, const NodeId node_id) = 0;
         virtual bool do_required_pre_traverse_node(const TimingGraph& tg, const TimingConstraints& tc, const NodeId node_id) = 0;
 

diff --git a/libs/libarchfpga/src/device_grid.h b/libs/libarchfpga/src/device_grid.h
@@ -38,6 +38,10 @@ class DeviceGrid {
     size_t width() const { return grid_.dim_size(1); }
     ///@brief Return the height of the grid at the specified layer
     size_t height() const { return grid_.dim_size(2); }
+    ///@brief Return the grid dimensions in (# of layers, width, height) format
+    std::tuple<size_t, size_t, size_t> dim_sizes() const {
+        return {grid_.dim_size(0), grid_.dim_size(1), grid_.dim_size(2)};
+    }
 
     ///@brief Return the size of the flattened grid on the given layer
     inline size_t grid_size() const {

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
@@ -92,7 +92,6 @@ void SetupVPR(const t_options* options,
               t_packer_opts* packerOpts,
               t_placer_opts* placerOpts,
               t_ap_opts* apOpts,
-              t_annealing_sched* annealSched,
               t_router_opts* routerOpts,
               t_analysis_opts* analysisOpts,
               t_noc_opts* nocOpts,
@@ -140,7 +139,7 @@ void SetupVPR(const t_options* options,
 
     SetupNetlistOpts(*options, *netlistOpts);
     SetupPlacerOpts(*options, placerOpts);
-    SetupAnnealSched(*options, annealSched);
+    SetupAnnealSched(*options, &placerOpts->anneal_sched);
     SetupRouterOpts(*options, routerOpts);
     SetupAnalysisOpts(*options, *analysisOpts);
     SetupPowerOpts(*options, powerOpts, arch);
@@ -395,7 +394,7 @@ static void SetupSwitches(const t_arch& Arch,
     device_ctx.delayless_switch_idx = RoutingArch->delayless_switch;
 
     //Warn about non-zero Cout values for the ipin switch, since these values have no effect.
-    //VPR do not model the R/C's of block internal routing connectsion.
+    //VPR does not model the R/C's of block internal routing connections.
     //
     //Note that we don't warn about the R value as it may be used to size the buffer (if buf_size_type is AUTO)
     if (device_ctx.arch_switch_inf[RoutingArch->wire_to_arch_ipin_switch].Cout != 0.) {
@@ -531,31 +530,6 @@ static void SetupAnnealSched(const t_options& Options,
         VPR_FATAL_ERROR(VPR_ERROR_OTHER, "inner_num must be greater than 0.\n");
     }
 
-    AnnealSched->alpha_min = Options.PlaceAlphaMin;
-    if (AnnealSched->alpha_min >= 1 || AnnealSched->alpha_min <= 0) {
-        VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_min must be between 0 and 1 exclusive.\n");
-    }
-
-    AnnealSched->alpha_max = Options.PlaceAlphaMax;
-    if (AnnealSched->alpha_max >= 1 || AnnealSched->alpha_max <= AnnealSched->alpha_min) {
-        VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_max must be between alpha_min and 1 exclusive.\n");
-    }
-
-    AnnealSched->alpha_decay = Options.PlaceAlphaDecay;
-    if (AnnealSched->alpha_decay >= 1 || AnnealSched->alpha_decay <= 0) {
-        VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_decay must be between 0 and 1 exclusive.\n");
-    }
-
-    AnnealSched->success_min = Options.PlaceSuccessMin;
-    if (AnnealSched->success_min >= 1 || AnnealSched->success_min <= 0) {
-        VPR_FATAL_ERROR(VPR_ERROR_OTHER, "success_min must be between 0 and 1 exclusive.\n");
-    }
-
-    AnnealSched->success_target = Options.PlaceSuccessTarget;
-    if (AnnealSched->success_target >= 1 || AnnealSched->success_target <= 0) {
-        VPR_FATAL_ERROR(VPR_ERROR_OTHER, "success_target must be between 0 and 1 exclusive.\n");
-    }
-
     AnnealSched->type = Options.anneal_sched_type;
 }
 
@@ -600,7 +574,6 @@ void SetupPackerOpts(const t_options& Options,
     //TODO: document?
     PackerOpts->inter_cluster_net_delay = 1.0; /* DEFAULT */
     PackerOpts->auto_compute_inter_cluster_net_delay = true;
-    PackerOpts->packer_algorithm = PACK_GREEDY; /* DEFAULT */
 
     PackerOpts->device_layout = Options.device_layout;
 
@@ -782,7 +755,7 @@ static void SetupServerOpts(const t_options& Options, t_server_opts* ServerOpts)
 }
 
 static void find_ipin_cblock_switch_index(const t_arch& Arch, int& wire_to_arch_ipin_switch, int& wire_to_arch_ipin_switch_between_dice) {
-    for (auto cb_switch_name_index = 0; cb_switch_name_index < (int)Arch.ipin_cblock_switch_name.size(); cb_switch_name_index++) {
+    for (int cb_switch_name_index = 0; cb_switch_name_index < (int)Arch.ipin_cblock_switch_name.size(); cb_switch_name_index++) {
         int ipin_cblock_switch_index = UNDEFINED;
         for (int iswitch = 0; iswitch < (int)Arch.switches.size(); ++iswitch) {
             if (Arch.switches[iswitch].name == Arch.ipin_cblock_switch_name[cb_switch_name_index]) {

diff --git a/vpr/src/base/SetupVPR.h b/vpr/src/base/SetupVPR.h
@@ -17,7 +17,6 @@ void SetupVPR(const t_options* Options,
               t_packer_opts* PackerOpts,
               t_placer_opts* PlacerOpts,
               t_ap_opts* APOpts,
-              t_annealing_sched* AnnealSched,
               t_router_opts* RouterOpts,
               t_analysis_opts* AnalysisOpts,
               t_noc_opts* NocOpts,