Merge branch 'master' into patch-1

verilog-to-routing · Nov 18, 2024 · 2bdc0e9 · 2bdc0e9
2 parents e6b4a62 + 920e8ab
commit 2bdc0e9
Show file tree

Hide file tree

Showing 265 changed files with 15,183 additions and 5,952 deletions.
diff --git a/.github/workflows/nightly_test.yml b/.github/workflows/nightly_test.yml
@@ -4,20 +4,23 @@ on:
   # We want to run the CI when anything is pushed to master.
   # Since master is a protected branch this only happens when a PR is merged.
   # This is a double check in case the PR was stale and had some issues.
-  push:
-    branches:
-      - master
-    paths-ignore: # Prevents from running if only docs are updated
-      - 'doc/**'
-      - '**/*README*'
-      - '**.md'
-      - '**.rst'
-  pull_request:
-    paths-ignore: # Prevents from running if only docs are updated
-      - 'doc/**'
-      - '**/*README*'
-      - '**.md'
-      - '**.rst'
+  # NOTE: This was turned off in late October 2024 since the Nightly Tests were
+  #       no longer working on the self-hosted runners. Will turn this back on
+  #       once the issue is resolved.
+  # push:
+  #   branches:
+  #     - master
+  #   paths-ignore: # Prevents from running if only docs are updated
+  #     - 'doc/**'
+  #     - '**/*README*'
+  #     - '**.md'
+  #     - '**.rst'
+  # pull_request:
+  #   paths-ignore: # Prevents from running if only docs are updated
+  #     - 'doc/**'
+  #     - '**/*README*'
+  #     - '**.md'
+  #     - '**.rst'
   workflow_dispatch:
   schedule:
   - cron: '0 0 * * *' # daily

diff --git a/doc/src/arch/reference.rst b/doc/src/arch/reference.rst
@@ -2337,8 +2337,8 @@ The ``<direct>`` tag and its contents are described below.
     :req_param y_offset: The y location of the receiving CLB relative to the driving CLB.
     :req_param z_offset: The z location of the receiving CLB relative to the driving CLB.
     :opt_param switch_name: [Optional, defaults to delay-less switch if not specified] The name of the ``<switch>`` from ``<switchlist>`` to be used for this direct connection.
-    :opt_param from_side: The associated from_pin's block size (must be one of ``left``, ``right``, ``top``, ``bottom`` or left unspecified)
-    :opt_param to_side: The associated to_pin's block size (must be one of ``left``, ``right``, ``top``, ``bottom`` or left unspecified)
+    :opt_param from_side: The associated from_pin's block side (must be one of ``left``, ``right``, ``top``, ``bottom`` or left unspecified)
+    :opt_param to_side: The associated to_pin's block side (must be one of ``left``, ``right``, ``top``, ``bottom`` or left unspecified)
 
     Describes a dedicated connection between two complex block pins that skips general interconnect.
     This is useful for describing structures such as carry chains as well as adjacent neighbour connections.

diff --git a/doc/src/quickstart/blink_implementation.png b/doc/src/quickstart/blink_implementation.png
diff --git a/doc/src/quickstart/index.rst b/doc/src/quickstart/index.rst
diff --git a/doc/src/quickstart/tseng_blk1.png b/doc/src/quickstart/tseng_blk1.png
diff --git a/doc/src/quickstart/tseng_nets.png b/doc/src/quickstart/tseng_nets.png
diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst
@@ -1074,12 +1074,16 @@ The following options are only used when FPGA device and netlist contain a NoC r
 
     .. note:: noc_flows_file is required if NoC optimization is turned on (--noc on).
 
-.. option:: --noc_routing_algorithm {xy_routing | bfs_routing}
+.. option:: --noc_routing_algorithm {xy_routing | bfs_routing | west_first_routing | north_last_routing | negative_first_routing | odd_even_routing}
 
     Controls the algorithm used by the NoC to route packets.
 
     * ``xy_routing`` Uses the direction oriented routing algorithm. This is recommended to be used with mesh NoC topologies.
-    * ``bfs_routing`` Uses the breadth first search algorithm. The objective is to find a route that uses a minimum number of links. This can be used with any NoC topology.
+    * ``bfs_routing`` Uses the breadth first search algorithm. The objective is to find a route that uses a minimum number of links. This algorithm is not guaranteed to generate deadlock-free traffic flow routes, but can be used with any NoC topology.
+    * ``west_first_routing`` Uses the west-first routing algorithm. This is recommended to be used with mesh NoC topologies.
+    * ``north_last_routing`` Uses the north-last routing algorithm. This is recommended to be used with mesh NoC topologies.
+    * ``negative_first_routing`` Uses the negative-first routing algorithm. This is recommended to be used with mesh NoC topologies.
+    * ``odd_even_routing`` Uses the odd-even routing algorithm. This is recommended to be used with mesh NoC topologies.
 
     **Default:** ``bfs_routing``
 
@@ -1091,28 +1095,45 @@ The following options are only used when FPGA device and netlist contain a NoC r
     * ``noc_placement_weighting = 1`` means noc placement is considered equal to timing and wirelength.
     * ``noc_placement_weighting > 1`` means the placement is increasingly dominated by NoC parameters.
 
-    **Default:** ``0.6``
+    **Default:** ``5.0``
+
+.. option:: --noc_aggregate_bandwidth_weighting <float>
+
+    Controls the importance of minimizing the NoC aggregate bandwidth. This value can be >=0, where 0 would mean the aggregate bandwidth has no relevance to placement.
+    Other positive numbers specify the importance of minimizing the NoC aggregate bandwidth compared to other NoC-related cost terms.
+    Weighting factors for NoC-related cost terms are normalized internally. Therefore, their absolute values are not important, and
+    only their relative ratios determine the importance of each cost term.
+
+    **Default:** ``0.38``
 
 .. option:: --noc_latency_constraints_weighting <float>
 
-    Controls the importance of meeting all the NoC traffic flow latency constraints.
+    Controls the importance of meeting all the NoC traffic flow latency constraints. This value can be >=0, where 0 would mean latency constraints have no relevance to placement.
+    Other positive numbers specify the importance of meeting latency constraints compared to other NoC-related cost terms.
+    Weighting factors for NoC-related cost terms are normalized internally. Therefore, their absolute values are not important, and
+    only their relative ratios determine the importance of each cost term.
 
-    * ``latency_constraints = 0`` means the latency constraints have no relevance to placement.
-    * ``0 < latency_constraints < 1`` means the latency constraints are weighted equally to the sum of other placement cost components. 
-    * ``latency_constraints > 1`` means the placement is increasingly dominated by reducing the latency constraints of the traffic flows.
-
-    **Default:** ``1``
+    **Default:** ``0.6``
 
 .. option:: --noc_latency_weighting <float>
 
     Controls the importance of reducing the latencies of the NoC traffic flows.
-    This value can be >=0, 
+    This value can be >=0, where 0 would mean the latencies have no relevance to placement.
+    Other positive numbers specify the importance of minimizing aggregate latency compared to other NoC-related cost terms.
+    Weighting factors for NoC-related cost terms are normalized internally. Therefore, their absolute values are not important, and
+    only their relative ratios determine the importance of each cost term.
 
-    * ``latency = 0`` means the latencies have no relevance to placement.
-    * ``0 < latency < 1`` means the latencies are weighted equally to the sum of other placement cost components. 
-    * ``latency > 1`` means the placement is increasingly dominated by reducing the latencies of the traffic flows.
-
-    **Default:** ``0.05``
+    **Default:** ``0.02``
+
+.. option:: --noc_congestion_weighting <float>
+
+    Controls the importance of reducing the congestion of the NoC links.
+    This value can be >=0, where 0 would mean the congestion has no relevance to placement.
+    Other positive numbers specify the importance of minimizing congestion compared to other NoC-related cost terms.
+    Weighting factors for NoC-related cost terms are normalized internally. Therefore, their absolute values are not important, and
+    only their relative ratios determine the importance of each cost term.
+
+    **Default:** ``0.25``
 
 .. option:: --noc_swap_percentage <float>
 
@@ -1122,7 +1143,7 @@ The following options are only used when FPGA device and netlist contain a NoC r
     * ``0`` means NoC blocks will be moved at the same rate as other blocks. 
     * ``100`` means all swaps attempted by the placer are NoC router blocks.
 
-    **Default:** ``40``    
+    **Default:** ``0``
 
 .. option:: --noc_placement_file_name <file>
 

diff --git a/libs/EXTERNAL/libblifparse/CMakeLists.txt b/libs/EXTERNAL/libblifparse/CMakeLists.txt
@@ -45,6 +45,10 @@ add_library(libblifparse STATIC
 target_include_directories(libblifparse PUBLIC ${LIB_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR})
 set_target_properties(libblifparse PROPERTIES PREFIX "") #Avoid extra 'lib' prefix
 
+# Set the read buffer size in the generated lexers. This reduces the number of
+# syscalls since the default is only 1kB.
+target_compile_definitions(libblifparse PRIVATE YY_READ_BUF_SIZE=1048576)
+
 #Create the test executable
 add_executable(blifparse_test src/main.cpp)
 target_link_libraries(blifparse_test libblifparse)

diff --git a/libs/EXTERNAL/libcatch2 b/libs/EXTERNAL/libcatch2
diff --git a/libs/EXTERNAL/libezgl/CMakeLists.txt b/libs/EXTERNAL/libezgl/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
 
 # create the project
 project(

diff --git a/libs/EXTERNAL/libezgl/examples/basic-application/CMakeLists.txt b/libs/EXTERNAL/libezgl/examples/basic-application/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
 
 project(
   basic-application

diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp
@@ -200,7 +200,7 @@ NodeId TimingGraph::add_node(const NodeType type) {
 
 EdgeId TimingGraph::add_edge(const EdgeType type, const NodeId src_node, const NodeId sink_node) {
     //We require that the source/sink node must already be in the graph,
-    //  so we can update them with thier edge references
+    //  so we can update them with their edge references
     TATUM_ASSERT(valid_node_id(src_node));
     TATUM_ASSERT(valid_node_id(sink_node));
 
@@ -211,7 +211,7 @@ EdgeId TimingGraph::add_edge(const EdgeType type, const NodeId src_node, const N
     EdgeId edge_id = EdgeId(edge_ids_.size());
     edge_ids_.push_back(edge_id);
 
-    //Create the edgge
+    //Create the edge
     edge_types_.push_back(type);
     edge_src_nodes_.push_back(src_node);
     edge_sink_nodes_.push_back(sink_node);
@@ -318,7 +318,7 @@ GraphIdMaps TimingGraph::compress() {
     levelize();
     validate();
 
-    return {node_id_map, edge_id_map};
+    return {std::move(node_id_map), std::move(edge_id_map)};
 }
 
 void TimingGraph::levelize() {
@@ -474,21 +474,20 @@ GraphIdMaps TimingGraph::optimize_layout() {
 
     levelize();
 
-    return {node_id_map, edge_id_map};
+    return {std::move(node_id_map), std::move(edge_id_map)};
 }
 
 tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const {
     //Make all edges in a level be contiguous in memory
 
     //Determine the edges driven by each level of the graph
-    std::vector<std::vector<EdgeId>> edge_levels;
+    std::vector<std::vector<EdgeId>> edge_levels(levels().size());
     for(LevelId level_id : levels()) {
-        edge_levels.push_back(std::vector<EdgeId>());
-        for(auto node_id : level_nodes(level_id)) {
+        for(NodeId node_id : level_nodes(level_id)) {
 
             //We walk the nodes according to the input-edge order.
             //This is the same order used by the arrival-time traversal (which is responsible
-            //for most of the analyzer run-time), so matching it's order exactly results in
+            //for most of the analyzer run-time), so matching its order exactly results in
             //better cache locality
             for(EdgeId edge_id : node_in_edges(node_id)) {
 
@@ -498,7 +497,7 @@ tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const
         }
     }
 
-    //Maps from from original to new edge id, used to update node to edge refs
+    //Maps from original to new edge id, used to update node to edge refs
     tatum::util::linear_map<EdgeId,EdgeId> orig_to_new_edge_id(edges().size());
 
     //Determine the new order
@@ -874,7 +873,7 @@ std::vector<std::vector<NodeId>> identify_combinational_loops(const TimingGraph&
 }
 
 std::vector<NodeId> find_transitively_connected_nodes(const TimingGraph& tg, 
-                                                      const std::vector<NodeId> through_nodes, 
+                                                      const std::vector<NodeId>& through_nodes,
                                                       size_t max_depth) {
     std::vector<NodeId> nodes;
 
@@ -890,7 +889,7 @@ std::vector<NodeId> find_transitively_connected_nodes(const TimingGraph& tg,
 }
 
 std::vector<NodeId> find_transitive_fanin_nodes(const TimingGraph& tg, 
-                                                const std::vector<NodeId> sinks, 
+                                                const std::vector<NodeId>& sinks,
                                                 size_t max_depth) {
     std::vector<NodeId> nodes;
 
@@ -905,7 +904,7 @@ std::vector<NodeId> find_transitive_fanin_nodes(const TimingGraph& tg,
 }
 
 std::vector<NodeId> find_transitive_fanout_nodes(const TimingGraph& tg, 
-                                                 const std::vector<NodeId> sources, 
+                                                 const std::vector<NodeId>& sources,
                                                  size_t max_depth) {
     std::vector<NodeId> nodes;
 

diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp
@@ -11,8 +11,8 @@
  * store all edges as bi-directional edges.
  *
  * NOTE: We store only the static connectivity and node information in the 'TimingGraph' class.
- *       Other dynamic information (edge delays, node arrival/required times) is stored seperately.
- *       This means that most actions opearting on the timing graph (e.g. TimingAnalyzers) only
+ *       Other dynamic information (edge delays, node arrival/required times) is stored separately.
+ *       This means that most actions operating on the timing graph (e.g. TimingAnalyzers) only
  *       require read-only access to the timing graph.
  *
  * Accessing Graph Data
@@ -28,9 +28,9 @@
  * rather than the more typical "Array of Structs (AoS)" data layout.
  *
  * By using a SoA layout we keep all data for a particular field (e.g. node types) in contiguous
- * memory.  Using an AoS layout the various fields accross nodes would *not* be contiguous
+ * memory.  Using an AoS layout the various fields across nodes would *not* be contiguous
  * (although the different fields within each object (e.g. a TimingNode class) would be contiguous).
- * Since we typically perform operations on particular fields accross nodes the SoA layout performs
+ * Since we typically perform operations on particular fields across nodes the SoA layout performs
  * better (and enables memory ordering optimizations). The edges are also stored in a SOA format.
  *
  * The SoA layout also motivates the ID based approach, which allows direct indexing into the required
@@ -48,11 +48,12 @@
  * and ensures that each cache line pulled into the cache will (likely) be accessed multiple times
  * before being evicted.
  *
- * Note that performing these optimizations is currently done explicity by calling the optimize_edge_layout()
- * and optimize_node_layout() member functions.  In the future (particularily if incremental modification
+ * Note that performing these optimizations is currently done explicitly by calling the optimize_edge_layout()
+ * and optimize_node_layout() member functions.  In the future (particularly if incremental modification
  * support is added), it may be a good idea to apply these modifications automatically as needed.
  *
  */
+#include <utility>
 #include <vector>
 #include <set>
 #include <limits>
@@ -149,7 +150,7 @@ class TimingGraph {
 
         ///\pre The graph must be levelized.
         ///\returns A range containing the nodes which are primary inputs (i.e. SOURCE's with no fanin, corresponding to top level design inputs pins)
-        ///\warning Not all SOURCE nodes in the graph are primary inputs (e.g. FF Q pins are SOURCE's but have incomming edges from the clock network)
+        ///\warning Not all SOURCE nodes in the graph are primary inputs (e.g. FF Q pins are SOURCE's but have incoming edges from the clock network)
         ///\see levelize()
         node_range primary_inputs() const { 
             TATUM_ASSERT_MSG(is_levelized_, "Timing graph must be levelized");
@@ -282,7 +283,7 @@ class TimingGraph {
         //Node data
         tatum::util::linear_map<NodeId,NodeId> node_ids_; //The node IDs in the graph
         tatum::util::linear_map<NodeId,NodeType> node_types_; //Type of node
-        tatum::util::linear_map<NodeId,std::vector<EdgeId>> node_in_edges_; //Incomiing edge IDs for node
+        tatum::util::linear_map<NodeId,std::vector<EdgeId>> node_in_edges_; //Incoming edge IDs for node
         tatum::util::linear_map<NodeId,std::vector<EdgeId>> node_out_edges_; //Out going edge IDs for node
         tatum::util::linear_map<NodeId,LevelId> node_levels_; //Level ID for each node
 
@@ -293,12 +294,12 @@ class TimingGraph {
         tatum::util::linear_map<EdgeId,NodeId> edge_src_nodes_; //Source node for each edge
         tatum::util::linear_map<EdgeId,bool>   edges_disabled_;
 
-        //Auxilary graph-level info, filled in by levelize()
+        //Auxiliary graph-level info, filled in by levelize()
         tatum::util::linear_map<LevelId,LevelId> level_ids_; //The level IDs in the graph
         tatum::util::linear_map<LevelId,std::vector<NodeId>> level_nodes_; //Nodes in each level
         std::vector<NodeId> primary_inputs_; //Primary input nodes of the timing graph.
         std::vector<NodeId> logical_outputs_; //Logical output nodes of the timing graph.
-        bool is_levelized_ = false; //Inidcates if the current levelization is valid
+        bool is_levelized_ = false; //Indicates if the current levelization is valid
 
         bool allow_dangling_combinational_nodes_ = false;
 
@@ -310,26 +311,31 @@ std::vector<std::vector<NodeId>> identify_combinational_loops(const TimingGraph&
 //Returns the set of nodes transitively connected (either fanin or fanout) to nodes in through_nodes
 //up to max_depth (default infinite) hops away
 std::vector<NodeId> find_transitively_connected_nodes(const TimingGraph& tg, 
-                                                      const std::vector<NodeId> through_nodes, 
+                                                      const std::vector<NodeId>& through_nodes,
                                                       size_t max_depth=std::numeric_limits<size_t>::max());
 
 //Returns the set of nodes in the transitive fanin of nodes in sinks up to max_depth (default infinite) hops away
 std::vector<NodeId> find_transitive_fanin_nodes(const TimingGraph& tg, 
-                                                const std::vector<NodeId> sinks, 
+                                                const std::vector<NodeId>& sinks,
                                                 size_t max_depth=std::numeric_limits<size_t>::max());
 
 //Returns the set of nodes in the transitive fanout of nodes in sources up to max_depth (default infinite) hops away
 std::vector<NodeId> find_transitive_fanout_nodes(const TimingGraph& tg,
-                                                 const std::vector<NodeId> sources, 
+                                                 const std::vector<NodeId>& sources,
                                                  size_t max_depth=std::numeric_limits<size_t>::max());
 
 EdgeType infer_edge_type(const TimingGraph& tg, EdgeId edge);
 
 //Mappings from old to new IDs
 struct GraphIdMaps {
-    GraphIdMaps(tatum::util::linear_map<NodeId,NodeId> node_map,
-                tatum::util::linear_map<EdgeId,EdgeId> edge_map)
+    GraphIdMaps(const tatum::util::linear_map<NodeId,NodeId>& node_map,
+                const tatum::util::linear_map<EdgeId,EdgeId>& edge_map)
         : node_id_map(node_map), edge_id_map(edge_map) {}
+
+    GraphIdMaps(tatum::util::linear_map<NodeId,NodeId>&& node_map,
+                tatum::util::linear_map<EdgeId,EdgeId>&& edge_map)
+        : node_id_map(std::move(node_map)), edge_id_map(std::move(edge_map)) {}
+
     tatum::util::linear_map<NodeId,NodeId> node_id_map;
     tatum::util::linear_map<EdgeId,EdgeId> edge_id_map;
 };