Merge branch 'master' into tmep_placer_class

verilog-to-routing · Nov 20, 2024 · 87f4e2e · 87f4e2e
2 parents 291ec6f + 920e8ab
commit 87f4e2e
Show file tree

Hide file tree

Showing 38 changed files with 6,877 additions and 182 deletions.
diff --git a/doc/src/quickstart/blink_implementation.png b/doc/src/quickstart/blink_implementation.png
diff --git a/doc/src/quickstart/index.rst b/doc/src/quickstart/index.rst
diff --git a/doc/src/quickstart/tseng_blk1.png b/doc/src/quickstart/tseng_blk1.png
diff --git a/doc/src/quickstart/tseng_nets.png b/doc/src/quickstart/tseng_nets.png
diff --git a/libs/EXTERNAL/libblifparse/CMakeLists.txt b/libs/EXTERNAL/libblifparse/CMakeLists.txt
@@ -45,6 +45,10 @@ add_library(libblifparse STATIC
 target_include_directories(libblifparse PUBLIC ${LIB_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR})
 set_target_properties(libblifparse PROPERTIES PREFIX "") #Avoid extra 'lib' prefix
 
+# Set the read buffer size in the generated lexers. This reduces the number of
+# syscalls since the default is only 1kB.
+target_compile_definitions(libblifparse PRIVATE YY_READ_BUF_SIZE=1048576)
+
 #Create the test executable
 add_executable(blifparse_test src/main.cpp)
 target_link_libraries(blifparse_test libblifparse)

diff --git a/libs/EXTERNAL/libcatch2 b/libs/EXTERNAL/libcatch2
diff --git a/libs/EXTERNAL/libezgl/CMakeLists.txt b/libs/EXTERNAL/libezgl/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
 
 # create the project
 project(

diff --git a/libs/EXTERNAL/libezgl/examples/basic-application/CMakeLists.txt b/libs/EXTERNAL/libezgl/examples/basic-application/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.10 FATAL_ERROR)
 
 project(
   basic-application

diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp
@@ -481,14 +481,13 @@ tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const
     //Make all edges in a level be contiguous in memory
 
     //Determine the edges driven by each level of the graph
-    std::vector<std::vector<EdgeId>> edge_levels;
+    std::vector<std::vector<EdgeId>> edge_levels(levels().size());
     for(LevelId level_id : levels()) {
-        edge_levels.emplace_back();
-        for(auto node_id : level_nodes(level_id)) {
+        for(NodeId node_id : level_nodes(level_id)) {
 
             //We walk the nodes according to the input-edge order.
             //This is the same order used by the arrival-time traversal (which is responsible
-            //for most of the analyzer run-time), so matching it's order exactly results in
+            //for most of the analyzer run-time), so matching its order exactly results in
             //better cache locality
             for(EdgeId edge_id : node_in_edges(node_id)) {
 
@@ -498,7 +497,7 @@ tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const
         }
     }
 
-    //Maps from from original to new edge id, used to update node to edge refs
+    //Maps from original to new edge id, used to update node to edge refs
     tatum::util::linear_map<EdgeId,EdgeId> orig_to_new_edge_id(edges().size());
 
     //Determine the new order

diff --git a/libs/libpugiutil/src/pugixml_loc.cpp b/libs/libpugiutil/src/pugixml_loc.cpp
@@ -1,8 +1,14 @@
 #include <cstdio>
 #include <algorithm>
+#include <vector>
 #include "pugixml_util.hpp"
 #include "pugixml_loc.hpp"
 
+// The size of the read buffer when reading from a file.
+#ifndef PUGI_UTIL_READ_BUF_SIZE
+#define PUGI_UTIL_READ_BUF_SIZE 1048576
+#endif // PUGI_UTIL_READ_BUF_SIZE
+
 namespace pugiutil {
 
 //Return the line number from the given offset
@@ -30,10 +36,10 @@ void loc_data::build_loc_data() {
 
     std::ptrdiff_t offset = 0;
 
-    char buffer[1024];
+    std::vector<char> buffer(PUGI_UTIL_READ_BUF_SIZE);
     std::size_t size;
 
-    while ((size = fread(buffer, 1, sizeof(buffer), f)) > 0) {
+    while ((size = fread(buffer.data(), 1, buffer.size() * sizeof(char), f)) > 0) {
         for (std::size_t i = 0; i < size; ++i) {
             if (buffer[i] == '\n') {
                 offsets_.push_back(offset + i);

diff --git a/libs/libvtrutil/src/vtr_digest.cpp b/libs/libvtrutil/src/vtr_digest.cpp
@@ -3,10 +3,15 @@
 
 #include <iostream>
 #include <fstream>
-#include <array>
+#include <vector>
 
 #include "picosha2.h"
 
+// The size of the read buffer when reading from a file.
+#ifndef VTR_UTIL_READ_BUF_SIZE
+#define VTR_UTIL_READ_BUF_SIZE 1048576
+#endif // VTR_UTIL_READ_BUF_SIZE
+
 namespace vtr {
 
 std::string secure_digest_file(const std::string& filepath) {
@@ -21,7 +26,7 @@ std::string secure_digest_stream(std::istream& is) {
     //Read the stream in chunks and calculate the SHA256 digest
     picosha2::hash256_one_by_one hasher;
 
-    std::array<char, 1024> buf;
+    std::vector<char> buf(VTR_UTIL_READ_BUF_SIZE);
     while (!is.eof()) {
         //Process a chunk
         is.read(buf.data(), buf.size());

diff --git a/utils/route_diag/CMakeLists.txt b/utils/route_diag/CMakeLists.txt
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.16)
-cmake_policy(VERSION 3.9)
+cmake_policy(VERSION 3.10)
 
 project("route_diag")
 

diff --git a/vpr/src/analytical_place/full_legalizer.cpp b/vpr/src/analytical_place/full_legalizer.cpp
@@ -26,6 +26,7 @@
 #include "logic_types.h"
 #include "pack.h"
 #include "physical_types.h"
+#include "place_and_route.h"
 #include "place_constraints.h"
 #include "place_macro.h"
 #include "verify_clustering.h"
@@ -103,9 +104,6 @@ class APClusterPlacer {
         g_vpr_ctx.mutable_placement().cube_bb = false;
         g_vpr_ctx.mutable_placement().compressed_block_grids = create_compressed_block_grids();
 
-        // Initialize the macros
-        blk_loc_registry.mutable_place_macros().alloc_and_load_placement_macros(directs);
-
         // TODO: The next few steps will be basically a direct copy of the initial
         //       placement code since it does everything we need! It would be nice
         //       to share the code.
@@ -133,6 +131,13 @@ class APClusterPlacer {
         const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering();
         const auto& block_locs = g_vpr_ctx.placement().block_locs();
         auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry();
+        // If this block has already been placed, just return true.
+        // TODO: This should be investigated further. What I think is happening
+        //       is that a macro is being placed which contains another cluster.
+        //       This must be a carry chain. May need to rewrite the algorithm
+        //       below to use macros instead of clusters.
+        if (is_block_placed(clb_blk_id, block_locs))
+            return true;
         VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?");
         t_pl_macro pl_macro = get_macro(clb_blk_id);
         t_pl_loc to_loc;
@@ -170,6 +175,10 @@ class APClusterPlacer {
     bool exhaustively_place_cluster(ClusterBlockId clb_blk_id) {
         const auto& block_locs = g_vpr_ctx.placement().block_locs();
         auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry();
+        // If this block has already been placed, just return true.
+        // TODO: See similar comment above.
+        if (is_block_placed(clb_blk_id, block_locs))
+            return true;
         VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?");
         t_pl_macro pl_macro = get_macro(clb_blk_id);
         const PartitionRegion& pr = is_cluster_constrained(clb_blk_id) ? g_vpr_ctx.floorplanning().cluster_constraints[clb_blk_id] : get_device_partition_region();
@@ -346,6 +355,10 @@ void FullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist,
     for (APBlockId ap_blk_id : ap_netlist_.blocks()) {
         const t_pack_molecule* blk_mol = ap_netlist_.block_molecule(ap_blk_id);
         for (AtomBlockId atom_blk_id : blk_mol->atom_block_ids) {
+            // See issue #2791, some of the atom_block_ids may be invalid. They
+            // can safely be ignored.
+            if (!atom_blk_id.is_valid())
+                continue;
             // Ensure that this block is not in any other AP block. That would
             // be weird.
             VTR_ASSERT(!atom_to_ap_block[atom_blk_id].is_valid());
@@ -429,5 +442,10 @@ void FullLegalizer::legalize(const PartialPlacement& p_placement) {
                   "Aborting program.\n",
                   num_placement_errors);
     }
+
+    // TODO: This was taken from vpr_api. Not sure why it is needed. Should be
+    //       made part of the placement and verify placement should check for
+    //       it.
+    post_place_sync();
 }
 
diff --git a/vpr/src/analytical_place/partial_legalizer.cpp b/vpr/src/analytical_place/partial_legalizer.cpp
@@ -76,6 +76,10 @@ static inline PrimitiveVector get_primitive_mass(APBlockId blk_id,
     PrimitiveVector mass;
     const t_pack_molecule* mol = netlist.block_molecule(blk_id);
     for (AtomBlockId atom_blk_id : mol->atom_block_ids) {
+        // See issue #2791, some of the atom_block_ids may be invalid. They can
+        // safely be ignored.
+        if (!atom_blk_id.is_valid())
+            continue;
         const t_model* model = g_vpr_ctx.atom().nlist.block_model(atom_blk_id);
         VTR_ASSERT_DEBUG(model->index >= 0);
         mass.add_val_to_dim(get_model_mass(model), model->index);
@@ -354,6 +358,8 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
     // Create visited flags for each bin. Set the source to visited.
     vtr::vector_map<LegalizerBinId, bool> bin_visited(bins_.size(), false);
     bin_visited[src_bin_id] = true;
+    // Create a distance count for each bin from the src.
+    vtr::vector_map<LegalizerBinId, unsigned> bin_distance(bins_.size(), 0);
     // Flags to check if a specific model has been found in the given direction.
     // In this case, direction is the direction of the largest component of the
     // manhattan distance between the source bin and the target bin.
@@ -401,6 +407,11 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
         // Pop the bin from the queue.
         LegalizerBinId bin_id = q.front();
         q.pop();
+        // If the distance of this bin from the source is too large, do not
+        // explore.
+        unsigned curr_bin_dist = bin_distance[bin_id];
+        if (curr_bin_dist > max_bin_neighbor_dist_)
+            continue;
         // Get the direct neighbors of the bin (neighbors that are directly
         // touching).
         auto direct_neighbors = get_direct_neighbors_of_bin(bin_id, bins_, tile_bin_);
@@ -431,6 +442,8 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
             }
             // Mark this bin as visited and push it onto the queue.
             bin_visited[dir_neighbor_bin_id] = true;
+            // Update the distance.
+            bin_distance[dir_neighbor_bin_id] = curr_bin_dist + 1;
             // FIXME: This may be inefficient since it will do an entire BFS of
             //        the grid if a neighbor of a given type does not exist in
             //        a specific direction. Should add a check to see if it is
@@ -506,6 +519,7 @@ FlowBasedLegalizer::FlowBasedLegalizer(const APNetlist& netlist)
             tile_bin_[x][y] = new_bin_id;
         }
     }
+
     // Get the number of models in the device.
     size_t num_models = get_num_models();
     // Connect the bins.
@@ -524,10 +538,14 @@ FlowBasedLegalizer::FlowBasedLegalizer(const APNetlist& netlist)
             compute_neighbors_of_bin(tile_bin_[x][y], num_models);
         }
     }
+
     // Pre-compute the masses of the APBlocks
+    VTR_LOGV(log_verbosity_ >= 10, "Pre-computing the block masses...\n");
     for (APBlockId blk_id : netlist.blocks()) {
         block_masses_.insert(blk_id, get_primitive_mass(blk_id, netlist));
     }
+    VTR_LOGV(log_verbosity_ >= 10, "Finished pre-computing the block masses.\n");
+
     // Initialize the block_bins.
     block_bins_.resize(netlist.blocks().size(), LegalizerBinId::INVALID());
 }

diff --git a/vpr/src/analytical_place/partial_legalizer.h b/vpr/src/analytical_place/partial_legalizer.h
@@ -196,6 +196,18 @@ class FlowBasedLegalizer : public PartialLegalizer {
     ///        enough space to flow blocks.
     static constexpr size_t max_num_iterations_ = 100;
 
+    /// @brief The maximum number of hops away a neighbor of a bin can be, where
+    ///        the hop count is the minimum number of bins you need to pass
+    ///        through to get to this neighbor (manhattan distance in bin-space).
+    ///
+    /// This is used to speed up the computation of the neighbors of bins since
+    /// it reduces the amount of the graph that needs to be explored.
+    ///
+    /// TODO: This may need to be made per primitive type since some types may
+    ///       need to explore more of the architecture than others to find
+    ///       sufficient neighbors.
+    static constexpr unsigned max_bin_neighbor_dist_ = 4;
+
     /// @brief A vector of all the bins in the legalizer.
     vtr::vector_map<LegalizerBinId, LegalizerBin> bins_;
 

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
@@ -17,6 +17,10 @@
 #include "placer_breakpoint.h"
 #include "RL_agent_util.h"
 
+/**************************************************************************/
+/*************** Static Function Declarations *****************************/
+/**************************************************************************/
+
 /**
  * @brief Check if the setup slack has gotten better or worse due to block swap.
  *
@@ -38,6 +42,9 @@
 static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
                                       const PlacerState& placer_state);
 
+/*************************************************************************/
+/*************** Static Function Definitions *****************************/
+/*************************************************************************/
 
 static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
                                       const PlacerState& placer_state) {
@@ -78,6 +85,10 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
     return 1;
 }
 
+/**************************************************************************************/
+/*************** Member Function Definitions for t_annealing_state ********************/
+/**************************************************************************************/
+
 ///@brief Constructor: Initialize all annealing state variables and macros.
 t_annealing_state::t_annealing_state(float first_t,
                                      float first_rlim,
@@ -170,6 +181,10 @@ void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) {
                     + placer_opts.td_place_exp_first;
 }
 
+/**************************************************************************************/
+/*************** Member Function Definitions for PlacementAnnealer ********************/
+/**************************************************************************************/
+
 PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
                                      PlacerState& placer_state,
                                      t_placer_costs& costs,
@@ -217,7 +232,6 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
 
     int first_move_lim = get_initial_move_lim(placer_opts, placer_opts_.anneal_sched);
 
-
     if (placer_opts.inner_loop_recompute_divider != 0) {
         inner_recompute_limit_ = static_cast<int>(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider);
     } else {
@@ -258,10 +272,10 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
     move_type_stats_.rejected_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0);
 
     // Update the starting temperature for placement annealing to a more appropriate value
-    annealing_state_.t = estimate_starting_temperature();
+    annealing_state_.t = estimate_starting_temperature_();
 }
 
-float PlacementAnnealer::estimate_starting_temperature() {
+float PlacementAnnealer::estimate_starting_temperature_() {
     if (placer_opts_.anneal_sched.type == e_sched_type::USER_SCHED) {
         return placer_opts_.anneal_sched.init_t;
     }
@@ -289,7 +303,7 @@ float PlacementAnnealer::estimate_starting_temperature() {
 #endif /*NO_GRAPHICS*/
 
         // Will not deploy setup slack analysis, so omit crit_exponent and setup_slack
-        e_move_result swap_result = try_swap(*move_generator_1_, placer_opts_.place_algorithm, manual_move_enabled);
+        e_move_result swap_result = try_swap_(*move_generator_1_, placer_opts_.place_algorithm, manual_move_enabled);
 
         if (swap_result == e_move_result::ACCEPTED) {
             num_accepted++;
@@ -322,9 +336,9 @@ float PlacementAnnealer::estimate_starting_temperature() {
     return init_temp;
 }
 
-e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
-                                          const t_place_algorithm& place_algorithm,
-                                          bool manual_move_enabled) {
+e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
+                                           const t_place_algorithm& place_algorithm,
+                                           bool manual_move_enabled) {
     /* Picks some block and moves it to another spot.  If this spot is
      * occupied, switch the blocks.  Assess the change in cost function.
      * rlim is the range limiter.
@@ -638,7 +652,7 @@ void PlacementAnnealer::outer_loop_update_timing_info() {
             PlaceCritParams crit_params{annealing_state_.crit_exponent,
                                         placer_opts_.place_crit_limit};
 
-            //Update all timing related classes
+            // Update all timing related classes
             perform_full_timing_update(crit_params, delay_model_, criticalities_, setup_slacks_,
                                        pin_timing_invalidator_, timing_info_, &costs_, placer_state_);
 
@@ -667,7 +681,7 @@ void PlacementAnnealer::placement_inner_loop() {
 
     // Inner loop begins
     for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < annealing_state_.move_lim; inner_iter++) {
-        e_move_result swap_result = try_swap(move_generator, placer_opts_.place_algorithm, manual_move_enabled);
+        e_move_result swap_result = try_swap_(move_generator, placer_opts_.place_algorithm, manual_move_enabled);
 
         if (swap_result == e_move_result::ACCEPTED) {
             // Move was accepted.  Update statistics that are useful for the annealing schedule.
@@ -683,7 +697,6 @@ void PlacementAnnealer::placement_inner_loop() {
             /* Do we want to re-timing analyze the circuit to get updated slack and criticality values?
              * We do this only once in a while, since it is expensive.
              */
-
             const int recompute_limit = quench_started_ ? quench_recompute_limit_ : inner_recompute_limit_;
             // on last iteration don't recompute
             if (inner_crit_iter_count >= recompute_limit && inner_iter != annealing_state_.move_lim - 1) {
+2 −1		.clang-tidy
+1 −1		.github/workflows/mac-builds-m1.yml
+6 −2		.github/workflows/mac-builds.yml
+2 −0		BUILD.bazel
+1 −0		CMake/CatchConfigOptions.cmake
+6 −1		CMakeLists.txt
+9 −0		docs/configuration.md
+4 −4		src/catch2/benchmark/detail/catch_stats.cpp
+1 −1		src/catch2/catch_timer.cpp
+4 −1		src/catch2/catch_tostring.cpp
+9 −0		src/catch2/catch_user_config.hpp.in
+39 −31		src/catch2/internal/catch_compiler_capabilities.hpp
+1 −1		src/catch2/internal/catch_console_colour.cpp
+1 −1		src/catch2/internal/catch_random_number_generator.cpp
+35 −26		src/catch2/matchers/catch_matchers_range_equals.hpp
+5 −2		src/catch2/reporters/catch_reporter_console.cpp
+0 −1		tests/ExtraTests/CMakeLists.txt
+5 −1		tests/SelfTest/Baselines/compact.sw.approved.txt
+5 −1		tests/SelfTest/Baselines/compact.sw.multi.approved.txt
+1 −1		tests/SelfTest/Baselines/console.std.approved.txt
+35 −1		tests/SelfTest/Baselines/console.sw.approved.txt
+35 −1		tests/SelfTest/Baselines/console.sw.multi.approved.txt
+3 −1		tests/SelfTest/Baselines/junit.sw.approved.txt
+3 −1		tests/SelfTest/Baselines/junit.sw.multi.approved.txt
+2 −0		tests/SelfTest/Baselines/sonarqube.sw.approved.txt
+2 −0		tests/SelfTest/Baselines/sonarqube.sw.multi.approved.txt
+9 −1		tests/SelfTest/Baselines/tap.sw.approved.txt
+9 −1		tests/SelfTest/Baselines/tap.sw.multi.approved.txt
+39 −1		tests/SelfTest/Baselines/xml.sw.approved.txt
+39 −1		tests/SelfTest/Baselines/xml.sw.multi.approved.txt
+19 −0		tests/SelfTest/UsageTests/MatchersRanges.tests.cpp