Skip to content

Commit

Permalink
Merge branch 'master' into tmep_placer_class
Browse files Browse the repository at this point in the history
  • Loading branch information
soheilshahrouz committed Nov 20, 2024
2 parents 291ec6f + 920e8ab commit 87f4e2e
Show file tree
Hide file tree
Showing 38 changed files with 6,877 additions and 182 deletions.
Binary file modified doc/src/quickstart/blink_implementation.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
318 changes: 188 additions & 130 deletions doc/src/quickstart/index.rst

Large diffs are not rendered by default.

Binary file modified doc/src/quickstart/tseng_blk1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified doc/src/quickstart/tseng_nets.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 4 additions & 0 deletions libs/EXTERNAL/libblifparse/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ add_library(libblifparse STATIC
target_include_directories(libblifparse PUBLIC ${LIB_INCLUDE_DIRS} ${CMAKE_CURRENT_BINARY_DIR})
set_target_properties(libblifparse PROPERTIES PREFIX "") #Avoid extra 'lib' prefix

# Set the read buffer size in the generated lexers. This reduces the number of
# syscalls since the default is only 1kB.
target_compile_definitions(libblifparse PRIVATE YY_READ_BUF_SIZE=1048576)

#Create the test executable
add_executable(blifparse_test src/main.cpp)
target_link_libraries(blifparse_test libblifparse)
Expand Down
2 changes: 1 addition & 1 deletion libs/EXTERNAL/libcatch2
Submodule libcatch2 updated 31 files
+2 −1 .clang-tidy
+1 −1 .github/workflows/mac-builds-m1.yml
+6 −2 .github/workflows/mac-builds.yml
+2 −0 BUILD.bazel
+1 −0 CMake/CatchConfigOptions.cmake
+6 −1 CMakeLists.txt
+9 −0 docs/configuration.md
+4 −4 src/catch2/benchmark/detail/catch_stats.cpp
+1 −1 src/catch2/catch_timer.cpp
+4 −1 src/catch2/catch_tostring.cpp
+9 −0 src/catch2/catch_user_config.hpp.in
+39 −31 src/catch2/internal/catch_compiler_capabilities.hpp
+1 −1 src/catch2/internal/catch_console_colour.cpp
+1 −1 src/catch2/internal/catch_random_number_generator.cpp
+35 −26 src/catch2/matchers/catch_matchers_range_equals.hpp
+5 −2 src/catch2/reporters/catch_reporter_console.cpp
+0 −1 tests/ExtraTests/CMakeLists.txt
+5 −1 tests/SelfTest/Baselines/compact.sw.approved.txt
+5 −1 tests/SelfTest/Baselines/compact.sw.multi.approved.txt
+1 −1 tests/SelfTest/Baselines/console.std.approved.txt
+35 −1 tests/SelfTest/Baselines/console.sw.approved.txt
+35 −1 tests/SelfTest/Baselines/console.sw.multi.approved.txt
+3 −1 tests/SelfTest/Baselines/junit.sw.approved.txt
+3 −1 tests/SelfTest/Baselines/junit.sw.multi.approved.txt
+2 −0 tests/SelfTest/Baselines/sonarqube.sw.approved.txt
+2 −0 tests/SelfTest/Baselines/sonarqube.sw.multi.approved.txt
+9 −1 tests/SelfTest/Baselines/tap.sw.approved.txt
+9 −1 tests/SelfTest/Baselines/tap.sw.multi.approved.txt
+39 −1 tests/SelfTest/Baselines/xml.sw.approved.txt
+39 −1 tests/SelfTest/Baselines/xml.sw.multi.approved.txt
+19 −0 tests/SelfTest/UsageTests/MatchersRanges.tests.cpp
2 changes: 1 addition & 1 deletion libs/EXTERNAL/libezgl/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)

# create the project
project(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
cmake_minimum_required(VERSION 3.10 FATAL_ERROR)

project(
basic-application
Expand Down
9 changes: 4 additions & 5 deletions libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,14 +481,13 @@ tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const
//Make all edges in a level be contiguous in memory

//Determine the edges driven by each level of the graph
std::vector<std::vector<EdgeId>> edge_levels;
std::vector<std::vector<EdgeId>> edge_levels(levels().size());
for(LevelId level_id : levels()) {
edge_levels.emplace_back();
for(auto node_id : level_nodes(level_id)) {
for(NodeId node_id : level_nodes(level_id)) {

//We walk the nodes according to the input-edge order.
//This is the same order used by the arrival-time traversal (which is responsible
//for most of the analyzer run-time), so matching it's order exactly results in
//for most of the analyzer run-time), so matching its order exactly results in
//better cache locality
for(EdgeId edge_id : node_in_edges(node_id)) {

Expand All @@ -498,7 +497,7 @@ tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const
}
}

//Maps from from original to new edge id, used to update node to edge refs
//Maps from original to new edge id, used to update node to edge refs
tatum::util::linear_map<EdgeId,EdgeId> orig_to_new_edge_id(edges().size());

//Determine the new order
Expand Down
10 changes: 8 additions & 2 deletions libs/libpugiutil/src/pugixml_loc.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
#include <cstdio>
#include <algorithm>
#include <vector>
#include "pugixml_util.hpp"
#include "pugixml_loc.hpp"

// The size of the read buffer when reading from a file.
#ifndef PUGI_UTIL_READ_BUF_SIZE
#define PUGI_UTIL_READ_BUF_SIZE 1048576
#endif // PUGI_UTIL_READ_BUF_SIZE

namespace pugiutil {

//Return the line number from the given offset
Expand Down Expand Up @@ -30,10 +36,10 @@ void loc_data::build_loc_data() {

std::ptrdiff_t offset = 0;

char buffer[1024];
std::vector<char> buffer(PUGI_UTIL_READ_BUF_SIZE);
std::size_t size;

while ((size = fread(buffer, 1, sizeof(buffer), f)) > 0) {
while ((size = fread(buffer.data(), 1, buffer.size() * sizeof(char), f)) > 0) {
for (std::size_t i = 0; i < size; ++i) {
if (buffer[i] == '\n') {
offsets_.push_back(offset + i);
Expand Down
9 changes: 7 additions & 2 deletions libs/libvtrutil/src/vtr_digest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,15 @@

#include <iostream>
#include <fstream>
#include <array>
#include <vector>

#include "picosha2.h"

// The size of the read buffer when reading from a file.
#ifndef VTR_UTIL_READ_BUF_SIZE
#define VTR_UTIL_READ_BUF_SIZE 1048576
#endif // VTR_UTIL_READ_BUF_SIZE

namespace vtr {

std::string secure_digest_file(const std::string& filepath) {
Expand All @@ -21,7 +26,7 @@ std::string secure_digest_stream(std::istream& is) {
//Read the stream in chunks and calculate the SHA256 digest
picosha2::hash256_one_by_one hasher;

std::array<char, 1024> buf;
std::vector<char> buf(VTR_UTIL_READ_BUF_SIZE);
while (!is.eof()) {
//Process a chunk
is.read(buf.data(), buf.size());
Expand Down
2 changes: 1 addition & 1 deletion utils/route_diag/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.16)
cmake_policy(VERSION 3.9)
cmake_policy(VERSION 3.10)

project("route_diag")

Expand Down
24 changes: 21 additions & 3 deletions vpr/src/analytical_place/full_legalizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "logic_types.h"
#include "pack.h"
#include "physical_types.h"
#include "place_and_route.h"
#include "place_constraints.h"
#include "place_macro.h"
#include "verify_clustering.h"
Expand Down Expand Up @@ -103,9 +104,6 @@ class APClusterPlacer {
g_vpr_ctx.mutable_placement().cube_bb = false;
g_vpr_ctx.mutable_placement().compressed_block_grids = create_compressed_block_grids();

// Initialize the macros
blk_loc_registry.mutable_place_macros().alloc_and_load_placement_macros(directs);

// TODO: The next few steps will be basically a direct copy of the initial
// placement code since it does everything we need! It would be nice
// to share the code.
Expand Down Expand Up @@ -133,6 +131,13 @@ class APClusterPlacer {
const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering();
const auto& block_locs = g_vpr_ctx.placement().block_locs();
auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry();
// If this block has already been placed, just return true.
// TODO: This should be investigated further. What I think is happening
// is that a macro is being placed which contains another cluster.
// This must be a carry chain. May need to rewrite the algorithm
// below to use macros instead of clusters.
if (is_block_placed(clb_blk_id, block_locs))
return true;
VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?");
t_pl_macro pl_macro = get_macro(clb_blk_id);
t_pl_loc to_loc;
Expand Down Expand Up @@ -170,6 +175,10 @@ class APClusterPlacer {
bool exhaustively_place_cluster(ClusterBlockId clb_blk_id) {
const auto& block_locs = g_vpr_ctx.placement().block_locs();
auto& blk_loc_registry = g_vpr_ctx.mutable_placement().mutable_blk_loc_registry();
// If this block has already been placed, just return true.
// TODO: See similar comment above.
if (is_block_placed(clb_blk_id, block_locs))
return true;
VTR_ASSERT(!is_block_placed(clb_blk_id, block_locs) && "Block already placed. Is this intentional?");
t_pl_macro pl_macro = get_macro(clb_blk_id);
const PartitionRegion& pr = is_cluster_constrained(clb_blk_id) ? g_vpr_ctx.floorplanning().cluster_constraints[clb_blk_id] : get_device_partition_region();
Expand Down Expand Up @@ -346,6 +355,10 @@ void FullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist,
for (APBlockId ap_blk_id : ap_netlist_.blocks()) {
const t_pack_molecule* blk_mol = ap_netlist_.block_molecule(ap_blk_id);
for (AtomBlockId atom_blk_id : blk_mol->atom_block_ids) {
// See issue #2791, some of the atom_block_ids may be invalid. They
// can safely be ignored.
if (!atom_blk_id.is_valid())
continue;
// Ensure that this block is not in any other AP block. That would
// be weird.
VTR_ASSERT(!atom_to_ap_block[atom_blk_id].is_valid());
Expand Down Expand Up @@ -429,5 +442,10 @@ void FullLegalizer::legalize(const PartialPlacement& p_placement) {
"Aborting program.\n",
num_placement_errors);
}

// TODO: This was taken from vpr_api. Not sure why it is needed. Should be
// made part of the placement and verify placement should check for
// it.
post_place_sync();
}

18 changes: 18 additions & 0 deletions vpr/src/analytical_place/partial_legalizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ static inline PrimitiveVector get_primitive_mass(APBlockId blk_id,
PrimitiveVector mass;
const t_pack_molecule* mol = netlist.block_molecule(blk_id);
for (AtomBlockId atom_blk_id : mol->atom_block_ids) {
// See issue #2791, some of the atom_block_ids may be invalid. They can
// safely be ignored.
if (!atom_blk_id.is_valid())
continue;
const t_model* model = g_vpr_ctx.atom().nlist.block_model(atom_blk_id);
VTR_ASSERT_DEBUG(model->index >= 0);
mass.add_val_to_dim(get_model_mass(model), model->index);
Expand Down Expand Up @@ -354,6 +358,8 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
// Create visited flags for each bin. Set the source to visited.
vtr::vector_map<LegalizerBinId, bool> bin_visited(bins_.size(), false);
bin_visited[src_bin_id] = true;
// Create a distance count for each bin from the src.
vtr::vector_map<LegalizerBinId, unsigned> bin_distance(bins_.size(), 0);
// Flags to check if a specific model has been found in the given direction.
// In this case, direction is the direction of the largest component of the
// manhattan distance between the source bin and the target bin.
Expand Down Expand Up @@ -401,6 +407,11 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
// Pop the bin from the queue.
LegalizerBinId bin_id = q.front();
q.pop();
// If the distance of this bin from the source is too large, do not
// explore.
unsigned curr_bin_dist = bin_distance[bin_id];
if (curr_bin_dist > max_bin_neighbor_dist_)
continue;
// Get the direct neighbors of the bin (neighbors that are directly
// touching).
auto direct_neighbors = get_direct_neighbors_of_bin(bin_id, bins_, tile_bin_);
Expand Down Expand Up @@ -431,6 +442,8 @@ void FlowBasedLegalizer::compute_neighbors_of_bin(LegalizerBinId src_bin_id, siz
}
// Mark this bin as visited and push it onto the queue.
bin_visited[dir_neighbor_bin_id] = true;
// Update the distance.
bin_distance[dir_neighbor_bin_id] = curr_bin_dist + 1;
// FIXME: This may be inefficient since it will do an entire BFS of
// the grid if a neighbor of a given type does not exist in
// a specific direction. Should add a check to see if it is
Expand Down Expand Up @@ -506,6 +519,7 @@ FlowBasedLegalizer::FlowBasedLegalizer(const APNetlist& netlist)
tile_bin_[x][y] = new_bin_id;
}
}

// Get the number of models in the device.
size_t num_models = get_num_models();
// Connect the bins.
Expand All @@ -524,10 +538,14 @@ FlowBasedLegalizer::FlowBasedLegalizer(const APNetlist& netlist)
compute_neighbors_of_bin(tile_bin_[x][y], num_models);
}
}

// Pre-compute the masses of the APBlocks
VTR_LOGV(log_verbosity_ >= 10, "Pre-computing the block masses...\n");
for (APBlockId blk_id : netlist.blocks()) {
block_masses_.insert(blk_id, get_primitive_mass(blk_id, netlist));
}
VTR_LOGV(log_verbosity_ >= 10, "Finished pre-computing the block masses.\n");

// Initialize the block_bins.
block_bins_.resize(netlist.blocks().size(), LegalizerBinId::INVALID());
}
Expand Down
12 changes: 12 additions & 0 deletions vpr/src/analytical_place/partial_legalizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,18 @@ class FlowBasedLegalizer : public PartialLegalizer {
/// enough space to flow blocks.
static constexpr size_t max_num_iterations_ = 100;

/// @brief The maximum number of hops away a neighbor of a bin can be. Where
/// a hop is the minimum number of bins you need to pass through to
/// get to this neighbor (manhattan distance in bins-space).
///
/// This is used to speed up the computation of the neighbors of bins since
/// it reduces the amount of the graph that needs to be explored.
///
/// TODO: This may need to be made per primitive type since some types may
/// need to explore more of the architecture than others to find
/// sufficient neighbors.
static constexpr unsigned max_bin_neighbor_dist_ = 4;

/// @brief A vector of all the bins in the legalizer.
vtr::vector_map<LegalizerBinId, LegalizerBin> bins_;

Expand Down
33 changes: 23 additions & 10 deletions vpr/src/place/annealer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
#include "placer_breakpoint.h"
#include "RL_agent_util.h"

/**************************************************************************/
/*************** Static Function Declarations *****************************/
/**************************************************************************/

/**
* @brief Check if the setup slack has gotten better or worse due to block swap.
*
Expand All @@ -38,6 +42,9 @@
static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
const PlacerState& placer_state);

/*************************************************************************/
/*************** Static Function Definitions *****************************/
/*************************************************************************/

static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
const PlacerState& placer_state) {
Expand Down Expand Up @@ -78,6 +85,10 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
return 1;
}

/**************************************************************************************/
/*************** Member Function Definitions for t_annealing_state ********************/
/**************************************************************************************/

///@brief Constructor: Initialize all annealing state variables and macros.
t_annealing_state::t_annealing_state(float first_t,
float first_rlim,
Expand Down Expand Up @@ -170,6 +181,10 @@ void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) {
+ placer_opts.td_place_exp_first;
}

/**************************************************************************************/
/*************** Member Function Definitions for PlacementAnnealer ********************/
/**************************************************************************************/

PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
PlacerState& placer_state,
t_placer_costs& costs,
Expand Down Expand Up @@ -217,7 +232,6 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,

int first_move_lim = get_initial_move_lim(placer_opts, placer_opts_.anneal_sched);


if (placer_opts.inner_loop_recompute_divider != 0) {
inner_recompute_limit_ = static_cast<int>(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider);
} else {
Expand Down Expand Up @@ -258,10 +272,10 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
move_type_stats_.rejected_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0);

// Update the starting temperature for placement annealing to a more appropriate value
annealing_state_.t = estimate_starting_temperature();
annealing_state_.t = estimate_starting_temperature_();
}

float PlacementAnnealer::estimate_starting_temperature() {
float PlacementAnnealer::estimate_starting_temperature_() {
if (placer_opts_.anneal_sched.type == e_sched_type::USER_SCHED) {
return placer_opts_.anneal_sched.init_t;
}
Expand Down Expand Up @@ -289,7 +303,7 @@ float PlacementAnnealer::estimate_starting_temperature() {
#endif /*NO_GRAPHICS*/

// Will not deploy setup slack analysis, so omit crit_exponent and setup_slack
e_move_result swap_result = try_swap(*move_generator_1_, placer_opts_.place_algorithm, manual_move_enabled);
e_move_result swap_result = try_swap_(*move_generator_1_, placer_opts_.place_algorithm, manual_move_enabled);

if (swap_result == e_move_result::ACCEPTED) {
num_accepted++;
Expand Down Expand Up @@ -322,9 +336,9 @@ float PlacementAnnealer::estimate_starting_temperature() {
return init_temp;
}

e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
const t_place_algorithm& place_algorithm,
bool manual_move_enabled) {
e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
const t_place_algorithm& place_algorithm,
bool manual_move_enabled) {
/* Picks some block and moves it to another spot. If this spot is
* occupied, switch the blocks. Assess the change in cost function.
* rlim is the range limiter.
Expand Down Expand Up @@ -638,7 +652,7 @@ void PlacementAnnealer::outer_loop_update_timing_info() {
PlaceCritParams crit_params{annealing_state_.crit_exponent,
placer_opts_.place_crit_limit};

//Update all timing related classes
// Update all timing related classes
perform_full_timing_update(crit_params, delay_model_, criticalities_, setup_slacks_,
pin_timing_invalidator_, timing_info_, &costs_, placer_state_);

Expand Down Expand Up @@ -667,7 +681,7 @@ void PlacementAnnealer::placement_inner_loop() {

// Inner loop begins
for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < annealing_state_.move_lim; inner_iter++) {
e_move_result swap_result = try_swap(move_generator, placer_opts_.place_algorithm, manual_move_enabled);
e_move_result swap_result = try_swap_(move_generator, placer_opts_.place_algorithm, manual_move_enabled);

if (swap_result == e_move_result::ACCEPTED) {
// Move was accepted. Update statistics that are useful for the annealing schedule.
Expand All @@ -683,7 +697,6 @@ void PlacementAnnealer::placement_inner_loop() {
/* Do we want to re-run timing analysis on the circuit to get updated slack and criticality values?
* We do this only once in a while, since it is expensive.
*/

const int recompute_limit = quench_started_ ? quench_recompute_limit_ : inner_recompute_limit_;
// on last iteration don't recompute
if (inner_crit_iter_count >= recompute_limit && inner_iter != annealing_state_.move_lim - 1) {
Expand Down
Loading

0 comments on commit 87f4e2e

Please sign in to comment.