Skip to content

Commit

Permalink
Merge pull request #2800 from verilog-to-routing/temp_annealer_class
Browse files Browse the repository at this point in the history
PlacementAnnealer class
  • Loading branch information
vaughnbetz authored Nov 18, 2024
2 parents c8d3111 + 8c0fdfc commit e7f964e
Show file tree
Hide file tree
Showing 63 changed files with 1,790 additions and 2,307 deletions.
23 changes: 11 additions & 12 deletions libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ NodeId TimingGraph::add_node(const NodeType type) {

EdgeId TimingGraph::add_edge(const EdgeType type, const NodeId src_node, const NodeId sink_node) {
//We require that the source/sink node must already be in the graph,
// so we can update them with thier edge references
// so we can update them with their edge references
TATUM_ASSERT(valid_node_id(src_node));
TATUM_ASSERT(valid_node_id(sink_node));

Expand All @@ -211,7 +211,7 @@ EdgeId TimingGraph::add_edge(const EdgeType type, const NodeId src_node, const N
EdgeId edge_id = EdgeId(edge_ids_.size());
edge_ids_.push_back(edge_id);

//Create the edgge
//Create the edge
edge_types_.push_back(type);
edge_src_nodes_.push_back(src_node);
edge_sink_nodes_.push_back(sink_node);
Expand Down Expand Up @@ -318,7 +318,7 @@ GraphIdMaps TimingGraph::compress() {
levelize();
validate();

return {node_id_map, edge_id_map};
return {std::move(node_id_map), std::move(edge_id_map)};
}

void TimingGraph::levelize() {
Expand Down Expand Up @@ -474,21 +474,20 @@ GraphIdMaps TimingGraph::optimize_layout() {

levelize();

return {node_id_map, edge_id_map};
return {std::move(node_id_map), std::move(edge_id_map)};
}

tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const {
//Make all edges in a level be contiguous in memory

//Determine the edges driven by each level of the graph
std::vector<std::vector<EdgeId>> edge_levels;
std::vector<std::vector<EdgeId>> edge_levels(levels().size());
for(LevelId level_id : levels()) {
edge_levels.push_back(std::vector<EdgeId>());
for(auto node_id : level_nodes(level_id)) {
for(NodeId node_id : level_nodes(level_id)) {

//We walk the nodes according to the input-edge order.
//This is the same order used by the arrival-time traversal (which is responsible
//for most of the analyzer run-time), so matching it's order exactly results in
//for most of the analyzer run-time), so matching its order exactly results in
//better cache locality
for(EdgeId edge_id : node_in_edges(node_id)) {

Expand All @@ -498,7 +497,7 @@ tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const
}
}

//Maps from from original to new edge id, used to update node to edge refs
//Maps from original to new edge id, used to update node to edge refs
tatum::util::linear_map<EdgeId,EdgeId> orig_to_new_edge_id(edges().size());

//Determine the new order
Expand Down Expand Up @@ -874,7 +873,7 @@ std::vector<std::vector<NodeId>> identify_combinational_loops(const TimingGraph&
}

std::vector<NodeId> find_transitively_connected_nodes(const TimingGraph& tg,
const std::vector<NodeId> through_nodes,
const std::vector<NodeId>& through_nodes,
size_t max_depth) {
std::vector<NodeId> nodes;

Expand All @@ -890,7 +889,7 @@ std::vector<NodeId> find_transitively_connected_nodes(const TimingGraph& tg,
}

std::vector<NodeId> find_transitive_fanin_nodes(const TimingGraph& tg,
const std::vector<NodeId> sinks,
const std::vector<NodeId>& sinks,
size_t max_depth) {
std::vector<NodeId> nodes;

Expand All @@ -905,7 +904,7 @@ std::vector<NodeId> find_transitive_fanin_nodes(const TimingGraph& tg,
}

std::vector<NodeId> find_transitive_fanout_nodes(const TimingGraph& tg,
const std::vector<NodeId> sources,
const std::vector<NodeId>& sources,
size_t max_depth) {
std::vector<NodeId> nodes;

Expand Down
36 changes: 21 additions & 15 deletions libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
* store all edges as bi-directional edges.
*
* NOTE: We store only the static connectivity and node information in the 'TimingGraph' class.
* Other dynamic information (edge delays, node arrival/required times) is stored seperately.
* This means that most actions opearting on the timing graph (e.g. TimingAnalyzers) only
* Other dynamic information (edge delays, node arrival/required times) is stored separately.
* This means that most actions operating on the timing graph (e.g. TimingAnalyzers) only
* require read-only access to the timing graph.
*
* Accessing Graph Data
Expand All @@ -28,9 +28,9 @@
* rather than the more typical "Array of Structs (AoS)" data layout.
*
* By using a SoA layout we keep all data for a particular field (e.g. node types) in contiguous
* memory. Using an AoS layout the various fields accross nodes would *not* be contiguous
* memory. Using an AoS layout the various fields across nodes would *not* be contiguous
* (although the different fields within each object (e.g. a TimingNode class) would be contiguous).
* Since we typically perform operations on particular fields accross nodes the SoA layout performs
* Since we typically perform operations on particular fields across nodes the SoA layout performs
* better (and enables memory ordering optimizations). The edges are also stored in a SOA format.
*
* The SoA layout also motivates the ID based approach, which allows direct indexing into the required
Expand All @@ -48,11 +48,12 @@
* and ensures that each cache line pulled into the cache will (likely) be accessed multiple times
* before being evicted.
*
* Note that performing these optimizations is currently done explicity by calling the optimize_edge_layout()
* and optimize_node_layout() member functions. In the future (particularily if incremental modification
* Note that performing these optimizations is currently done explicitly by calling the optimize_edge_layout()
* and optimize_node_layout() member functions. In the future (particularly if incremental modification
* support is added), it may be a good idea to apply these modifications automatically as needed.
*
*/
#include <utility>
#include <vector>
#include <set>
#include <limits>
Expand Down Expand Up @@ -149,7 +150,7 @@ class TimingGraph {

///\pre The graph must be levelized.
///\returns A range containing the nodes which are primary inputs (i.e. SOURCE's with no fanin, corresponding to top-level design input pins)
///\warning Not all SOURCE nodes in the graph are primary inputs (e.g. FF Q pins are SOURCE's but have incomming edges from the clock network)
///\warning Not all SOURCE nodes in the graph are primary inputs (e.g. FF Q pins are SOURCE's but have incoming edges from the clock network)
///\see levelize()
node_range primary_inputs() const {
TATUM_ASSERT_MSG(is_levelized_, "Timing graph must be levelized");
Expand Down Expand Up @@ -282,7 +283,7 @@ class TimingGraph {
//Node data
tatum::util::linear_map<NodeId,NodeId> node_ids_; //The node IDs in the graph
tatum::util::linear_map<NodeId,NodeType> node_types_; //Type of node
tatum::util::linear_map<NodeId,std::vector<EdgeId>> node_in_edges_; //Incomiing edge IDs for node
tatum::util::linear_map<NodeId,std::vector<EdgeId>> node_in_edges_; //Incoming edge IDs for node
tatum::util::linear_map<NodeId,std::vector<EdgeId>> node_out_edges_; //Outgoing edge IDs for node
tatum::util::linear_map<NodeId,LevelId> node_levels_; //Level ID for node

Expand All @@ -293,12 +294,12 @@ class TimingGraph {
tatum::util::linear_map<EdgeId,NodeId> edge_src_nodes_; //Source node for each edge
tatum::util::linear_map<EdgeId,bool> edges_disabled_;

//Auxilary graph-level info, filled in by levelize()
//Auxiliary graph-level info, filled in by levelize()
tatum::util::linear_map<LevelId,LevelId> level_ids_; //The level IDs in the graph
tatum::util::linear_map<LevelId,std::vector<NodeId>> level_nodes_; //Nodes in each level
std::vector<NodeId> primary_inputs_; //Primary input nodes of the timing graph.
std::vector<NodeId> logical_outputs_; //Logical output nodes of the timing graph.
bool is_levelized_ = false; //Inidcates if the current levelization is valid
bool is_levelized_ = false; //Indicates if the current levelization is valid

bool allow_dangling_combinational_nodes_ = false;

Expand All @@ -310,26 +311,31 @@ std::vector<std::vector<NodeId>> identify_combinational_loops(const TimingGraph&
//Returns the set of nodes transitively connected (either fanin or fanout) to nodes in through_nodes
//up to max_depth (default infinite) hops away
std::vector<NodeId> find_transitively_connected_nodes(const TimingGraph& tg,
const std::vector<NodeId> through_nodes,
const std::vector<NodeId>& through_nodes,
size_t max_depth=std::numeric_limits<size_t>::max());

//Returns the set of nodes in the transitive fanin of nodes in sinks up to max_depth (default infinite) hops away
std::vector<NodeId> find_transitive_fanin_nodes(const TimingGraph& tg,
const std::vector<NodeId> sinks,
const std::vector<NodeId>& sinks,
size_t max_depth=std::numeric_limits<size_t>::max());

//Returns the set of nodes in the transitive fanout of nodes in sources up to max_depth (default infinite) hops away
std::vector<NodeId> find_transitive_fanout_nodes(const TimingGraph& tg,
const std::vector<NodeId> sources,
const std::vector<NodeId>& sources,
size_t max_depth=std::numeric_limits<size_t>::max());

EdgeType infer_edge_type(const TimingGraph& tg, EdgeId edge);

//Mappings from old to new IDs
struct GraphIdMaps {
GraphIdMaps(tatum::util::linear_map<NodeId,NodeId> node_map,
tatum::util::linear_map<EdgeId,EdgeId> edge_map)
GraphIdMaps(const tatum::util::linear_map<NodeId,NodeId>& node_map,
const tatum::util::linear_map<EdgeId,EdgeId>& edge_map)
: node_id_map(node_map), edge_id_map(edge_map) {}

GraphIdMaps(tatum::util::linear_map<NodeId,NodeId>&& node_map,
tatum::util::linear_map<EdgeId,EdgeId>&& edge_map)
: node_id_map(std::move(node_map)), edge_id_map(std::move(edge_map)) {}

tatum::util::linear_map<NodeId,NodeId> node_id_map;
tatum::util::linear_map<EdgeId,EdgeId> edge_id_map;
};
Expand Down
6 changes: 3 additions & 3 deletions libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace tatum {
* This file defines the AnalyzerFactory class used to construct timing analyzers.
*
* We assume that the user has already defined the timing graph, constraints and
* thier own delay calculator:
* their own delay calculator:
*
* TimingGraph timing_graph;
* TimingConstraints timing_constraints;
Expand All @@ -33,7 +33,7 @@ namespace tatum {
* timing_constraints,
* delay_calculator);
*
* We can similarily generate analyzers for other types of analysis, for instance Hold:
* We can similarly generate analyzers for other types of analysis, for instance Hold:
*
* auto hold_analyzer = AnalyzerFactory<HoldAnalysis>::make(timing_graph,
* timing_constraints,
Expand All @@ -45,7 +45,7 @@ namespace tatum {
* timing_constraints,
* delay_calculator);
*
* The AnalzyerFactory returns a std::unique_ptr to the appropriate TimingAnalyzer sub-class:
* The AnalyzerFactory returns a std::unique_ptr to the appropriate TimingAnalyzer sub-class:
*
* SetupAnalysis => SetupTimingAnalyzer
* HoldAnalysis => HoldTimingAnalyzer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
namespace tatum {

/**
* An exmaple DelayCalculator implementation which takes
* An example DelayCalculator implementation which takes
* a vector of fixed pre-calculated edge delays
*
* \see DelayCalculator
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class GraphVisitor {
virtual void do_reset_node_arrival_tags_from_origin(const NodeId node_id, const NodeId origin) = 0;
virtual void do_reset_node_required_tags_from_origin(const NodeId node_id, const NodeId origin) = 0;

//Returns true if the specified source/sink is unconstrainted
//Returns true if the specified source/sink is unconstrained
virtual bool do_arrival_pre_traverse_node(const TimingGraph& tg, const TimingConstraints& tc, const NodeId node_id) = 0;
virtual bool do_required_pre_traverse_node(const TimingGraph& tg, const TimingConstraints& tc, const NodeId node_id) = 0;

Expand Down
4 changes: 4 additions & 0 deletions libs/libarchfpga/src/device_grid.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ class DeviceGrid {
size_t width() const { return grid_.dim_size(1); }
///@brief Return the height of the grid (the size of dimension 2 of grid_)
size_t height() const {
    const size_t grid_height = grid_.dim_size(2);
    return grid_height;
}
///@brief Return all three grid dimensions as a tuple ordered (# of layers, width, height)
std::tuple<size_t, size_t, size_t> dim_sizes() const {
    // Dimensions 0, 1 and 2 of grid_ are read in that order, matching the tuple layout.
    const size_t num_layers = grid_.dim_size(0);
    const size_t grid_width = grid_.dim_size(1);
    const size_t grid_height = grid_.dim_size(2);
    return std::make_tuple(num_layers, grid_width, grid_height);
}

///@brief Return the size of the flattened grid on the given layer
inline size_t grid_size() const {
Expand Down
33 changes: 3 additions & 30 deletions vpr/src/base/SetupVPR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ void SetupVPR(const t_options* options,
t_packer_opts* packerOpts,
t_placer_opts* placerOpts,
t_ap_opts* apOpts,
t_annealing_sched* annealSched,
t_router_opts* routerOpts,
t_analysis_opts* analysisOpts,
t_noc_opts* nocOpts,
Expand Down Expand Up @@ -140,7 +139,7 @@ void SetupVPR(const t_options* options,

SetupNetlistOpts(*options, *netlistOpts);
SetupPlacerOpts(*options, placerOpts);
SetupAnnealSched(*options, annealSched);
SetupAnnealSched(*options, &placerOpts->anneal_sched);
SetupRouterOpts(*options, routerOpts);
SetupAnalysisOpts(*options, *analysisOpts);
SetupPowerOpts(*options, powerOpts, arch);
Expand Down Expand Up @@ -395,7 +394,7 @@ static void SetupSwitches(const t_arch& Arch,
device_ctx.delayless_switch_idx = RoutingArch->delayless_switch;

//Warn about non-zero Cout values for the ipin switch, since these values have no effect.
//VPR do not model the R/C's of block internal routing connectsion.
//VPR does not model the R/C's of block internal routing connections.
//
//Note that we don't warn about the R value as it may be used to size the buffer (if buf_size_type is AUTO)
if (device_ctx.arch_switch_inf[RoutingArch->wire_to_arch_ipin_switch].Cout != 0.) {
Expand Down Expand Up @@ -531,31 +530,6 @@ static void SetupAnnealSched(const t_options& Options,
VPR_FATAL_ERROR(VPR_ERROR_OTHER, "inner_num must be greater than 0.\n");
}

AnnealSched->alpha_min = Options.PlaceAlphaMin;
if (AnnealSched->alpha_min >= 1 || AnnealSched->alpha_min <= 0) {
VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_min must be between 0 and 1 exclusive.\n");
}

AnnealSched->alpha_max = Options.PlaceAlphaMax;
if (AnnealSched->alpha_max >= 1 || AnnealSched->alpha_max <= AnnealSched->alpha_min) {
VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_max must be between alpha_min and 1 exclusive.\n");
}

AnnealSched->alpha_decay = Options.PlaceAlphaDecay;
if (AnnealSched->alpha_decay >= 1 || AnnealSched->alpha_decay <= 0) {
VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_decay must be between 0 and 1 exclusive.\n");
}

AnnealSched->success_min = Options.PlaceSuccessMin;
if (AnnealSched->success_min >= 1 || AnnealSched->success_min <= 0) {
VPR_FATAL_ERROR(VPR_ERROR_OTHER, "success_min must be between 0 and 1 exclusive.\n");
}

AnnealSched->success_target = Options.PlaceSuccessTarget;
if (AnnealSched->success_target >= 1 || AnnealSched->success_target <= 0) {
VPR_FATAL_ERROR(VPR_ERROR_OTHER, "success_target must be between 0 and 1 exclusive.\n");
}

AnnealSched->type = Options.anneal_sched_type;
}

Expand Down Expand Up @@ -600,7 +574,6 @@ void SetupPackerOpts(const t_options& Options,
//TODO: document?
PackerOpts->inter_cluster_net_delay = 1.0; /* DEFAULT */
PackerOpts->auto_compute_inter_cluster_net_delay = true;
PackerOpts->packer_algorithm = PACK_GREEDY; /* DEFAULT */

PackerOpts->device_layout = Options.device_layout;

Expand Down Expand Up @@ -782,7 +755,7 @@ static void SetupServerOpts(const t_options& Options, t_server_opts* ServerOpts)
}

static void find_ipin_cblock_switch_index(const t_arch& Arch, int& wire_to_arch_ipin_switch, int& wire_to_arch_ipin_switch_between_dice) {
for (auto cb_switch_name_index = 0; cb_switch_name_index < (int)Arch.ipin_cblock_switch_name.size(); cb_switch_name_index++) {
for (int cb_switch_name_index = 0; cb_switch_name_index < (int)Arch.ipin_cblock_switch_name.size(); cb_switch_name_index++) {
int ipin_cblock_switch_index = UNDEFINED;
for (int iswitch = 0; iswitch < (int)Arch.switches.size(); ++iswitch) {
if (Arch.switches[iswitch].name == Arch.ipin_cblock_switch_name[cb_switch_name_index]) {
Expand Down
1 change: 0 additions & 1 deletion vpr/src/base/SetupVPR.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ void SetupVPR(const t_options* Options,
t_packer_opts* PackerOpts,
t_placer_opts* PlacerOpts,
t_ap_opts* APOpts,
t_annealing_sched* AnnealSched,
t_router_opts* RouterOpts,
t_analysis_opts* AnalysisOpts,
t_noc_opts* NocOpts,
Expand Down
Loading

0 comments on commit e7f964e

Please sign in to comment.