Skip to content

Commit

Permalink
dump net decomposition code
Browse files Browse the repository at this point in the history
  • Loading branch information
duck2 committed Sep 29, 2023
1 parent 18cc251 commit 9975fe9
Show file tree
Hide file tree
Showing 38 changed files with 2,721 additions and 643 deletions.
20 changes: 18 additions & 2 deletions libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "tatum/TimingGraphFwd.hpp"
#include "tatum/TimingConstraintsFwd.hpp"

#include "tatum/analyzers/AdaptiveSetupHoldTimingAnalyzer.hpp"
#include "tatum/graph_walkers.hpp"
#include "tatum/timing_analyzers.hpp"
#include "tatum/analyzers/full_timing_analyzers.hpp"
Expand Down Expand Up @@ -55,9 +56,9 @@ namespace tatum {
///Factory class to construct timing analyzers
///
///\tparam Visitor The analysis type visitor (e.g. SetupAnalysis)
///\tparam GraphWalker The graph walker to use (defaults to serial traversals)
///\tparam GraphWalker The graph walker to use
template<class Visitor,
class GraphWalker>
class... GraphWalkers>
struct AnalyzerFactory {

//We use the dependent_false template to detect if the un-specialized AnalyzerFactor
Expand Down Expand Up @@ -176,6 +177,21 @@ struct AnalyzerFactory<SetupHoldAnalysis,SerialIncrWalker> {
}
};

template<>
struct AnalyzerFactory<SetupHoldAnalysis,ParallelWalker,SerialIncrWalker> {

static std::unique_ptr<SetupHoldTimingAnalyzer> make(const TimingGraph& timing_graph,
const TimingConstraints& timing_constraints,
const DelayCalculator& delay_calc) {
return std::unique_ptr<SetupHoldTimingAnalyzer>(
new detail::AdaptiveSetupHoldTimingAnalyzer<ParallelWalker, SerialIncrWalker>(
timing_graph,
timing_constraints,
delay_calc)
);
}
};

} //namespace

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace tatum {
///\tparam Visitor The analysis type visitor (e.g. SetupAnalysis)
///\tparam GraphWalkers The graph walker(s) to use
template<class Visitor,
class GraphWalker=SerialWalker>
class... GraphWalkers>
struct AnalyzerFactory;

} //namespace
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
#pragma once
#include <atomic>
#include <chrono>
#include <cstddef>
#include <string>

#include "tatum/TimingGraphFwd.hpp"
#include "tatum/graph_walkers/SerialWalker.hpp"
#include "tatum/graph_walkers/SerialIncrWalker.hpp"
#include "tatum/SetupHoldAnalysis.hpp"
#include "tatum/analyzers/SetupHoldTimingAnalyzer.hpp"
#include "tatum/base/validate_timing_graph_constraints.hpp"
#include "tatum/graph_walkers/TimingGraphWalker.hpp"

namespace tatum { namespace detail {

/** Threshold for AdaptiveSetupHoldTimingAnalyzer to use full updates.
 * Expressed as a fraction of all edges in the timing graph: once the number of
 * invalidated edges exceeds this fraction of the graph's edge count, the analyzer
 * switches from incremental to full updates. */
constexpr float full_update_threshold = 0.1;

/**
 * A concrete implementation of a SetupHoldTimingAnalyzer.
 *
 * This is an adaptive analyzer: it performs incremental updates (using IncrWalker) while the
 * number of invalidated edges is small, and falls back to a full update (using FullWalker)
 * once more than full_update_threshold of the timing graph's edges have been invalidated,
 * to avoid the overhead of a large incremental update.
 *
 * Two modes are tracked separately:
 *  - mode_             : which walker the *next* update will use
 *  - last_update_mode_ : which walker performed the *most recent* update
 * Queries about the results of an update (modified nodes, unconstrained start/end points)
 * are answered by the walker which actually performed that update.
 *
 * \tparam FullWalker The graph walker used for full updates
 * \tparam IncrWalker The graph walker used for incremental updates
 */
template<class FullWalker=SerialWalker, class IncrWalker=SerialIncrWalker>
class AdaptiveSetupHoldTimingAnalyzer : public SetupHoldTimingAnalyzer {
    public:
        AdaptiveSetupHoldTimingAnalyzer(const TimingGraph& timing_graph, const TimingConstraints& timing_constraints, const DelayCalculator& delay_calculator)
            : SetupHoldTimingAnalyzer()
            , timing_graph_(timing_graph)
            , timing_constraints_(timing_constraints)
            , delay_calculator_(delay_calculator)
            , setup_hold_visitor_(timing_graph_.nodes().size(), timing_graph_.edges().size())
            //Explicit cast: the product is computed in float and truncated to an edge count
            , max_modified_edges_(static_cast<size_t>(timing_graph_.edges().size() * full_update_threshold)) {
            validate_timing_graph_constraints(timing_graph_, timing_constraints_);

            //Initialize profiling data. Use full walker to store data for both
            full_walker_.set_profiling_data("total_analysis_sec", 0.);
            full_walker_.set_profiling_data("analysis_sec", 0.);
            full_walker_.set_profiling_data("num_full_updates", 0.);
            full_walker_.set_profiling_data("num_incr_updates", 0.);
        }

    protected:
        //Update both setup and hold simultaneously (this is more efficient than updating them sequentially)
        virtual void update_timing_impl() override {
            auto start_time = Clock::now();

            //Remember which kind of update we perform: mode_ is reset below
            const Mode update_mode = mode_;

            if(update_mode == Mode::INCR)
                update_timing_incr_(setup_hold_visitor_);
            else
                update_timing_full_(setup_hold_visitor_);

            clear_timing_incr_();

            double analysis_sec = std::chrono::duration_cast<dsec>(Clock::now() - start_time).count();

            //Record profiling data (use full walker to store it) (arbitrary choice)
            double total_analysis_sec = analysis_sec + full_walker_.get_profiling_data("total_analysis_sec");
            full_walker_.set_profiling_data("total_analysis_sec", total_analysis_sec);
            full_walker_.set_profiling_data("analysis_sec", analysis_sec);
            if(update_mode == Mode::INCR)
                full_walker_.set_profiling_data("num_incr_updates", full_walker_.get_profiling_data("num_incr_updates") + 1);
            else
                full_walker_.set_profiling_data("num_full_updates", full_walker_.get_profiling_data("num_full_updates") + 1);

            //Result queries must be answered by the walker which just ran
            last_update_mode_ = update_mode;

            mode_ = Mode::INCR; /* We did our update, try to use incr until too many edges are modified */
        }

        //Update only setup timing
        virtual void update_setup_timing_impl() override {
            auto& setup_visitor = setup_hold_visitor_.setup_visitor();

            if(mode_ == Mode::INCR)
                update_timing_incr_(setup_visitor);
            else
                update_timing_full_(setup_visitor);

            last_update_mode_ = mode_;
        }

        //Update only hold timing
        virtual void update_hold_timing_impl() override {
            auto& hold_visitor = setup_hold_visitor_.hold_visitor();

            if(mode_ == Mode::INCR)
                update_timing_incr_(hold_visitor);
            else
                update_timing_full_(hold_visitor);

            last_update_mode_ = mode_;
        }

        //Marks an edge as invalidated for the next update.
        //Once a full update is already pending there is nothing to track: the full
        //update re-evaluates the whole graph. Otherwise the edge is forwarded to the
        //incremental walker, and the analyzer switches to a full update as soon as
        //more than max_modified_edges_ edges have been invalidated.
        virtual void invalidate_edge_impl(const EdgeId edge) override {
            if(mode_ == Mode::FULL)
                return;
            incr_walker_.invalidate_edge(edge);
            const size_t n_modified = ++n_modified_edges_;
            if(n_modified > max_modified_edges_)
                mode_ = Mode::FULL;
        }

        //Nodes modified by the most recent update, as reported by the walker which performed it
        virtual node_range modified_nodes_impl() const override {
            if(last_update_mode_ == Mode::FULL)
                return full_walker_.modified_nodes();
            else
                return incr_walker_.modified_nodes();
        }

        //Profiling data for both walkers is stored in the full walker (see constructor)
        double get_profiling_data_impl(std::string key) const override {
            return full_walker_.get_profiling_data(key);
        }

        size_t num_unconstrained_startpoints_impl() const override {
            if(last_update_mode_ == Mode::FULL)
                return full_walker_.num_unconstrained_startpoints();
            else
                return incr_walker_.num_unconstrained_startpoints();
        }

        size_t num_unconstrained_endpoints_impl() const override {
            if(last_update_mode_ == Mode::FULL)
                return full_walker_.num_unconstrained_endpoints();
            else
                return incr_walker_.num_unconstrained_endpoints();
        }

        //Tag and slack queries are answered by the shared visitor, independent of the walker used
        TimingTags::tag_range setup_tags_impl(NodeId node_id) const override { return setup_hold_visitor_.setup_tags(node_id); }
        TimingTags::tag_range setup_tags_impl(NodeId node_id, TagType type) const override { return setup_hold_visitor_.setup_tags(node_id, type); }
#ifdef TATUM_CALCULATE_EDGE_SLACKS
        TimingTags::tag_range setup_edge_slacks_impl(EdgeId edge_id) const override { return setup_hold_visitor_.setup_edge_slacks(edge_id); }
#endif
        TimingTags::tag_range setup_node_slacks_impl(NodeId node_id) const override { return setup_hold_visitor_.setup_node_slacks(node_id); }

        TimingTags::tag_range hold_tags_impl(NodeId node_id) const override { return setup_hold_visitor_.hold_tags(node_id); }
        TimingTags::tag_range hold_tags_impl(NodeId node_id, TagType type) const override { return setup_hold_visitor_.hold_tags(node_id, type); }
#ifdef TATUM_CALCULATE_EDGE_SLACKS
        TimingTags::tag_range hold_edge_slacks_impl(EdgeId edge_id) const override { return setup_hold_visitor_.hold_edge_slacks(edge_id); }
#endif
        TimingTags::tag_range hold_node_slacks_impl(NodeId node_id) const override { return setup_hold_visitor_.hold_node_slacks(node_id); }

    private:
        /** Update using the full walker */
        void update_timing_full_(GraphVisitor& visitor){
            full_walker_.do_reset(timing_graph_, visitor);

            full_walker_.do_arrival_pre_traversal(timing_graph_, timing_constraints_, visitor);
            full_walker_.do_arrival_traversal(timing_graph_, timing_constraints_, delay_calculator_, visitor);

            full_walker_.do_required_pre_traversal(timing_graph_, timing_constraints_, visitor);
            full_walker_.do_required_traversal(timing_graph_, timing_constraints_, delay_calculator_, visitor);

            full_walker_.do_update_slack(timing_graph_, delay_calculator_, visitor);
        }

        /** Update using the incremental walker */
        void update_timing_incr_(GraphVisitor& visitor){
            if (never_updated_incr_) {
                //Invalidate all edges
                for (EdgeId edge : timing_graph_.edges()) {
                    incr_walker_.invalidate_edge(edge);
                }

                //Only need to pre-traverse the first update
                incr_walker_.do_arrival_pre_traversal(timing_graph_, timing_constraints_, visitor);
            }

            incr_walker_.do_arrival_traversal(timing_graph_, timing_constraints_, delay_calculator_, visitor);

            if (never_updated_incr_) {
                //Only need to pre-traverse the first update
                incr_walker_.do_required_pre_traversal(timing_graph_, timing_constraints_, visitor);
            }

            incr_walker_.do_required_traversal(timing_graph_, timing_constraints_, delay_calculator_, visitor);

            incr_walker_.do_update_slack(timing_graph_, delay_calculator_, visitor);
        }

        /* Clear incremental timing info */
        void clear_timing_incr_(){
            incr_walker_.clear_invalidated_edges();

            n_modified_edges_ = 0;
            never_updated_incr_ = false;
        }

        const TimingGraph& timing_graph_;
        const TimingConstraints& timing_constraints_;
        const DelayCalculator& delay_calculator_;
        SetupHoldAnalysis setup_hold_visitor_;

        FullWalker full_walker_;
        IncrWalker incr_walker_;
        enum class Mode { FULL, INCR };
        Mode mode_ = Mode::INCR;             //Walker to use for the next update
        Mode last_update_mode_ = Mode::INCR; //Walker which performed the most recent update

        bool never_updated_incr_ = true;
        size_t max_modified_edges_;          //Invalidated-edge count above which a full update is used
        //Brace-initialized: std::atomic is not copyable, so '= 0' is ill-formed before C++17
        std::atomic_size_t n_modified_edges_{0};

        using dsec = std::chrono::duration<double>;
        using Clock = std::chrono::high_resolution_clock;
};

}} //namespace
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#pragma once
#include "tatum/graph_walkers/SerialWalker.hpp"
#include "tatum/graph_walkers/SerialIncrWalker.hpp"
#include "tatum/HoldAnalysis.hpp"
#include "tatum/analyzers/HoldTimingAnalyzer.hpp"
#include "tatum/base/validate_timing_graph_constraints.hpp"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#pragma once
#include "tatum/graph_walkers/SerialIncrWalker.hpp"
#include "tatum/graph_walkers/SerialWalker.hpp"
#include "tatum/SetupHoldAnalysis.hpp"
#include "tatum/analyzers/SetupHoldTimingAnalyzer.hpp"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#pragma once
#include "tatum/graph_walkers/SerialWalker.hpp"
#include "tatum/graph_walkers/SerialIncrWalker.hpp"
#include "tatum/SetupAnalysis.hpp"
#include "tatum/analyzers/SetupTimingAnalyzer.hpp"
#include "tatum/base/validate_timing_graph_constraints.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
namespace tatum {

/**
* A parallel timing analyzer which traveres the timing graph in a levelized
* A parallel timing analyzer which traverses the timing graph in a levelized
* manner. However nodes within each level are processed in parallel using
* Thread Building Blocks (TBB). If TBB is not available it operates serially and is
* equivalent to the SerialWalker.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ namespace tatum {
*
* If TATUM_INCR_BLOCK_INVALIDATION is defined:
* All of a node's tags associated with an invalidated edge are invalidated.
* This is a robust but pessimisitc approach (it invalidates more tags than
* This is a robust but pessimistic approach (it invalidates more tags than
* strictly required). As a result all nodes processed will report having been
* modified, meaning their descendants/predecessors will also be invalidated
* even if in reality the recalculated tags are identical to the previous ones
* (i.e. nothing has really changed).
*
* Otherwise, the analyzer performs edge invalidation:
* Only node tags which are dominanted by an invalidated edge are invalidated.
* Only node tags which are dominated by an invalidated edge are invalidated.
* This is a less pessimistic approach, and means when processed nodes which
* don't have any changed tags will report as being unmodified. This significantly
* prunes the amount of the timing graph which needs to be updated (as unmodified
Expand All @@ -37,7 +37,7 @@ namespace tatum {
* manner. Unlike SerialWalker it attempts to incrementally (rather than
* fully) update based on invalidated edges.
*
* To performan an incremental traversal, the st of invalidated edges
* To perform an incremental traversal, the set of invalidated edges
* is processed to identify nodes which will need to be re-evaluated for
* the arrival and/or required traversals.
*
Expand Down
5 changes: 0 additions & 5 deletions libs/librrgraph/src/base/rr_graph_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -667,11 +667,6 @@ class t_rr_graph_storage {
/** Returns the 'direction' field of the RR node with the given id.
 * Logs an error (but still returns the stored value) when the node's type
 * is neither CHANX nor CHANY. */
static inline Direction get_node_direction(
    vtr::array_view_id<RRNodeId, const t_rr_node_data> node_storage,
    RRNodeId id) {
    const auto& entry = node_storage[id];
    const bool is_channel = (entry.type_ == CHANX || entry.type_ == CHANY);
    if (!is_channel) {
        VTR_LOG_ERROR("Attempted to access RR node 'direction' for non-channel type '%s'",
                      rr_node_typename[entry.type_]);
    }
    return entry.dir_side_.direction;
}

Expand Down
12 changes: 12 additions & 0 deletions libs/libvtrutil/src/vtr_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <map>
#include <vector>
#include <cmath>
#include <cstdint>

#include "vtr_assert.h"

Expand Down Expand Up @@ -163,6 +164,17 @@ bool isclose(T a, T b) {
return isclose<T>(a, b, DEFAULT_REL_TOL, DEFAULT_ABS_TOL);
}

/** Log2, round down (i.e. the index of the highest set bit of x).
 * log2(0) is undefined; this returns 0 for x == 0 instead of passing zero to the
 * count-leading-zeros builtin, whose result is undefined for a zero argument.
 * From https://stackoverflow.com/a/51351885 */
static inline uint64_t log2_floor(uint64_t x) {
    if (x == 0) {
        return 0;
    }
    // Use the 'unsigned long long' builtin: the 'unsigned long' operand of
    // __builtin_clzl is only 32 bits wide on LLP64/ILP32 targets, which would
    // truncate x and give a wrong leading-zero count.
    return 63U - static_cast<uint64_t>(__builtin_clzll(x));
}

/** Log2, round up (the smallest k such that 2^k >= x).
 * Returns 0 for x <= 1: this covers x == 1 (log2 is exactly 0) and avoids the
 * unsigned wrap-around of x - 1 for x == 0. */
static inline uint64_t log2_ceil(uint64_t x) {
    if (x <= 1) {
        return 0;
    }
    return log2_floor(x - 1) + 1;
}

} // namespace vtr

#endif
17 changes: 10 additions & 7 deletions utils/route_diag/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,16 @@ static void do_one_route(const Netlist<>& net_list,
-1,
false,
std::unordered_map<RRNodeId, int>());
std::tie(found_path, std::ignore, cheapest) = router.timing_driven_route_connection_from_route_tree(tree.root(),
sink_node,
cost_params,
bounding_box,
router_stats,
conn_params,
true);
std::tie(found_path, std::ignore, cheapest) = router.timing_driven_route_connection_from_route_tree(
tree.root(),
tree.root().inode,
sink_node,
cost_params,
bounding_box,
router_stats,
conn_params,
true
);

if (found_path) {
VTR_ASSERT(cheapest.index == sink_node);
Expand Down
3 changes: 3 additions & 0 deletions vpr/src/base/SetupVPR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,9 @@ void SetupVPR(const t_options* Options,
/* Set seed for pseudo-random placement, default seed to 1 */
vtr::srandom(PlacerOpts->seed);

/* Make num_workers available to the router */
RouterOpts->num_workers = vpr_setup->num_workers;

{
vtr::ScopedStartFinishTimer t("Building complex block graph");
alloc_and_load_all_pb_graphs(PowerOpts->do_power, RouterOpts->flat_routing);
Expand Down
2 changes: 2 additions & 0 deletions vpr/src/base/read_netlist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1057,11 +1057,13 @@ static void load_external_nets_and_cb(ClusteredNetlist& clb_nlist) {
int logical_pin = clb_nlist.pin_logical_index(pin_id);
int physical_pin = get_physical_pin(tile_type, block_type, logical_pin);

/* XXX: Silence warning
if (tile_type->is_ignored_pin[physical_pin] != is_ignored_net) {
VTR_LOG_WARN(
"Netlist connects net %s to both global and non-global pins.\n",
clb_nlist.net_name(net_id).c_str());
}
*/
}
}

Expand Down
Loading

0 comments on commit 9975fe9

Please sign in to comment.