Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AP] Global Placer #2806

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 4 additions & 11 deletions vpr/src/analytical_place/analytical_placement_flow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "atom_netlist.h"
#include "full_legalizer.h"
#include "gen_ap_netlist_from_atoms.h"
#include "global_placer.h"
#include "globals.h"
#include "partial_legalizer.h"
#include "partial_placement.h"
Expand Down Expand Up @@ -58,7 +59,6 @@ static void print_ap_netlist_stats(const APNetlist& netlist) {
}

void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
(void)vpr_setup;
// Start an overall timer for the Analytical Placement flow.
vtr::ScopedStartFinishTimer timer("Analytical Placement");

Expand All @@ -79,16 +79,9 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
print_ap_netlist_stats(ap_netlist);

// Run the Global Placer
// For now, just runs the solver and partial legalizer 10 times arbitrarily.
PartialPlacement p_placement(ap_netlist);
std::unique_ptr<AnalyticalSolver> solver = make_analytical_solver(e_analytical_solver::QP_HYBRID,
ap_netlist);
std::unique_ptr<PartialLegalizer> legalizer = make_partial_legalizer(e_partial_legalizer::FLOW_BASED,
ap_netlist);
for (size_t i = 0; i < 10; i++) {
solver->solve(i, p_placement);
legalizer->legalize(p_placement);
}
std::unique_ptr<GlobalPlacer> global_placer = make_global_placer(e_global_placer::SimPL,
ap_netlist);
PartialPlacement p_placement = global_placer->place();

// Verify that the partial placement is valid before running the full
// legalizer.
Expand Down
2 changes: 0 additions & 2 deletions vpr/src/analytical_place/full_legalizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -394,8 +394,6 @@ void FullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist,

// FIXME: Allocate and load moveable blocks?
// - This may be needed to perform SA. Not needed right now.

// TODO: Check initial placement legality
}

void FullLegalizer::legalize(const PartialPlacement& p_placement) {
Expand Down
134 changes: 134 additions & 0 deletions vpr/src/analytical_place/global_placer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/**
* @file
* @author Alex Singer
* @date October 2024
* @brief The definitions of the global placers used in the AP flow and their
* base class.
*/

#include "global_placer.h"
#include <cstdio>
#include <memory>
#include "analytical_solver.h"
#include "ap_netlist.h"
#include "partial_legalizer.h"
#include "partial_placement.h"
#include "vpr_error.h"
#include "vtr_log.h"
#include "vtr_time.h"

/**
 * @brief Factory which constructs the global placer selected by placer_type.
 *
 *  @param placer_type  Which global placer implementation to build.
 *  @param netlist      The AP netlist handed to the placer's constructor.
 *
 *  @return An owning pointer to the newly constructed global placer. A fatal
 *          VPR error is raised if placer_type is not a recognized placer.
 */
std::unique_ptr<GlobalPlacer> make_global_placer(e_global_placer placer_type,
                                                 const APNetlist& netlist) {
    // SimPL is the only placer this factory currently knows how to build.
    if (placer_type == e_global_placer::SimPL)
        return std::make_unique<SimPLGlobalPlacer>(netlist);

    // Any other value is not a placer type handled by this factory.
    VPR_FATAL_ERROR(VPR_ERROR_AP,
                    "Unrecognized global placer type");
}

/**
 * @brief Construct the SimPL global placer for the given netlist.
 *
 * Builds the two components the placer alternates between: the analytical
 * solver (QP_HYBRID) and the partial legalizer (FLOW_BASED).
 */
SimPLGlobalPlacer::SimPLGlobalPlacer(const APNetlist& netlist)
    : GlobalPlacer(netlist) {
    // Construction can take a while, so it is timed. The timer is declared
    // first so that it covers the construction of both components below.
    vtr::ScopedStartFinishTimer construction_timer("Constructing Global Placer");

    // Solver: produces the lower-bound placement.
    solver_ = make_analytical_solver(e_analytical_solver::QP_HYBRID, netlist);

    // Partial legalizer: produces the upper-bound placement.
    partial_legalizer_ = make_partial_legalizer(e_partial_legalizer::FLOW_BASED, netlist);
}

/**
 * @brief Helper method to print the header of the per-iteration status updates
 *        of the global placer.
 *
 * The table has six columns of widths 4, 16, 16, 11, 14 and 10 characters,
 * each separated by a single space. These widths match the field widths used
 * by print_SimPL_status when it prints each row.
 */
static void print_SimPL_status_header() {
    VTR_LOG("---- ---------------- ---------------- ----------- -------------- ----------\n");
    VTR_LOG("Iter Lower Bound HPWL Upper Bound HPWL Solver Time Legalizer Time Total Time\n");
    // Units row. Only the three time columns carry a unit, so skip over the
    // first three columns (4 + 1 + 16 + 1 + 16 + 1 = 39 characters) and then
    // left-align each "(sec)" at the start of its column:
    //   Solver Time    -> 11 wide + 1 separator = 12
    //   Legalizer Time -> 14 wide + 1 separator = 15
    //   Total Time     -> last column, no padding needed.
    VTR_LOG("%39s%-12s%-15s%s\n", "", "(sec)", "(sec)", "(sec)");
    VTR_LOG("---- ---------------- ---------------- ----------- -------------- ----------\n");
}

/**
 * @brief Helper method to print one row of the global placer's per-iteration
 *        status table.
 *
 *  @param iteration       Index of the iteration being reported.
 *  @param lb_hpwl         HPWL of the lower-bound placement.
 *  @param ub_hpwl         HPWL of the upper-bound placement.
 *  @param solver_time     Time spent in the solver, in seconds.
 *  @param legalizer_time  Time spent in the legalizer, in seconds.
 *  @param total_time      Total time of the iteration, in seconds.
 */
static void print_SimPL_status(size_t iteration,
                               double lb_hpwl,
                               double ub_hpwl,
                               float solver_time,
                               float legalizer_time,
                               float total_time) {
    // The row is emitted with a single log call; the format string is spelled
    // as one literal fragment per column so each column stays easy to find.
    VTR_LOG("%4zu"     // Iteration
            " %16.2f"  // Lower Bound HPWL
            " %16.2f"  // Upper Bound HPWL
            " %11.3f"  // Solver runtime
            " %14.3f"  // Legalizer runtime
            " %10.3f"  // Total runtime
            "\n",
            iteration,
            lb_hpwl,
            ub_hpwl,
            solver_time,
            legalizer_time,
            total_time);

    // Flush so the row shows up on stdout right away.
    fflush(stdout);
}

/**
 * @brief Run the SimPL-like global placement loop.
 *
 * Each iteration runs the solver (whose result gives the lower-bound HPWL) and
 * then the partial legalizer (whose result gives the upper-bound HPWL) on the
 * same PartialPlacement object. The loop ends when the relative gap between
 * the two bounds drops below target_hpwl_relative_gap_, when the upper-bound
 * HPWL is not positive (the relative gap is then undefined), or after
 * max_num_iterations_ iterations.
 *
 *  @return The partial placement as it stands after the last iteration run.
 */
PartialPlacement SimPLGlobalPlacer::place() {
    // Create a timer to time the entire global placement time.
    vtr::ScopedStartFinishTimer global_placer_time("AP Global Placer");
    // Create a timer to keep track of how long the solver and legalizer take.
    vtr::Timer runtime_timer;
    // Print the status header.
    if (log_verbosity_ >= 1)
        print_SimPL_status_header();
    // Initialize the partial placement object.
    PartialPlacement p_placement(netlist_);
    // Run the global placer.
    for (size_t i = 0; i < max_num_iterations_; i++) {
        float iter_start_time = runtime_timer.elapsed_sec();

        // Run the solver. The HPWL is sampled right after the solve, before
        // the legalizer modifies the placement, to get the lower bound.
        float solver_start_time = runtime_timer.elapsed_sec();
        solver_->solve(i, p_placement);
        float solver_end_time = runtime_timer.elapsed_sec();
        double lb_hpwl = p_placement.get_hpwl(netlist_);

        // Run the legalizer. The HPWL after legalization is the upper bound.
        float legalizer_start_time = runtime_timer.elapsed_sec();
        partial_legalizer_->legalize(p_placement);
        float legalizer_end_time = runtime_timer.elapsed_sec();
        double ub_hpwl = p_placement.get_hpwl(netlist_);

        // Print some stats
        if (log_verbosity_ >= 1) {
            float iter_end_time = runtime_timer.elapsed_sec();
            print_SimPL_status(i, lb_hpwl, ub_hpwl,
                               solver_end_time - solver_start_time,
                               legalizer_end_time - legalizer_start_time,
                               iter_end_time - iter_start_time);
        }

        // Exit condition: a non-positive upper-bound HPWL makes the relative
        // gap below undefined (division by zero). In particular 0/0 yields
        // NaN, which never compares less than the target, so without this
        // guard the loop would needlessly run every remaining iteration.
        if (ub_hpwl <= 0.0)
            break;

        // Exit condition: If the upper-bound and lower-bound HPWLs are
        // sufficiently close together then stop.
        double hpwl_relative_gap = (ub_hpwl - lb_hpwl) / ub_hpwl;
        if (hpwl_relative_gap < target_hpwl_relative_gap_)
            break;
    }
    // Return the placement from the final iteration.
    // TODO: investigate saving the best solution found so far. It should be
    //       cheap to save a copy of the PartialPlacement object.
    return p_placement;
}

143 changes: 143 additions & 0 deletions vpr/src/analytical_place/global_placer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/**
* @file
* @author Alex Singer
* @date October 2024
* @brief The declarations of the Global Placer base class which is used to
* define the functionality of all global placers in the AP flow.
*
* A Global Placer creates a Partial Placement given only the netlist and the
* architecture. It uses analytical techniques (i.e. efficient numerical
* minimization of an objective function of a placement) to find a placement
* that optimizes for objectives subject to some of the constraints of the FPGA
* architecture.
*/

#pragma once

#include <memory>

// Forward declarations
class APNetlist;
class AnalyticalSolver;
class PartialPlacement;
class PartialLegalizer;

/**
 * @brief Enumeration of all of the global placers currently implemented in VPR.
 *
 * A value of this type is passed to make_global_placer to select which global
 * placer implementation gets constructed.
 */
enum class e_global_placer {
    SimPL // Global placer based on the SimPL paper.
};

/**
 * @brief The Global Placer base class
 *
 * This declares the functionality that all Global Placers will use. This
 * provides a standard interface for the global placers so they can be used
 * interchangeably. This makes it very easy to test and compare different global
 * placers.
 */
class GlobalPlacer {
  public:
    virtual ~GlobalPlacer() = default;

    /**
     * @brief Constructor of the base GlobalPlacer class
     *
     * Only a reference to the netlist is stored, so the netlist must outlive
     * the global placer.
     *
     *  @param netlist          Netlist of the design at some abstraction level;
     *                          typically this would have some atoms and groups of
     *                          atoms (in a pack pattern).
     *  @param log_verbosity    The verbosity of log messages in the Global
     *                          Placer.
     */
    explicit GlobalPlacer(const APNetlist& netlist, int log_verbosity = 1)
        : netlist_(netlist)
        , log_verbosity_(log_verbosity) {}

    /**
     * @brief Perform global placement on the given netlist.
     *
     * The role of a global placer is to try and find a placement for the given
     * netlist which optimizes some objective function and is mostly legal.
     *
     *  @return The partial placement produced by the global placer.
     */
    virtual PartialPlacement place() = 0;

  protected:
    /// @brief The APNetlist the global placer is placing.
    const APNetlist& netlist_;

    /// @brief The setting of how verbose the log messages should be in the
    ///        global placer. Anything larger than zero will display per
    ///        iteration status messages.
    int log_verbosity_;
};

/**
 * @brief A factory method which creates a Global Placer of the given type.
 *
 *  @param placer_type  The kind of global placer to construct.
 *  @param netlist      The AP netlist the constructed placer will place.
 *
 *  @return An owning pointer to the constructed global placer. An unrecognized
 *          placer_type raises a fatal VPR error instead of returning.
 */
std::unique_ptr<GlobalPlacer> make_global_placer(e_global_placer placer_type,
                                                 const APNetlist& netlist);

/**
 * @brief A Global Placer based on the SimPL work for analytical ASIC placement.
 *          https://doi.org/10.1145/2461256.2461279
 *
 * This placement technique uses a solver to generate a placement that optimizes
 * over some objective function and is likely very illegal (has many overlapping
 * blocks and blocks in the wrong places). This solution represents the "lower-
 * bound" on the solution quality.
 *
 * This technique passes this "lower-bound" solution into a legalizer, which
 * tries to find the closest legal solution to the lower-bound solution (by
 * spreading out blocks and placing them in legal positions). This often
 * destroys the quality of the lower-bound solution, and is considered an
 * "upper-bound" on the solution quality.
 *
 * Each iteration of this global placer, the upper-bound solution is fed into
 * the solver as a "hint" to what a legal solution looks like. This allows the
 * solver to produce another placement which will make decisions knowing where
 * the blocks will end-up in the legal solution. This worsens the quality of
 * the lower-bound solution; however, after passing this solution back into
 * the legalizer, this will likely improve the quality of the upper-bound
 * solution.
 *
 * Over several iterations the upper-bound and lower-bound solutions will
 * approach each other until a good quality, mostly-legal solution is found.
 */
class SimPLGlobalPlacer : public GlobalPlacer {
  private:
    /// @brief The maximum number of iterations the global placer can perform.
    static constexpr size_t max_num_iterations_ = 100;

    /// @brief The target relative gap between the HPWL of the upper-bound and
    ///        lower-bound placements. The placer will stop if the difference
    ///        between the two bounds, normalized to the upper-bound, is smaller
    ///        than this number.
    static constexpr double target_hpwl_relative_gap_ = 0.10;

    /// @brief The solver which generates the lower-bound placement.
    std::unique_ptr<AnalyticalSolver> solver_;

    /// @brief The legalizer which generates the upper-bound placement.
    std::unique_ptr<PartialLegalizer> partial_legalizer_;

  public:
    /**
     * @brief Constructor for the SimPL Global Placer
     *
     * Constructs the solver and partial legalizer.
     *
     * Declared explicit so that an APNetlist is never implicitly converted
     * into a global placer.
     *
     *  @param netlist  The AP netlist to be placed.
     */
    explicit SimPLGlobalPlacer(const APNetlist& netlist);

    /**
     * @brief Run a SimPL-like global placement algorithm
     *
     * This iteratively runs the solver and legalizer until a good quality and
     * mostly-legal placement is found.
     *
     *  @return The partial placement produced by the final iteration.
     */
    PartialPlacement place() final;
};

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time 
crit_path_total_timing_analysis_time crit_path_total_sta_time
fixed_k6_frac_N8_22nm.xml single_wire.v common 4.25 vpr 70.91 MiB -1 -1 0.18 16276 1 0.39 -1 -1 29812 -1 -1 0 1 0 0 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72608 1 1 0 2 0 1 2 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.14 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.14 70.9 MiB 0.10 8 14 1 6.79088e+06 0 166176. 575.005 0.36 0.00138004 0.00129992 20206 45088 -1 19 1 1 1 194 45 0.7726 nan -0.7726 -0.7726 0 0 202963. 702.294 0.09 0.00 0.07 -1 -1 0.09 0.00122838 0.00119736
fixed_k6_frac_N8_22nm.xml single_ff.v common 4.68 vpr 71.03 MiB -1 -1 0.20 16236 1 0.39 -1 -1 29696 -1 -1 1 2 0 0 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72736 2 1 3 3 1 3 4 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 71.0 MiB 0.14 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 71.0 MiB 0.14 71.0 MiB 0.10 20 31 1 6.79088e+06 13472 414966. 1435.87 0.63 0.00135413 0.0012936 22510 95286 -1 35 1 2 2 213 52 0.942216 0.942216 -1.68896 -0.942216 0 0 503264. 1741.40 0.17 0.00 0.14 -1 -1 0.17 0.00127341 0.00123431
fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 6.69 vpr 71.67 MiB -1 -1 0.46 18220 3 0.40 -1 -1 33084 -1 -1 40 99 3 0 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 73388 99 130 240 229 1 247 272 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 71.7 MiB 0.28 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 71.7 MiB 0.28 71.7 MiB 0.21 32 3122 15 6.79088e+06 2.18288e+06 586450. 2029.24 1.84 0.271358 0.247517 24814 144142 -1 2952 30 711 1121 349988 188928 2.0466 2.0466 -154.346 -2.0466 -0.04337 -0.04337 744469. 2576.02 0.25 0.25 0.22 -1 -1 0.25 0.102379 0.0937273
fixed_k6_frac_N8_22nm.xml diffeq1.v common 32.17 vpr 74.24 MiB -1 -1 0.75 23104 15 0.61 -1 -1 34204 -1 -1 74 162 0 5 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 76024 162 96 817 258 1 797 337 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.01 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.01 74.2 MiB 0.60 60 15916 46 6.79088e+06 2.97693e+06 1.01997e+06 3529.29 24.68 3.56948 3.3772 29998 257685 -1 13617 19 4413 11558 1499556 342325 21.9657 21.9657 -1806.56 -21.9657 0 0 1.27783e+06 4421.56 0.40 0.79 0.44 -1 -1 0.40 0.334496 0.31821
fixed_k6_frac_N8_22nm.xml single_wire.v common 2.23 vpr 70.88 MiB -1 -1 0.13 15888 1 0.16 -1 -1 29628 -1 -1 0 1 0 0 success v8.0.0-11573-g5ea68eac9 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T14:02:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72576 1 1 0 2 0 1 2 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.13 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.13 70.9 MiB 0.09 8 14 1 6.79088e+06 0 166176. 575.005 0.37 0.001421 0.00133975 20206 45088 -1 19 1 1 1 194 45 0.7726 nan -0.7726 -0.7726 0 0 202963. 702.294 0.09 0.00 0.07 -1 -1 0.09 0.00128701 0.00125353
fixed_k6_frac_N8_22nm.xml single_ff.v common 2.68 vpr 70.80 MiB -1 -1 0.14 16336 1 0.16 -1 -1 29640 -1 -1 1 2 0 0 success v8.0.0-11573-g5ea68eac9 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T14:02:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72500 2 1 3 3 1 3 4 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 70.8 MiB 0.14 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 70.8 MiB 0.14 70.8 MiB 0.09 20 31 1 6.79088e+06 13472 414966. 1435.87 0.65 0.00147526 0.00140277 22510 95286 -1 35 1 2 2 213 52 0.942216 0.942216 -1.68896 -0.942216 0 0 503264. 1741.40 0.18 0.00 0.15 -1 -1 0.18 0.00127433 0.00123146
fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 4.51 vpr 71.77 MiB -1 -1 0.47 18200 3 0.09 -1 -1 33188 -1 -1 41 99 3 0 success v8.0.0-11573-g5ea68eac9 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T14:02:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 73496 99 130 240 229 1 247 273 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 71.8 MiB 0.30 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 71.8 MiB 0.30 71.8 MiB 0.23 32 3145 19 6.79088e+06 2.19635e+06 586450. 2029.24 1.54 0.232941 0.21265 24814 144142 -1 2897 11 635 1003 105957 24691 2.0466 2.0466 -155.681 -2.0466 -0.21204 -0.16867 744469. 2576.02 0.27 0.09 0.22 -1 -1 0.27 0.046062 0.042439
fixed_k6_frac_N8_22nm.xml diffeq1.v common 30.51 vpr 74.25 MiB -1 -1 0.73 23000 15 0.35 -1 -1 34316 -1 -1 65 162 0 5 success v8.0.0-11573-g5ea68eac9 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T14:02:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 76028 162 96 817 258 1 792 328 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.81 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.81 74.2 MiB 0.61 68 16708 27 6.79088e+06 2.85568e+06 1.14541e+06 3963.36 22.39 3.52495 3.33579 31438 289477 -1 14397 19 4042 10567 1489293 328107 22.3059 22.3059 -1909.12 -22.3059 0 0 1.42693e+06 4937.46 0.51 0.78 0.51 -1 -1 0.51 0.316463 0.300694