Skip to content

Commit

Permalink
Merge pull request #2806 from AlexandreSinger/feature-ap-global-place…
Browse files Browse the repository at this point in the history
…ment

[AP] Global Placer
  • Loading branch information
vaughnbetz authored Nov 14, 2024
2 parents 113655f + fe92b3e commit 2586a98
Show file tree
Hide file tree
Showing 5 changed files with 285 additions and 17 deletions.
15 changes: 4 additions & 11 deletions vpr/src/analytical_place/analytical_placement_flow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "atom_netlist.h"
#include "full_legalizer.h"
#include "gen_ap_netlist_from_atoms.h"
#include "global_placer.h"
#include "globals.h"
#include "partial_legalizer.h"
#include "partial_placement.h"
Expand Down Expand Up @@ -58,7 +59,6 @@ static void print_ap_netlist_stats(const APNetlist& netlist) {
}

void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
(void)vpr_setup;
// Start an overall timer for the Analytical Placement flow.
vtr::ScopedStartFinishTimer timer("Analytical Placement");

Expand All @@ -79,16 +79,9 @@ void run_analytical_placement_flow(t_vpr_setup& vpr_setup) {
print_ap_netlist_stats(ap_netlist);

// Run the Global Placer
// For now, just runs the solver and partial legalizer 10 times arbitrarily.
PartialPlacement p_placement(ap_netlist);
std::unique_ptr<AnalyticalSolver> solver = make_analytical_solver(e_analytical_solver::QP_HYBRID,
ap_netlist);
std::unique_ptr<PartialLegalizer> legalizer = make_partial_legalizer(e_partial_legalizer::FLOW_BASED,
ap_netlist);
for (size_t i = 0; i < 10; i++) {
solver->solve(i, p_placement);
legalizer->legalize(p_placement);
}
std::unique_ptr<GlobalPlacer> global_placer = make_global_placer(e_global_placer::SimPL,
ap_netlist);
PartialPlacement p_placement = global_placer->place();

// Verify that the partial placement is valid before running the full
// legalizer.
Expand Down
2 changes: 0 additions & 2 deletions vpr/src/analytical_place/full_legalizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -394,8 +394,6 @@ void FullLegalizer::place_clusters(const ClusteredNetlist& clb_nlist,

// FIXME: Allocate and load moveable blocks?
// - This may be needed to perform SA. Not needed right now.

// TODO: Check initial placement legality
}

void FullLegalizer::legalize(const PartialPlacement& p_placement) {
Expand Down
134 changes: 134 additions & 0 deletions vpr/src/analytical_place/global_placer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
/**
* @file
* @author Alex Singer
* @date October 2024
* @brief The definitions of the global placers used in the AP flow and their
* base class.
*/

#include "global_placer.h"
#include <cstdio>
#include <memory>
#include "analytical_solver.h"
#include "ap_netlist.h"
#include "partial_legalizer.h"
#include "partial_placement.h"
#include "vpr_error.h"
#include "vtr_log.h"
#include "vtr_time.h"

/**
 * @brief Factory which builds the global placer selected by placer_type.
 *
 *  @param placer_type  Which global placer implementation to construct.
 *  @param netlist      The AP netlist handed to the constructed placer.
 *
 *  @return An owning pointer to the newly constructed global placer. A placer
 *          type this factory does not know about is reported through
 *          VPR_FATAL_ERROR.
 */
std::unique_ptr<GlobalPlacer> make_global_placer(e_global_placer placer_type,
                                                 const APNetlist& netlist) {
    switch (placer_type) {
        case e_global_placer::SimPL:
            return std::make_unique<SimPLGlobalPlacer>(netlist);
        default:
            // Not a placer type handled above; reported after the switch.
            break;
    }

    VPR_FATAL_ERROR(VPR_ERROR_AP,
                    "Unrecognized global placer type");
}

/**
 * @brief Constructs the SimPL global placer for the given netlist.
 *
 * Builds the two components the placer iterates between: a QP_HYBRID
 * analytical solver and a FLOW_BASED partial legalizer, both created for the
 * same netlist.
 *
 *  @param netlist  The AP netlist to be placed.
 */
SimPLGlobalPlacer::SimPLGlobalPlacer(const APNetlist& netlist)
    : GlobalPlacer(netlist) {
    // Building the solver and legalizer can be slow, so time the whole
    // construction to make its cost visible in the log.
    vtr::ScopedStartFinishTimer construction_timer("Constructing Global Placer");

    // The analytical solver.
    solver_ = make_analytical_solver(e_analytical_solver::QP_HYBRID, netlist);

    // The partial legalizer.
    partial_legalizer_ = make_partial_legalizer(e_partial_legalizer::FLOW_BASED, netlist);
}

/**
 * @brief Prints the table header for the per-iteration status lines of the
 *        SimPL global placer.
 *
 * The header is a rule line, the column titles, the units of the timing
 * columns, and a closing rule line.
 */
static void print_SimPL_status_header() {
    // The same rule line opens and closes the header.
    constexpr const char* rule_line = "---- ---------------- ---------------- ----------- -------------- ----------\n";

    VTR_LOG("%s", rule_line);
    VTR_LOG("Iter Lower Bound HPWL Upper Bound HPWL Solver Time Legalizer Time Total Time\n");
    VTR_LOG(" (sec) (sec) (sec)\n");
    VTR_LOG("%s", rule_line);
}

/**
 * @brief Prints one status line describing a single global placer iteration.
 *
 *  @param iteration        Index of the iteration being reported.
 *  @param lb_hpwl          HPWL of the lower-bound placement.
 *  @param ub_hpwl          HPWL of the upper-bound placement.
 *  @param solver_time      Time spent in the solver (printed in the "(sec)"
 *                          Solver Time column).
 *  @param legalizer_time   Time spent in the legalizer (printed in the
 *                          "(sec)" Legalizer Time column).
 *  @param total_time       Time spent in the whole iteration (printed in the
 *                          "(sec)" Total Time column).
 */
static void print_SimPL_status(size_t iteration,
                               double lb_hpwl,
                               double ub_hpwl,
                               float solver_time,
                               float legalizer_time,
                               float total_time) {
    // One field is emitted per column, in the column order of the header.
    VTR_LOG("%4zu", iteration);           // Iter
    VTR_LOG(" %16.2f", lb_hpwl);          // Lower Bound HPWL
    VTR_LOG(" %16.2f", ub_hpwl);          // Upper Bound HPWL
    VTR_LOG(" %11.3f", solver_time);      // Solver Time
    VTR_LOG(" %14.3f", legalizer_time);   // Legalizer Time
    VTR_LOG(" %10.3f", total_time);       // Total Time
    VTR_LOG("\n");

    // Flush so the status line is visible as soon as the iteration finishes.
    std::fflush(stdout);
}

/**
 * @brief Run the SimPL-like global placement loop and return its result.
 *
 * Each iteration runs the solver on the current placement (the HPWL measured
 * right after it is reported as the lower bound) and then the partial
 * legalizer (the HPWL measured right after it is reported as the upper
 * bound). The loop stops once the gap between the two bounds, normalized to
 * the upper bound, drops below target_hpwl_relative_gap_, or after
 * max_num_iterations_ iterations, whichever comes first.
 *
 *  @return The partial placement produced by the last iteration performed.
 */
PartialPlacement SimPLGlobalPlacer::place() {
    // Create a timer to time the entire global placement time.
    vtr::ScopedStartFinishTimer global_placer_time("AP Global Placer");
    // Create a timer to keep track of how long the solver and legalizer take.
    vtr::Timer runtime_timer;
    // Print the status header.
    if (log_verbosity_ >= 1)
        print_SimPL_status_header();
    // Initialize the partial placement object.
    PartialPlacement p_placement(netlist_);
    // Run the global placer.
    for (size_t i = 0; i < max_num_iterations_; i++) {
        float iter_start_time = runtime_timer.elapsed_sec();

        // Run the solver.
        float solver_start_time = runtime_timer.elapsed_sec();
        solver_->solve(i, p_placement);
        float solver_end_time = runtime_timer.elapsed_sec();
        double lb_hpwl = p_placement.get_hpwl(netlist_);

        // Run the legalizer.
        float legalizer_start_time = runtime_timer.elapsed_sec();
        partial_legalizer_->legalize(p_placement);
        float legalizer_end_time = runtime_timer.elapsed_sec();
        double ub_hpwl = p_placement.get_hpwl(netlist_);

        // Print some stats
        if (log_verbosity_ >= 1) {
            float iter_end_time = runtime_timer.elapsed_sec();
            print_SimPL_status(i, lb_hpwl, ub_hpwl,
                               solver_end_time - solver_start_time,
                               legalizer_end_time - legalizer_start_time,
                               iter_end_time - iter_start_time);
        }

        // Exit condition: a zero (or otherwise non-positive) upper-bound HPWL
        // cannot be used as the normalization factor below. With both bounds
        // at zero the division is 0/0 = NaN, and every ordered comparison
        // with NaN is false, so the early exit would never trigger and all
        // remaining iterations would be wasted. Stop here instead.
        if (ub_hpwl <= 0.0)
            break;

        // Exit condition: If the upper-bound and lower-bound HPWLs are
        //                 sufficiently close together then stop.
        double hpwl_relative_gap = (ub_hpwl - lb_hpwl) / ub_hpwl;
        if (hpwl_relative_gap < target_hpwl_relative_gap_)
            break;
    }
    // Return the placement from the final iteration.
    // TODO: investigate saving the best solution found so far. It should be
    //       cheap to save a copy of the PartialPlacement object.
    return p_placement;
}

143 changes: 143 additions & 0 deletions vpr/src/analytical_place/global_placer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/**
* @file
* @author Alex Singer
* @date October 2024
* @brief The declarations of the Global Placer base class which is used to
* define the functionality of all global placers in the AP flow.
*
* A Global Placer creates a Partial Placement given only the netlist and the
* architecture. It uses analytical techniques (i.e. efficient numerical
* minimization of an objective function of a placement) to find a placement
* that optimizes for objectives subject to some of the constraints of the FPGA
* architecture.
*/

#pragma once

#include <memory>

// Forward declarations
class APNetlist;
class AnalyticalSolver;
class PartialPlacement;
class PartialLegalizer;

/**
 * @brief Enumeration of all of the global placers currently implemented in VPR.
 *
 * A value of this type is passed to make_global_placer() to select which
 * global placer gets constructed.
 */
enum class e_global_placer {
SimPL ///< Global placer based on the SimPL paper.
};

/**
 * @brief Abstract base class of all Global Placers in the AP flow.
 *
 * Every global placer derives from this class, which gives them a common
 * interface so they can be used interchangeably. This makes it easy to test
 * and compare different global placers against each other.
 */
class GlobalPlacer {
  public:
    /**
     * @brief Constructor of the base GlobalPlacer class
     *
     *  @param netlist          Netlist of the design at some abstraction
     *                          level; typically this would have some atoms
     *                          and groups of atoms (in a pack pattern).
     *  @param log_verbosity    The verbosity of log messages in the Global
     *                          Placer.
     */
    GlobalPlacer(const APNetlist& netlist, int log_verbosity = 1)
        : netlist_(netlist)
        , log_verbosity_(log_verbosity) {}

    /// @brief Virtual destructor so derived placers can be destroyed through
    ///        a pointer to this base class.
    virtual ~GlobalPlacer() = default;

    /**
     * @brief Perform global placement on the netlist given at construction.
     *
     * A global placer tries to find a placement for the netlist which
     * optimizes some objective function and is mostly legal.
     *
     *  @return The partial placement found by the placer.
     */
    virtual PartialPlacement place() = 0;

  protected:
    /// @brief The APNetlist the global placer is placing. Held by reference.
    const APNetlist& netlist_;

    /// @brief How verbose the log messages of the global placer should be.
    ///        Anything larger than zero will display per-iteration status
    ///        messages.
    int log_verbosity_;
};

/**
 * @brief A factory method which creates a Global Placer of the given type.
 *
 *  @param placer_type  The kind of global placer to create.
 *  @param netlist      The AP netlist passed to the created global placer.
 *
 *  @return An owning pointer to the created global placer.
 */
std::unique_ptr<GlobalPlacer> make_global_placer(e_global_placer placer_type,
const APNetlist& netlist);

/**
 * @brief A Global Placer based on the SimPL work for analytical ASIC placement.
 *          https://doi.org/10.1145/2461256.2461279
 *
 * The placer alternates between two steps:
 *
 *  - A solver generates a placement that optimizes some objective function.
 *    This placement is likely very illegal (many overlapping blocks and
 *    blocks in the wrong places) and represents the "lower-bound" on the
 *    solution quality.
 *
 *  - A legalizer takes the lower-bound solution and tries to find the closest
 *    legal solution to it (by spreading out blocks and placing them in legal
 *    positions). This often destroys the quality of the lower-bound solution
 *    and is considered the "upper-bound" on the solution quality.
 *
 * In each iteration the upper-bound solution is fed back into the solver as a
 * "hint" of what a legal solution looks like, so the solver can make
 * decisions knowing where the blocks will end up in the legal solution. This
 * worsens the quality of the lower-bound solution; however, passing that
 * solution back into the legalizer will likely improve the quality of the
 * upper-bound solution.
 *
 * Over several iterations the upper-bound and lower-bound solutions approach
 * each other until a good quality, mostly-legal solution is found.
 */
class SimPLGlobalPlacer : public GlobalPlacer {
  public:
    /**
     * @brief Constructor for the SimPL Global Placer
     *
     * Constructs the solver and the partial legalizer used by the placer.
     *
     *  @param netlist  The AP netlist to be placed.
     */
    SimPLGlobalPlacer(const APNetlist& netlist);

    /**
     * @brief Run a SimPL-like global placement algorithm
     *
     * Iteratively runs the solver and the legalizer until a good quality and
     * mostly-legal placement is found.
     */
    PartialPlacement place() final;

  private:
    /// @brief Upper limit on the number of iterations the global placer
    ///        performs.
    static constexpr size_t max_num_iterations_ = 100;

    /// @brief Target relative gap between the HPWL of the upper-bound and
    ///        lower-bound placements. The placer stops once the difference
    ///        between the two bounds, normalized to the upper-bound, is
    ///        smaller than this number.
    static constexpr double target_hpwl_relative_gap_ = 0.10;

    /// @brief The solver which generates the lower-bound placement.
    std::unique_ptr<AnalyticalSolver> solver_;

    /// @brief The legalizer which generates the upper-bound placement.
    std::unique_ptr<PartialLegalizer> partial_legalizer_;
};

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time 
crit_path_total_timing_analysis_time crit_path_total_sta_time
fixed_k6_frac_N8_22nm.xml single_wire.v common 4.25 vpr 70.91 MiB -1 -1 0.18 16276 1 0.39 -1 -1 29812 -1 -1 0 1 0 0 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72608 1 1 0 2 0 1 2 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.14 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.14 70.9 MiB 0.10 8 14 1 6.79088e+06 0 166176. 575.005 0.36 0.00138004 0.00129992 20206 45088 -1 19 1 1 1 194 45 0.7726 nan -0.7726 -0.7726 0 0 202963. 702.294 0.09 0.00 0.07 -1 -1 0.09 0.00122838 0.00119736
fixed_k6_frac_N8_22nm.xml single_ff.v common 4.68 vpr 71.03 MiB -1 -1 0.20 16236 1 0.39 -1 -1 29696 -1 -1 1 2 0 0 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72736 2 1 3 3 1 3 4 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 71.0 MiB 0.14 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 71.0 MiB 0.14 71.0 MiB 0.10 20 31 1 6.79088e+06 13472 414966. 1435.87 0.63 0.00135413 0.0012936 22510 95286 -1 35 1 2 2 213 52 0.942216 0.942216 -1.68896 -0.942216 0 0 503264. 1741.40 0.17 0.00 0.14 -1 -1 0.17 0.00127341 0.00123431
fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 6.69 vpr 71.67 MiB -1 -1 0.46 18220 3 0.40 -1 -1 33084 -1 -1 40 99 3 0 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 73388 99 130 240 229 1 247 272 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 71.7 MiB 0.28 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 71.7 MiB 0.28 71.7 MiB 0.21 32 3122 15 6.79088e+06 2.18288e+06 586450. 2029.24 1.84 0.271358 0.247517 24814 144142 -1 2952 30 711 1121 349988 188928 2.0466 2.0466 -154.346 -2.0466 -0.04337 -0.04337 744469. 2576.02 0.25 0.25 0.22 -1 -1 0.25 0.102379 0.0937273
fixed_k6_frac_N8_22nm.xml diffeq1.v common 32.17 vpr 74.24 MiB -1 -1 0.75 23104 15 0.61 -1 -1 34204 -1 -1 74 162 0 5 success v8.0.0-11571-g5eb3aa508 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T00:28:35 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 76024 162 96 817 258 1 797 337 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.01 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.01 74.2 MiB 0.60 60 15916 46 6.79088e+06 2.97693e+06 1.01997e+06 3529.29 24.68 3.56948 3.3772 29998 257685 -1 13617 19 4413 11558 1499556 342325 21.9657 21.9657 -1806.56 -21.9657 0 0 1.27783e+06 4421.56 0.40 0.79 0.44 -1 -1 0.40 0.334496 0.31821
fixed_k6_frac_N8_22nm.xml single_wire.v common 2.23 vpr 70.88 MiB -1 -1 0.13 15888 1 0.16 -1 -1 29628 -1 -1 0 1 0 0 success v8.0.0-11573-g5ea68eac9 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T14:02:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72576 1 1 0 2 0 1 2 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.13 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 70.9 MiB 0.13 70.9 MiB 0.09 8 14 1 6.79088e+06 0 166176. 575.005 0.37 0.001421 0.00133975 20206 45088 -1 19 1 1 1 194 45 0.7726 nan -0.7726 -0.7726 0 0 202963. 702.294 0.09 0.00 0.07 -1 -1 0.09 0.00128701 0.00125353
fixed_k6_frac_N8_22nm.xml single_ff.v common 2.68 vpr 70.80 MiB -1 -1 0.14 16336 1 0.16 -1 -1 29640 -1 -1 1 2 0 0 success v8.0.0-11573-g5ea68eac9 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T14:02:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 72500 2 1 3 3 1 3 4 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 70.8 MiB 0.14 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 70.8 MiB 0.14 70.8 MiB 0.09 20 31 1 6.79088e+06 13472 414966. 1435.87 0.65 0.00147526 0.00140277 22510 95286 -1 35 1 2 2 213 52 0.942216 0.942216 -1.68896 -0.942216 0 0 503264. 1741.40 0.18 0.00 0.15 -1 -1 0.18 0.00127433 0.00123146
fixed_k6_frac_N8_22nm.xml ch_intrinsics.v common 4.51 vpr 71.77 MiB -1 -1 0.47 18200 3 0.09 -1 -1 33188 -1 -1 41 99 3 0 success v8.0.0-11573-g5ea68eac9 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T14:02:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 73496 99 130 240 229 1 247 273 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 71.8 MiB 0.30 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 71.8 MiB 0.30 71.8 MiB 0.23 32 3145 19 6.79088e+06 2.19635e+06 586450. 2029.24 1.54 0.232941 0.21265 24814 144142 -1 2897 11 635 1003 105957 24691 2.0466 2.0466 -155.681 -2.0466 -0.21204 -0.16867 744469. 2576.02 0.27 0.09 0.22 -1 -1 0.27 0.046062 0.042439
fixed_k6_frac_N8_22nm.xml diffeq1.v common 30.51 vpr 74.25 MiB -1 -1 0.73 23000 15 0.35 -1 -1 34316 -1 -1 65 162 0 5 success v8.0.0-11573-g5ea68eac9 release VTR_ASSERT_LEVEL=3 GNU 9.4.0 on Linux-4.15.0-213-generic x86_64 2024-10-18T14:02:52 betzgrp-wintermute.eecg.utoronto.ca /home/singera8/vtr-verilog-to-routing/vtr_flow/tasks 76028 162 96 817 258 1 792 328 17 17 289 -1 unnamed_device -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.81 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 74.2 MiB 1.81 74.2 MiB 0.61 68 16708 27 6.79088e+06 2.85568e+06 1.14541e+06 3963.36 22.39 3.52495 3.33579 31438 289477 -1 14397 19 4042 10567 1489293 328107 22.3059 22.3059 -1909.12 -22.3059 0 0 1.42693e+06 4937.46 0.51 0.78 0.51 -1 -1 0.51 0.316463 0.300694

0 comments on commit 2586a98

Please sign in to comment.