Skip to content

Commit

Permalink
Merge Compaction and Promotion (#1914)
Browse files Browse the repository at this point in the history
* throat clearing'

* progress

* heuristic/compaction choose

* fix ups

* rewrite tests

* promotion

* cargo

* no compact

* delete compaction

* fix tests

* documentation

* inference analysis

* rw set
  • Loading branch information
calebmkim authored Feb 16, 2024
1 parent d8702f5 commit 60f119e
Show file tree
Hide file tree
Showing 37 changed files with 569 additions and 177 deletions.
200 changes: 200 additions & 0 deletions calyx-opt/src/analysis/compaction_analysis.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
use crate::analysis::{ControlOrder, PromotionAnalysis};
use calyx_ir::{self as ir};
use ir::GetAttributes;
use itertools::Itertools;
use petgraph::{algo, graph::NodeIndex};
use std::collections::HashMap;

use super::read_write_set::AssignmentAnalysis;

/// Struct to perform compaction on `seqs`.
/// It will only work if you update_cont_read_writes for each component that
/// you run it on.
#[derive(Debug, Default)]
pub struct CompactionAnalysis {
    // Cells read by the component's continuous assignments. Populated by
    // `update_cont_read_writes`; consulted when building the dependency graph.
    cont_reads: Vec<ir::RRC<ir::Cell>>,
    // Cells written by the component's continuous assignments. Populated by
    // `update_cont_read_writes`; consulted when building the dependency graph.
    cont_writes: Vec<ir::RRC<ir::Cell>>,
}

impl CompactionAnalysis {
    /// Updates `self` so that compaction will take continuous assignments into
    /// account. Must be run on each component before using
    /// `compact_control_vec` on its control, as noted on the struct docs.
    pub fn update_cont_read_writes(&mut self, comp: &mut ir::Component) {
        let (cont_reads, cont_writes) = (
            comp.continuous_assignments
                .iter()
                .analysis()
                .cell_reads()
                .collect(),
            comp.continuous_assignments
                .iter()
                .analysis()
                .cell_writes()
                .collect(),
        );
        self.cont_reads = cont_reads;
        self.cont_writes = cont_writes;
    }

    // Given a total_order and sorted schedule, builds a vec of the original seq.
    // Note that this function assumes the `total_order` and `sorted_schedule`
    // represent a completely sequential schedule.
    fn recover_seq(
        mut total_order: petgraph::graph::DiGraph<Option<ir::Control>, ()>,
        sorted_schedule: Vec<(NodeIndex, u64)>,
    ) -> Vec<ir::Control> {
        sorted_schedule
            .into_iter()
            .map(|(i, _)| total_order[i].take().unwrap())
            .collect_vec()
    }

    /// Takes a vec of ctrl stmts and turns it into a compacted schedule.
    /// If compaction doesn't lead to any latency decreases, it just returns
    /// a vec of stmts in the original order.
    /// If it can compact, then it returns a vec with one
    /// element: a compacted static par.
    ///
    /// # Panics
    /// Panics if the dependency graph built from `stmts` contains a cycle
    /// (i.e., topological sorting fails).
    pub fn compact_control_vec(
        &mut self,
        stmts: Vec<ir::Control>,
        promotion_analysis: &mut PromotionAnalysis,
        builder: &mut ir::Builder,
    ) -> Vec<ir::Control> {
        // Records the corresponding node indices that each control program
        // has data dependency on.
        let mut dependency: HashMap<NodeIndex, Vec<NodeIndex>> = HashMap::new();
        // Records the latency of corresponding control operator for each
        // node index.
        let mut latency_map: HashMap<NodeIndex, u64> = HashMap::new();
        // Records the scheduled start time of corresponding control operator
        // for each node index.
        let mut schedule: HashMap<NodeIndex, u64> = HashMap::new();

        // Latency of the original fully-sequential schedule; used below to
        // decide whether compaction actually buys anything.
        let og_latency: u64 = stmts
            .iter()
            .map(PromotionAnalysis::get_inferred_latency)
            .sum();

        let mut total_order = ControlOrder::<false>::get_dependency_graph_seq(
            stmts.into_iter(),
            (&self.cont_reads, &self.cont_writes),
            &mut dependency,
            &mut latency_map,
        );

        if let Ok(order) = algo::toposort(&total_order, None) {
            let mut total_time: u64 = 0;

            // First we build the schedule (ASAP: each stmt starts as soon as
            // its latest dependency finishes).
            for i in order {
                // Start time is when the latest dependency finishes
                let start = dependency
                    .get(&i)
                    .unwrap()
                    .iter()
                    .map(|node| schedule[node] + latency_map[node])
                    .max()
                    .unwrap_or(0);
                schedule.insert(i, start);
                total_time = std::cmp::max(start + latency_map[&i], total_time);
            }

            // We sort the schedule by start time, breaking ties by node index.
            let mut sorted_schedule: Vec<(NodeIndex, u64)> =
                schedule.into_iter().collect();
            sorted_schedule.sort_by_key(|&(node, start)| (start, node));

            if total_time == og_latency {
                // If we can't compact at all, then just recover and return
                // the original seq.
                return Self::recover_seq(total_order, sorted_schedule);
            }

            // Threads for the static par, where each entry is (thread, thread_latency)
            let mut par_threads: Vec<(Vec<ir::Control>, u64)> = Vec::new();

            // We encode the schedule while trying to minimize the number of
            // par threads: greedily place each stmt on the first existing
            // thread that is free by the stmt's start time, padding with
            // no-op groups where there is a gap.
            'outer: for (i, start) in sorted_schedule {
                let control = total_order[i].take().unwrap();
                for (thread, thread_latency) in par_threads.iter_mut() {
                    if *thread_latency <= start {
                        if *thread_latency < start {
                            // Need a no-op group so the schedule starts correctly
                            let no_op = builder.add_static_group(
                                "no-op",
                                start - *thread_latency,
                            );
                            thread.push(ir::Control::Static(
                                ir::StaticControl::Enable(ir::StaticEnable {
                                    group: no_op,
                                    attributes: ir::Attributes::default(),
                                }),
                            ));
                            *thread_latency = start;
                        }
                        thread.push(control);
                        *thread_latency += latency_map[&i];
                        continue 'outer;
                    }
                }
                // We must create a new par thread.
                if start > 0 {
                    // If start > 0, then we must add a delay to the start of the
                    // group.
                    let no_op = builder.add_static_group("no-op", start);
                    let no_op_enable = ir::Control::Static(
                        ir::StaticControl::Enable(ir::StaticEnable {
                            group: no_op,
                            attributes: ir::Attributes::default(),
                        }),
                    );
                    par_threads.push((
                        vec![no_op_enable, control],
                        start + latency_map[&i],
                    ));
                } else {
                    par_threads.push((vec![control], latency_map[&i]));
                }
            }
            // Turn each thread (Vec<ir::Control>) into a single
            // ir::StaticControl, wrapping multi-stmt threads in a StaticSeq.
            let mut par_control_threads: Vec<ir::StaticControl> = Vec::new();
            for (thread, thread_latency) in par_threads {
                let mut promoted_stmts = thread
                    .into_iter()
                    .map(|mut stmt| {
                        promotion_analysis.convert_to_static(&mut stmt, builder)
                    })
                    .collect_vec();
                if promoted_stmts.len() == 1 {
                    // Don't wrap in static seq if we don't need to.
                    par_control_threads.push(promoted_stmts.pop().unwrap());
                } else {
                    par_control_threads.push(ir::StaticControl::Seq(
                        ir::StaticSeq {
                            stmts: promoted_stmts,
                            attributes: ir::Attributes::default(),
                            latency: thread_latency,
                        },
                    ));
                }
            }
            // Double checking that we have built the static par correctly:
            // the longest thread must match the computed schedule length.
            let max: Option<u64> =
                par_control_threads.iter().map(|c| c.get_latency()).max();
            assert_eq!(
                max.unwrap(),
                total_time,
                "The schedule expects latency {}. The static par that was built has latency {}",
                total_time,
                max.unwrap()
            );

            let mut s_par = ir::StaticControl::Par(ir::StaticPar {
                stmts: par_control_threads,
                attributes: ir::Attributes::default(),
                latency: total_time,
            });
            s_par.get_mut_attributes().insert(ir::BoolAttr::Promoted, 1);
            vec![ir::Control::Static(s_par)]
        } else {
            panic!(
                "Error when producing topo sort. Dependency graph has a cycle."
            );
        }
    }
}
37 changes: 34 additions & 3 deletions calyx-opt/src/analysis/inference_analysis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,11 @@ impl InferenceAnalysis {
/// Note that this expects that the component already is accounted for
/// in self.latency_data and self.static_component_latencies.
pub fn remove_component(&mut self, comp_name: ir::Id) {
self.updated_components.insert(comp_name);
if self.latency_data.contains_key(&comp_name) {
// To make inference as strong as possible, only update updated_components
// if we actually updated it.
self.updated_components.insert(comp_name);
}
self.latency_data.remove(&comp_name);
self.static_component_latencies.remove(&comp_name);
}
Expand All @@ -216,15 +220,22 @@ impl InferenceAnalysis {
&mut self,
(comp_name, adjusted_latency): (ir::Id, u64),
) {
self.updated_components.insert(comp_name);
// Check whether we actually updated the component's latency.
let mut updated = false;
self.latency_data.entry(comp_name).and_modify(|go_done| {
for (_, _, cur_latency) in &mut go_done.ports {
// Updating components with latency data.
*cur_latency = adjusted_latency;
if *cur_latency != adjusted_latency {
*cur_latency = adjusted_latency;
updated = true;
}
}
});
self.static_component_latencies
.insert(comp_name, adjusted_latency);
if updated {
self.updated_components.insert(comp_name);
}
}

/// Return true if the edge (`src`, `dst`) meet one these criteria, and false otherwise:
Expand Down Expand Up @@ -502,6 +513,26 @@ impl InferenceAnalysis {
seq.update_static(&self.static_component_latencies);
}

/// Re-applies `update_static` to `par` using the currently-known static
/// component latencies, so its latency annotations reflect any updates.
pub fn fixup_par(&self, par: &mut ir::Par) {
    par.update_static(&self.static_component_latencies);
}

/// Re-applies `update_static` to `_if` using the currently-known static
/// component latencies. (Named `_if` because `if` is a Rust keyword.)
pub fn fixup_if(&self, _if: &mut ir::If) {
    _if.update_static(&self.static_component_latencies);
}

/// Re-applies `update_static` to `_while` using the currently-known static
/// component latencies. (Named `_while` because `while` is a Rust keyword.)
pub fn fixup_while(&self, _while: &mut ir::While) {
    _while.update_static(&self.static_component_latencies);
}

/// Re-applies `update_static` to `repeat` using the currently-known static
/// component latencies.
pub fn fixup_repeat(&self, repeat: &mut ir::Repeat) {
    repeat.update_static(&self.static_component_latencies);
}

/// Re-applies `update_static` to an arbitrary control node `ctrl` using the
/// currently-known static component latencies.
pub fn fixup_ctrl(&self, ctrl: &mut ir::Control) {
    ctrl.update_static(&self.static_component_latencies);
}

/// "Fixes Up" the component. In particular:
/// 1. Removes @promotable annotations for any groups that write to any
/// `updated_components`.
Expand Down
2 changes: 2 additions & 0 deletions calyx-opt/src/analysis/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//! The analyses construct data-structures that make answering certain queries
//! about Calyx programs easier.
mod compaction_analysis;
mod compute_static;
mod control_id;
mod control_order;
Expand All @@ -22,6 +23,7 @@ mod share_set;
mod static_par_timing;
mod variable_detection;

pub use compaction_analysis::CompactionAnalysis;
pub use compute_static::IntoStatic;
pub use compute_static::WithStatic;
pub use control_id::ControlId;
Expand Down
11 changes: 4 additions & 7 deletions calyx-opt/src/default_passes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ use crate::passes::{
DeadAssignmentRemoval, DeadCellRemoval, DeadGroupRemoval, DiscoverExternal,
Externalize, GoInsertion, GroupToInvoke, GroupToSeq, HoleInliner,
InferShare, LowerGuards, MergeAssign, Papercut, ParToSeq,
RegisterUnsharing, RemoveIds, ResetInsertion, ScheduleCompaction,
SimplifyStaticGuards, SimplifyWithControl, StaticInference, StaticInliner,
StaticPromotion, SynthesisPapercut, TopDownCompileControl, UnrollBounded,
WellFormed, WireInliner, WrapMain,
RegisterUnsharing, RemoveIds, ResetInsertion, SimplifyStaticGuards,
SimplifyWithControl, StaticInference, StaticInliner, StaticPromotion,
SynthesisPapercut, TopDownCompileControl, UnrollBounded, WellFormed,
WireInliner, WrapMain,
};
use crate::traversal::Named;
use crate::{pass_manager::PassManager, register_alias};
Expand All @@ -35,7 +35,6 @@ impl PassManager {
pm.register_pass::<GroupToSeq>()?;
pm.register_pass::<InferShare>()?;
pm.register_pass::<CellShare>()?;
pm.register_pass::<ScheduleCompaction>()?;
pm.register_pass::<StaticInference>()?;
pm.register_pass::<StaticPromotion>()?;
pm.register_pass::<SimplifyStaticGuards>()?;
Expand Down Expand Up @@ -94,8 +93,6 @@ impl PassManager {
SimplifyWithControl, // Must run before compile-invoke
CompileInvoke, // creates dead comb groups
StaticInference,
ScheduleCompaction,
StaticPromotion,
StaticPromotion,
CompileRepeat,
DeadGroupRemoval, // Since previous passes potentially create dead groups
Expand Down
2 changes: 0 additions & 2 deletions calyx-opt/src/passes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ mod par_to_seq;
mod register_unsharing;
mod remove_ids;
mod reset_insertion;
mod schedule_compaction;
mod simplify_static_guards;
mod static_inference;
mod static_inliner;
Expand Down Expand Up @@ -72,7 +71,6 @@ pub use par_to_seq::ParToSeq;
pub use register_unsharing::RegisterUnsharing;
pub use remove_ids::RemoveIds;
pub use reset_insertion::ResetInsertion;
pub use schedule_compaction::ScheduleCompaction;
pub use simplify_static_guards::SimplifyStaticGuards;
pub use simplify_with_control::SimplifyWithControl;
pub use static_inference::StaticInference;
Expand Down
Loading

0 comments on commit 60f119e

Please sign in to comment.