Skip to content

Commit

Permalink
feat: implement validation pass and supporting infrastructure
Browse files Browse the repository at this point in the history
This commit implements the following:

* The `Rule` trait, used to implement validation rules for different
  types of IR items
* The `ModuleValidator` pass, used to run a suite of validations against
  modules and the functions they contain
* A suite of validation rules for things critical areas where we want to
  catch errors in the IR before attempting to process it for code
  generation: naming conventions, function signatures, ensuring blocks
  are valid, and that key properties of the SSA representation are
  upheld, such as value uses being dominated by their definitions, as
  well as a typechecking rule.
  • Loading branch information
bitwalker committed Oct 18, 2023
1 parent e89c194 commit 7a86ed1
Show file tree
Hide file tree
Showing 6 changed files with 2,429 additions and 2 deletions.
10 changes: 8 additions & 2 deletions hir-analysis/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ mod control_flow;
mod dominance;
mod liveness;
mod loops;
mod validation;

pub use self::control_flow::{BlockPredecessor, ControlFlowGraph};
pub use self::dominance::{DominanceFrontier, DominatorTree, DominatorTreePreorder};
pub use self::liveness::LivenessAnalysis;
pub use self::loops::{Loop, LoopAnalysis, LoopLevel};
pub use self::validation::{ModuleValidator, Rule};

use anyhow::anyhow;

Expand Down Expand Up @@ -164,11 +166,15 @@ impl FunctionAnalysis {
pub fn cfg_changed(&mut self, function: &miden_hir::Function) {
// If the dominator tree hasn't been computed, no other
// analyses could possibly have been computed yet.
let Some(domtree) = self.domtree.as_mut() else { return; };
let Some(domtree) = self.domtree.as_mut() else {
return;
};
domtree.compute(function, &self.cfg);

// Likewise for loop analysis - we can't compute liveness without it
let Some(loops) = self.loops.as_mut() else { return; };
let Some(loops) = self.loops.as_mut() else {
return;
};
loops.compute(function, &self.cfg, domtree);

if let Some(liveness) = self.liveness.as_mut() {
Expand Down
243 changes: 243 additions & 0 deletions hir-analysis/src/validation/block.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
use miden_diagnostics::{DiagnosticsHandler, Severity, SourceSpan, Spanned};
use miden_hir::*;
use rustc_hash::FxHashSet;
use smallvec::SmallVec;

use super::{Rule, ValidationError};
use crate::DominatorTree;

/// This validation rule ensures that all values definitions dominate their uses.
///
/// For example, it is not valid to use a value in a block when its definition only
/// occurs along a subset of control flow paths which may be taken to that block.
///
/// This also catches uses of values which are orphaned (i.e. they are defined by
/// a block parameter or instruction which is not attached to the function).
pub struct DefsDominateUses<'a> {
dfg: &'a DataFlowGraph,
domtree: &'a DominatorTree,
}
impl<'a> DefsDominateUses<'a> {
pub fn new(dfg: &'a DataFlowGraph, domtree: &'a DominatorTree) -> Self {
Self { dfg, domtree }
}
}
impl<'a> Rule<BlockData> for DefsDominateUses<'a> {
fn validate(
&mut self,
block_data: &BlockData,
diagnostics: &DiagnosticsHandler,
) -> Result<(), ValidationError> {
let current_block = block_data.id;
let mut uses = FxHashSet::<Value>::default();
let mut defs = FxHashSet::<Value>::default();
for node in block_data.insts.iter() {
let span = node.span();

uses.clear();
defs.clear();

// Verify the integrity of the instruction results
for result in self.dfg.inst_results(node.key) {
// It should never be possible for a value id to be present in the result set twice
assert!(defs.insert(*result));
}

// Gather all value uses to check
uses.extend(node.arguments(&self.dfg.value_lists).iter().copied());
match node.analyze_branch(&self.dfg.value_lists) {
BranchInfo::NotABranch => (),
BranchInfo::SingleDest(_, args) => {
uses.extend(args.iter().copied());
}
BranchInfo::MultiDest(ref jts) => {
for jt in jts.iter() {
uses.extend(jt.args.iter().copied());
}
}
}

// Make sure there are no uses of the instructions own results
if !defs.is_disjoint(&uses) {
invalid_instruction!(
diagnostics,
node.key,
span,
"an instruction may not use its own results as arguments",
"This situation can only arise if one has manually modified the arguments of an instruction, \
incorrectly inserting a value obtained from the set of instruction results."
);
}

// Next, ensure that all used values are dominated by their definition
for value in uses.iter().copied() {
match self.dfg.value_data(value) {
// If the value comes from the current block's parameter list, this use is trivially dominated
ValueData::Param { block, .. } if block == &current_block => continue,
// If the value comes from another block, then as long as all paths to the current
// block flow through that block, then this use is dominated by its definition
ValueData::Param { block, .. } => {
if self.domtree.dominates(*block, current_block, &self.dfg) {
continue;
}
}
// If the value is an instruction result, then as long as all paths to the current
// instruction flow through the defining instruction, then this use is dominated
// by its definition
ValueData::Inst { inst, .. } => {
if self.domtree.dominates(*inst, node.key, &self.dfg) {
continue;
}
}
}

// If we reach here, the use of `value` is not dominated by its definition,
// so this use is invalid
invalid_instruction!(
diagnostics,
node.key,
span,
"an argument of this instruction, {value}, is not defined on all paths leading to this point",
"All uses of a value must be dominated by its definition, i.e. all control flow paths \
from the function entry to the point of each use must flow through the point where \
that value is defined."
);
}
}

Ok(())
}
}

/// This validation rule ensures that most block-local invariants are upheld:
///
/// * A block may not be empty
/// * A block must end with a terminator instruction
/// * A block may not contain a terminator instruction in any position but the end
/// * A block which terminates with a branch instruction must reference a block
/// that is present in the function body (i.e. it is not valid to reference
/// detached blocks)
/// * A multi-way branch instruction must have at least one successor
/// * A multi-way branch instruction must not specify the same block as a successor multiple times.
///
/// This rule does not perform type checking, or verify use/def dominance.
pub struct BlockValidator<'a> {
dfg: &'a DataFlowGraph,
span: SourceSpan,
}
impl<'a> BlockValidator<'a> {
pub fn new(dfg: &'a DataFlowGraph, span: SourceSpan) -> Self {
Self { dfg, span }
}
}
impl<'a> Rule<BlockData> for BlockValidator<'a> {
fn validate(
&mut self,
block_data: &BlockData,
diagnostics: &DiagnosticsHandler,
) -> Result<(), ValidationError> {
// Ignore blocks which are not attached to the function body
if !block_data.link.is_linked() {
return Ok(());
}

// Ensure there is a terminator, and that it is valid
let id = block_data.id;
let terminator = block_data.insts.back().get();
if terminator.is_none() {
// This block is empty
invalid_block!(
diagnostics,
id,
self.span,
"block cannot be empty",
"Empty blocks are only valid when detached from the function body"
);
}

let terminator = terminator.unwrap();
let op = terminator.opcode();
if !op.is_terminator() {
invalid_block!(
diagnostics,
id,
self.span,
"invalid block terminator",
format!("The last instruction in a block must be a terminator, but {id} ends with {op} which is not a valid terminator")
);
}
match terminator.analyze_branch(&self.dfg.value_lists) {
BranchInfo::SingleDest(destination, _) => {
let dest = self.dfg.block(destination);
if !dest.link.is_linked() {
invalid_instruction!(
diagnostics,
terminator.key,
terminator.span(),
"invalid successor",
format!("A block reference is only valid if the referenced block is present in the function layout. \
{id} references {destination}, but the latter is not in the layout")
);
}
}
BranchInfo::MultiDest(ref jts) => {
if jts.is_empty() {
invalid_instruction!(
diagnostics,
terminator.key,
terminator.span(),
"incomplete {op} instruction",
"This instruction normally has 2 or more successors, but none were given."
);
}

let mut seen = SmallVec::<[Block; 4]>::default();
for jt in jts.iter() {
let dest = self.dfg.block(jt.destination);
let destination = jt.destination;
if !dest.link.is_linked() {
invalid_instruction!(
diagnostics,
terminator.key,
terminator.span(),
"invalid successor",
format!("A block reference is only valid if the referenced block is present in the function layout. \
{id} references {destination}, but the latter is not in the layout")
);
}

if seen.contains(&jt.destination) {
invalid_instruction!(
diagnostics,
terminator.key,
terminator.span(),
"invalid {op} instruction",
format!("A given block may only be a successor along a single control flow path, \
but {id} uses {destination} as a successor for more than one path")
);
}

seen.push(jt.destination);
}
}
BranchInfo::NotABranch => (),
}

// Verify that there are no terminator instructions in any other position than last
for node in block_data.insts.iter() {
let op = node.opcode();
if op.is_terminator() && node.key != terminator.key {
invalid_block!(
diagnostics,
id,
self.span,
"terminator found in middle of block",
format!("A block may only have a terminator instruction as the last instruction in the block, \
but {id} uses {op} before the end of the block")
);
}
}

Ok(())
}
}
Loading

0 comments on commit 7a86ed1

Please sign in to comment.