diff --git a/CHANGES.md b/CHANGES.md index 65ba2ab95..bfd56cc07 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,7 @@ 0.9-dev === +- Improve Control Flow Propagation normalization pass (PR #462) - Improve taint analysis abstraction to simplify interprocedural bottom-up analysis of memory taint (PR #451) - Added check for CWE-252: Unchecked Return Value (PR #451) diff --git a/src/cwe_checker_lib/src/analysis/graph.rs b/src/cwe_checker_lib/src/analysis/graph.rs index 25087c366..8ce3d3699 100644 --- a/src/cwe_checker_lib/src/analysis/graph.rs +++ b/src/cwe_checker_lib/src/analysis/graph.rs @@ -44,11 +44,14 @@ use crate::intermediate_representation::*; use crate::prelude::*; -use crate::utils::log::LogMessage; -use petgraph::graph::DiGraph; +use crate::utils::{debug::ToJsonCompact, log::LogMessage}; use std::collections::{HashMap, HashSet}; pub use petgraph::graph::NodeIndex; +use petgraph::{ + graph::DiGraph, + visit::{EdgeRef, IntoNodeReferences}, +}; /// The graph type of an interprocedural control flow graph pub type Graph<'a> = DiGraph, Edge<'a>>; @@ -182,6 +185,25 @@ pub enum Edge<'a> { ReturnCombine(&'a Term), } +impl<'a> std::fmt::Display for Edge<'a> { + fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::Block => { + write!(formatter, "Block") + } + Self::Jump(..) => { + write!(formatter, "Jump") + } + Self::Call { .. } => write!(formatter, "Call",), + Self::ExternCallStub { .. } => write!(formatter, "ExternCallStub",), + Self::CrCallStub => write!(formatter, "CrCallStub"), + Self::CrReturnStub => write!(formatter, "CrReturnStub"), + Self::CallCombine(..) => write!(formatter, "CallCombine"), + Self::ReturnCombine(..) => write!(formatter, "ReturnCombine"), + } + } +} + /// A builder struct for building graphs struct GraphBuilder<'a> { program: &'a Term, @@ -531,6 +553,82 @@ pub fn get_entry_nodes_of_subs(graph: &Graph) -> HashMap { sub_to_entry_node_map } +impl ToJsonCompact for Graph<'_> { + fn to_json_compact(&self) -> serde_json::Value { + let mut map = serde_json::Map::new(); + let mut node_counts_map = serde_json::Map::new(); + let mut edge_counts_map = serde_json::Map::new(); + let mut nodes_map = serde_json::Map::new(); + let mut edges_map = serde_json::Map::new(); + + let total_nodes = self.node_count(); + let mut blk_start_nodes = 0u64; + let mut blk_end_nodes = 0u64; + let mut call_return_nodes = 0u64; + let mut call_source_nodes = 0u64; + + for (idx, node) in self.node_references() { + nodes_map.insert(idx.index().to_string(), node.to_string().into()); + match node { + Node::BlkStart(..) => blk_start_nodes += 1, + Node::BlkEnd(..) => blk_end_nodes += 1, + Node::CallReturn { .. } => call_return_nodes += 1, + Node::CallSource { .. } => call_source_nodes += 1, + } + } + + node_counts_map.insert("total".into(), total_nodes.into()); + node_counts_map.insert("blk_start".into(), blk_start_nodes.into()); + node_counts_map.insert("blk_end".into(), blk_end_nodes.into()); + node_counts_map.insert("call_return".into(), call_return_nodes.into()); + node_counts_map.insert("call_source".into(), call_source_nodes.into()); + + let total_edges = self.edge_count(); + let mut block_edges = 0u64; + let mut jump_edges = 0u64; + let mut call_edges = 0u64; + let mut extern_call_stub_edges = 0u64; + let mut cr_call_stub_edges = 0u64; + let mut cr_return_stub_edges = 0u64; + let mut call_combine_edges = 0u64; + let mut return_combine_edges = 0u64; + + for edge in self.edge_references() { + edges_map.insert( + format!("{} -> {}", edge.source().index(), edge.target().index()), + edge.weight().to_string().into(), + ); + match edge.weight() { + Edge::Block => block_edges += 1, + Edge::Jump(..) => jump_edges += 1, + Edge::Call(..) => call_edges += 1, + Edge::ExternCallStub(..) => extern_call_stub_edges += 1, + Edge::CrCallStub => cr_call_stub_edges += 1, + Edge::CrReturnStub => cr_return_stub_edges += 1, + Edge::CallCombine(..) => call_combine_edges += 1, + Edge::ReturnCombine(..) => return_combine_edges += 1, + } + } + + edge_counts_map.insert("total".into(), total_edges.into()); + edge_counts_map.insert("block".into(), block_edges.into()); + edge_counts_map.insert("jump".into(), jump_edges.into()); + edge_counts_map.insert("call".into(), call_edges.into()); + edge_counts_map.insert("extern_call_stub".into(), extern_call_stub_edges.into()); + edge_counts_map.insert("cr_call_stub".into(), cr_call_stub_edges.into()); + edge_counts_map.insert("cr_return_stub".into(), cr_return_stub_edges.into()); + edge_counts_map.insert("call_combine".into(), call_combine_edges.into()); + edge_counts_map.insert("return_combine".into(), return_combine_edges.into()); + + map.insert("node_counts".into(), node_counts_map.into()); + map.insert("edge_counts".into(), edge_counts_map.into()); + map.insert("nodes".into(), nodes_map.into()); + map.insert("edges".into(), edges_map.into()); + + serde_json::Value::Object(map) + } +} + #[cfg(test)] mod tests { use crate::expr; diff --git a/src/cwe_checker_lib/src/intermediate_representation/blk.rs b/src/cwe_checker_lib/src/intermediate_representation/blk.rs index 6f3f11b0f..75e32c6ba 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/blk.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/blk.rs @@ -39,19 +39,22 @@ pub struct Blk { } impl Term { - /// Remove indirect jump target addresses for which no corresponding target block exists. - /// Return an error message for each removed address. + /// Remove indirect jump target addresses for which no corresponding target + /// block exists. + /// + /// Returns an error message for each removed address. pub fn remove_nonexisting_indirect_jump_targets( &mut self, - known_block_tids: &HashSet, + all_jump_targets: &HashSet, ) -> Result<(), Vec> { let mut logs = Vec::new(); + self.term.indirect_jmp_targets = self .term .indirect_jmp_targets .iter() .filter_map(|target| { - if known_block_tids.get(target).is_some() { + if all_jump_targets.contains(target) { Some(target.clone()) } else { let error_msg = @@ -61,12 +64,26 @@ impl Term { } }) .collect(); + if logs.is_empty() { Ok(()) } else { Err(logs) } } + + /// Returns a new artificial sink block with the given suffix attached to + /// its ID. + pub fn artificial_sink(id_suffix: &str) -> Self { + Self { + tid: Tid::artificial_sink_block(id_suffix), + term: Blk { + defs: Vec::with_capacity(0), + jmps: Vec::with_capacity(0), + indirect_jmp_targets: Vec::with_capacity(0), + }, + } + } } impl fmt::Display for Blk { diff --git a/src/cwe_checker_lib/src/intermediate_representation/project.rs b/src/cwe_checker_lib/src/intermediate_representation/project.rs index 31e3694e3..3829aebb4 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project.rs @@ -5,7 +5,6 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; /// Contains implementation of the block duplication normalization pass. mod block_duplication_normalization; use block_duplication_normalization::*; -/// Contains implementation of the propagate control flow normalization pass. mod propagate_control_flow; use propagate_control_flow::*; @@ -109,107 +108,189 @@ impl Project { } } - /// Replace the return-to TID of calls to extern symbols that are marked as non-returning (e.g. `exit(..)`) - /// with the provided TID of an artificial sink block. + /// Replaces the return-to TID of calls to non-returning functions with the + /// TID of the artificial sink block in the caller. /// - /// Returns `true` if at least one return target has been replaced this way. - fn retarget_calls_to_non_returning_symbols_to_artificial_sink( - &mut self, - dummy_blk_tid: &Tid, - ) -> bool { - let mut has_at_least_one_jmp_been_retargeted = false; + /// We distinguish two kinds of non-returning functions: + /// + /// - extern symbols that are marked as non-returning, e.g.,`exit(..)`, + /// - functions without a return instruction. + /// + /// For calls to the latter functions, no [`CallReturn`] nodes and + /// corresponding edges will be generated in the CFG. This implies that no + /// interprocedural analysis will happen for those calls. Furthermore, the + /// missing incoming edge to the return node implies that the node may be + /// optimized away by the [control flow propagation pass]. The reference to + /// the return site is now "dangling" and may lead to panics when + /// constructing the CFG (Issue #461). + /// + /// Thus, we lose nothing if we retarget the return block, even if our + /// analysis is incorrect and the callee in fact returns to the originally + /// indicated site. Cases where we misclassify a callee include: + /// + /// - functions ending in an indirect tail jump, + /// - epilogs like `lrd pc, [sp], #0x04` that are essentially a ret but + /// Ghidra sometimes thinks its an indirect jump, + /// - cases where the callee code that we get from Gidra is incomplete. + /// + /// This heuristic works better when the block-to-sub mapping is unique + /// since this pass may inline return site into callees that end in a tail + /// jump, i.e., call this after [`make_block_to_sub_mapping_unique`]. + /// This pass preserves a unique block-to-sub mapping. + /// + /// [`CallReturn`]: crate::analysis::graph::Node::CallReturn + /// [control flow propagation pass]: mod@propagate_control_flow + #[must_use] + fn retarget_non_returning_calls_to_artificial_sink(&mut self) -> Vec { + let (non_returning_subs, mut log_messages) = self.find_non_returning_subs(); + + // INVARIANT: A unique block-to-sub mapping is preserved. + for sub in self + .program + .term + .subs + .values_mut() + .filter(|sub| !sub.tid.is_artificial_sink_sub()) + { + let sub_id_suffix = sub.id_suffix(); + let mut one_or_more_call_retargeted = false; - for sub in self.program.term.subs.values_mut() { for block in sub.term.blocks.iter_mut() { for jmp in block.term.jmps.iter_mut() { - if let Jmp::Call { + let Jmp::Call { target, return_: Some(return_tid), } = &mut jmp.term + else { + continue; + }; + + if return_tid.is_artificial_sink_block(&sub_id_suffix) { + // The call is already returning to the function's + // artificial sink so there is nothing to do. + continue; + } else if let Some(extern_symbol) = self.program.term.extern_symbols.get(target) { - if let Some(extern_symbol) = self.program.term.extern_symbols.get(target) { - if extern_symbol.no_return { - // Call to an extern symbol that does not return. - *return_tid = dummy_blk_tid.clone(); - has_at_least_one_jmp_been_retargeted = true; - } + if extern_symbol.no_return { + // Reroute returns from calls to non-returning + // library functions. + *return_tid = Tid::artificial_sink_block(&sub_id_suffix); + + one_or_more_call_retargeted = true; } + } else if non_returning_subs.contains(target) { + // Reroute returns from calls to non-returning + // functions within the program. + log_messages.push(LogMessage::new_info(format!( + "Call @ {} to {} does not return to {}.", + jmp.tid, target, return_tid + ))); + + *return_tid = Tid::artificial_sink_block(&sub_id_suffix); + + one_or_more_call_retargeted = true; } } } + + // Add artificial sink block if required. + if one_or_more_call_retargeted { + sub.add_artifical_sink(); + } } - has_at_least_one_jmp_been_retargeted + + log_messages } - /// Replace jumps to nonexisting TIDs with jumps to a dummy target - /// representing an artificial sink in the control flow graph. - /// Return a log message for each replaced jump target. - /// Also retarget the return address of extern symbol calls that are marked as non-returning to the artificial sink. - /// - /// Nonexisting jump targets may be generated by the Ghidra backend - /// if the data at the target address is not a valid assembly instruction. - #[must_use] - fn remove_references_to_nonexisting_tids_and_retarget_non_returning_calls( - &mut self, - ) -> Vec { - // Gather all existing jump targets + /// Returns the set of all subs without a return instruction. + fn find_non_returning_subs(&self) -> (HashSet, Vec) { + let mut log_messages = Vec::new(); + let non_returning_subs = self + .program + .term + .subs + .values() + .filter_map(|sub| { + let sub_returns = sub.term.blocks.iter().any(|block| { + block + .term + .jmps + .iter() + .any(|jmp| matches!(jmp.term, Jmp::Return(..))) + }); + + if sub_returns || sub.tid.is_artificial_sink_sub() { + None + } else { + log_messages.push(LogMessage::new_info(format!( + "{} is non-returning.", + sub.tid + ))); + + Some(sub.tid.clone()) + } + }) + .collect(); + + (non_returning_subs, log_messages) + } + + /// Adds a function that serves as an artificial sink in the CFG. + fn add_artifical_sink(&mut self) { + self.program + .term + .subs + .insert(Tid::artificial_sink_sub(), Term::::artificial_sink()); + } + + /// Returns the set of all valid jump targets. + fn find_all_jump_targets(&self) -> HashSet { let mut jump_target_tids = HashSet::new(); + for sub in self.program.term.subs.values() { jump_target_tids.insert(sub.tid.clone()); for block in sub.term.blocks.iter() { jump_target_tids.insert(block.tid.clone()); } } + for symbol_tid in self.program.term.extern_symbols.keys() { jump_target_tids.insert(symbol_tid.clone()); } - // Replace all jumps to non-existing jump targets with jumps to dummy targets - let dummy_sub_tid = Tid::new("Artificial Sink Sub"); - let dummy_blk_tid = Tid::new("Artificial Sink Block"); + + jump_target_tids + } + + /// Replace jumps to nonexisting TIDs with jumps to a dummy target + /// representing an artificial sink in the control flow graph. + /// Return a log message for each replaced jump target. + /// + /// Nonexisting jump targets may be generated by the Ghidra backend + /// if the data at the target address is not a valid assembly instruction. + #[must_use] + fn remove_references_to_nonexisting_tids(&mut self) -> Vec { let mut log_messages = Vec::new(); + let all_jump_targets = self.find_all_jump_targets(); + + // Replace all jumps to non-existing jump targets with jumps to dummy + // targets. for sub in self.program.term.subs.values_mut() { for block in sub.term.blocks.iter_mut() { if let Err(mut logs) = - block.remove_nonexisting_indirect_jump_targets(&jump_target_tids) + block.remove_nonexisting_indirect_jump_targets(&all_jump_targets) { log_messages.append(&mut logs); } for jmp in block.term.jmps.iter_mut() { - if let Err(log_msg) = jmp.retarget_nonexisting_jump_targets_to_dummy_tid( - &jump_target_tids, - &dummy_sub_tid, - &dummy_blk_tid, - ) { + if let Err(log_msg) = + jmp.retarget_nonexisting_jump_targets_to_artificial_sink(&all_jump_targets) + { log_messages.push(log_msg); } } } } - // Also replace return targets of calls to non-returning extern symbols - let dummy_blk_needs_to_be_added = - self.retarget_calls_to_non_returning_symbols_to_artificial_sink(&dummy_blk_tid); - // If at least one dummy jump was inserted, add the corresponding dummy sub and block to the program. - if dummy_blk_needs_to_be_added || !log_messages.is_empty() { - let dummy_sub: Term = Term { - tid: dummy_sub_tid, - term: Sub { - name: "Artificial Sink Sub".to_string(), - blocks: vec![Term { - tid: dummy_blk_tid, - term: Blk { - defs: Vec::new(), - jmps: Vec::new(), - indirect_jmp_targets: Vec::new(), - }, - }], - calling_convention: None, - }, - }; - self.program - .term - .subs - .insert(dummy_sub.tid.clone(), dummy_sub); - } + log_messages } @@ -289,10 +370,13 @@ impl Project { #[must_use] pub fn normalize(&mut self) -> Vec { let mut logs = self.remove_duplicate_tids(); + self.add_artifical_sink(); + logs.append(self.remove_references_to_nonexisting_tids().as_mut()); + make_block_to_sub_mapping_unique(self); logs.append( - &mut self.remove_references_to_nonexisting_tids_and_retarget_non_returning_calls(), + self.retarget_non_returning_calls_to_artificial_sink() + .as_mut(), ); - make_block_to_sub_mapping_unique(self); crate::analysis::expression_propagation::propagate_input_expression(self); self.substitute_trivial_expressions(); crate::analysis::dead_variable_elimination::remove_dead_var_assignments(self); @@ -307,29 +391,32 @@ impl Project { } impl Term { - /// If the TID of a jump target or return target is not contained in `known_tids` - /// replace it with a dummy TID and return an error message. - fn retarget_nonexisting_jump_targets_to_dummy_tid( + /// If the TID of a jump target, call target, or return target is not + /// contained in in the known jump targets, replace it with a dummy TID and + /// return an error message. + fn retarget_nonexisting_jump_targets_to_artificial_sink( &mut self, - known_tids: &HashSet, - dummy_sub_tid: &Tid, - dummy_blk_tid: &Tid, + all_jump_targets: &HashSet, ) -> Result<(), LogMessage> { use Jmp::*; match &mut self.term { - BranchInd(_) => (), - Branch(tid) | CBranch { target: tid, .. } if known_tids.get(tid).is_none() => { + BranchInd(_) => Ok(()), + Branch(tid) | CBranch { target: tid, .. } if !all_jump_targets.contains(tid) => { let error_msg = format!("Jump target at {} does not exist", tid.address); let error_log = LogMessage::new_error(error_msg).location(self.tid.clone()); - *tid = dummy_blk_tid.clone(); - return Err(error_log); + + *tid = Tid::artificial_sink_block(""); + + Err(error_log) } - Call { target, return_ } if known_tids.get(target).is_none() => { + Call { target, return_ } if !all_jump_targets.contains(target) => { let error_msg = format!("Call target at {} does not exist", target.address); let error_log = LogMessage::new_error(error_msg).location(self.tid.clone()); - *target = dummy_sub_tid.clone(); + + *target = Tid::artificial_sink_sub(); *return_ = None; - return Err(error_log); + + Err(error_log) } Call { return_: Some(return_tid), @@ -342,15 +429,16 @@ impl Term { | CallOther { return_: Some(return_tid), .. - } if known_tids.get(return_tid).is_none() => { + } if !all_jump_targets.contains(return_tid) => { let error_msg = format!("Return target at {} does not exist", return_tid.address); let error_log = LogMessage::new_error(error_msg).location(self.tid.clone()); - *return_tid = dummy_blk_tid.clone(); - return Err(error_log); + + *return_tid = Tid::artificial_sink_block(""); + + Err(error_log) } - _ => (), + _ => Ok(()), } - Ok(()) } } @@ -366,12 +454,8 @@ mod tests { }; assert_eq!(jmp_term.term, Jmp::Branch(Tid::new("nonexisting_target"))); assert!(jmp_term - .retarget_nonexisting_jump_targets_to_dummy_tid( - &HashSet::new(), - &Tid::new("dummy_sub"), - &Tid::new("dummy_blk") - ) + .retarget_nonexisting_jump_targets_to_artificial_sink(&HashSet::new(),) .is_err()); - assert_eq!(jmp_term.term, Jmp::Branch(Tid::new("dummy_blk"))); + assert_eq!(jmp_term.term, Jmp::Branch(Tid::artificial_sink_block(""))); } } diff --git a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs index 2f50f55a3..eed888509 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs @@ -1,75 +1,134 @@ -use crate::analysis::graph::{Edge, Graph, Node}; +//! Control Flow Propagation Normalization Pass +//! +//! The `propagate_control_flow` normalization pass tries to simplify the +//! representation of sequences of if-else blocks that all have the same +//! condition. After this transformation the program should be of a form where +//! either all or none of the blocks are executed. Such sequences are often +//! generated by sequences of conditional assignment assembly instructions. +//! +//! In addition to the above, the pass also removes blocks that consist of a +//! single, unconditional jump. +//! +//! For each "re-targetalbe" intraprocedural control flow transfer, i.e., +//! call-returns and (conditional) jumps, the pass computes a new target that is +//! equivalent to the old target but skips zero or more intermediate blocks. +//! Knowledge about conditions that are always true when a particular branch is +//! executed are used to resolve the target of intermediate conditional jumps. +//! +//! Lastly, the newly bypassed blocks are considered dead code and are removed. + +use crate::analysis::graph::{self, Edge, Graph, Node}; use crate::intermediate_representation::*; + +use std::collections::{BTreeSet, HashMap, HashSet}; + use petgraph::graph::NodeIndex; use petgraph::Direction::Incoming; -use std::collections::{BTreeSet, HashMap, HashSet}; -/// The `propagate_control_flow` normalization pass tries to simplify the representation of -/// sequences of if-else blocks that all have the same condition -/// so that they are either all executed or none of the blocks are executed. -/// Such sequences are often generated by sequences of conditional assignment assembly instructions. +/// Performs the Control Flow Propagation normalization pass. /// -/// To simplify the generated control flow graph -/// (and thus propagate the knowledge that either all or none of these blocks are executed to the control flow graph) -/// we look for sequences of (conditional) jumps where the final jump target is determined by the source of the first jump -/// (because we know that the conditionals for all jumps evaluate to the same value along the sequence). -/// For such a sequence we then retarget the destination of the first jump to the final jump destination of the sequence. -/// Lastly, the newly bypassed blocks are considered dead code and are removed. +/// See the module-level documentation for more information on what this pass +/// does. pub fn propagate_control_flow(project: &mut Project) { - let cfg = crate::analysis::graph::get_program_cfg(&project.program); - let nodes_without_incoming_edges_at_beginning = get_nodes_without_incoming_edge(&cfg); + let cfg_before_normalization = graph::get_program_cfg(&project.program); + let nodes_without_incoming_edges_at_beginning = + get_nodes_without_incoming_edge(&cfg_before_normalization); let mut jmps_to_retarget = HashMap::new(); - for node in cfg.node_indices() { - if let Node::BlkStart(block, sub) = cfg[node] { - // Check whether we already know the result of a conditional at the end of the block - let known_conditional_result = get_known_conditional_at_end_of_block(&cfg, node); - // Check whether we can propagate the control flow for outgoing jumps - match &block.term.jmps[..] { - [Term { - term: Jmp::Branch(target), - tid: jump_tid, - }] => { - if let Some(true_condition) = &known_conditional_result { - if let Some(new_target) = - find_target_for_retargetable_jump(target, &sub.term, true_condition) - { - jmps_to_retarget.insert(jump_tid.clone(), new_target); - } - } + for node in cfg_before_normalization.node_indices() { + let Node::BlkStart(block, sub) = cfg_before_normalization[node] else { + continue; + }; + // Conditions that we know to be true "on" a particular outgoing + // edge. + let mut true_conditions = Vec::with_capacity(2); + // Check if some condition must be true at the beginning of the block, + // and still holds after all DEFs are executed. + if let Some(block_precondition) = + get_block_precondition_after_defs(&cfg_before_normalization, node) + { + true_conditions.push(block_precondition); + } + match &block.term.jmps[..] { + [Term { + tid: call_tid, + term: + Jmp::Call { + target: _, + return_: Some(return_target), + }, + }] + | [Term { + tid: call_tid, + term: + Jmp::CallInd { + target: _, + return_: Some(return_target), + }, + }] + | [Term { + tid: call_tid, + term: + Jmp::CallOther { + description: _, + return_: Some(return_target), + }, + }] => { + if let Some(new_target) = find_target_for_retargetable_jump( + return_target, + &sub.term, + // Call may have side-effects that invalidate our + // knowledge about any condition we know to be true + // after execution of all DEFs in a block. + &Vec::with_capacity(0), + ) { + jmps_to_retarget.insert(call_tid.clone(), new_target); } - [Term { - term: - Jmp::CBranch { - condition, - target: if_target, - }, - tid: jump_tid_if, - }, Term { - term: Jmp::Branch(else_target), - tid: jump_tid_else, - }] => { - if let Some(new_target) = - find_target_for_retargetable_jump(if_target, &sub.term, condition) - { - jmps_to_retarget.insert(jump_tid_if.clone(), new_target); - } - if let Some(new_target) = find_target_for_retargetable_jump( - else_target, - &sub.term, - &negate_condition(condition.clone()), - ) { - jmps_to_retarget.insert(jump_tid_else.clone(), new_target); - } + } + [Term { + tid: jump_tid, + term: Jmp::Branch(target), + }] => { + if let Some(new_target) = + find_target_for_retargetable_jump(target, &sub.term, &true_conditions) + { + jmps_to_retarget.insert(jump_tid.clone(), new_target); + } + } + [Term { + term: + Jmp::CBranch { + condition, + target: if_target, + }, + tid: jump_tid_if, + }, Term { + term: Jmp::Branch(else_target), + tid: jump_tid_else, + }] => { + true_conditions.push(condition.clone()); + if let Some(new_target) = + find_target_for_retargetable_jump(if_target, &sub.term, &true_conditions) + { + jmps_to_retarget.insert(jump_tid_if.clone(), new_target); + } + + let condition = true_conditions.pop().unwrap(); + true_conditions.push(negate_condition(condition)); + if let Some(new_target) = + find_target_for_retargetable_jump(else_target, &sub.term, &true_conditions) + { + jmps_to_retarget.insert(jump_tid_else.clone(), new_target); } - _ => (), } + _ => (), } } retarget_jumps(project, jmps_to_retarget); - let cfg = crate::analysis::graph::get_program_cfg(&project.program); - let nodes_without_incoming_edges_at_end = get_nodes_without_incoming_edge(&cfg); + let cfg_after_normalization = graph::get_program_cfg(&project.program); + let nodes_without_incoming_edges_at_end = + get_nodes_without_incoming_edge(&cfg_after_normalization); remove_new_orphaned_blocks( project, @@ -78,44 +137,67 @@ pub fn propagate_control_flow(project: &mut Project) { ); } -/// Insert the new target TIDs into jump instructions for which a new target was computed. +/// Inserts the new target TIDs into jump instructions for which a new target +/// was computed. fn retarget_jumps(project: &mut Project, mut jmps_to_retarget: HashMap) { for sub in project.program.term.subs.values_mut() { for blk in sub.term.blocks.iter_mut() { for jmp in blk.term.jmps.iter_mut() { - if let Some(new_target) = jmps_to_retarget.remove(&jmp.tid) { - match &mut jmp.term { - Jmp::Branch(target) | Jmp::CBranch { target, .. } => *target = new_target, - _ => panic!("Unexpected type of jump encountered."), + let Some(new_target) = jmps_to_retarget.remove(&jmp.tid) else { + continue; + }; + match &mut jmp.term { + Jmp::Branch(target) + | Jmp::CBranch { target, .. } + | Jmp::Call { + target: _, + return_: Some(target), } + | Jmp::CallInd { + target: _, + return_: Some(target), + } + | Jmp::CallOther { + description: _, + return_: Some(target), + } => *target = new_target, + _ => panic!("Unexpected type of jump encountered."), } } } } } -/// Under the assumption that the given `true_condition` expression evaluates to `true`, -/// check whether we can retarget jumps to the given target to another final jump target. -/// I.e. we follow sequences of jumps that are not interrupted by [`Def`] instructions to their final jump target -/// using the `true_condition` to resolve the targets of conditional jumps if possible. +/// Under the assumption that the given `true_conditions` expressions all +/// evaluate to `true`, check whether we can retarget jumps to the given target +/// to another final jump target. +/// +/// In other words, we follow sequences of jumps that are not interrupted by +/// [`Def`] instructions (or other things that may have side-effects) to their +/// final jump target using the `true_condition` to resolve the targets of +/// conditional jumps if possible. fn find_target_for_retargetable_jump( target: &Tid, sub: &Sub, - true_condition: &Expression, + true_conditions: &[Expression], ) -> Option { let mut visited_tids = BTreeSet::from([target.clone()]); let mut new_target = target; + while let Some(block) = sub.blocks.iter().find(|blk| blk.tid == *new_target) { - if let Some(retarget) = check_for_retargetable_block(block, true_condition) { - if !visited_tids.insert(retarget.clone()) { - // The target was already visited, so we abort the search to avoid infinite loops. - break; - } - new_target = retarget; - } else { + let Some(retarget) = check_for_retargetable_block(block, true_conditions) else { + break; + }; + + if !visited_tids.insert(retarget.clone()) { + // The target was already visited, so we abort the search to avoid + // infinite loops. break; } + + new_target = retarget; } + if new_target != target { Some(new_target.clone()) } else { @@ -124,16 +206,18 @@ fn find_target_for_retargetable_jump( } /// Check whether the given block does not contain any [`Def`] instructions. -/// If yes, check whether the target of the jump at the end of the block is predictable -/// under the assumption that the given `true_condition` expression evaluates to true. -/// If it can be predicted, return the target of the jump. +/// If yes, check whether the target of the jump at the end of the block is +/// predictable under the assumption that the given `true_conditions` +/// expressions all evaluate to true. If it can be predicted, return the target +/// of the jump. fn check_for_retargetable_block<'a>( block: &'a Term, - true_condition: &Expression, + true_conditions: &[Expression], ) -> Option<&'a Tid> { if !block.term.defs.is_empty() { return None; } + match &block.term.jmps[..] { [Term { term: Jmp::Branch(target), @@ -149,35 +233,39 @@ fn check_for_retargetable_block<'a>( }, Term { term: Jmp::Branch(else_target), .. - }] => { + }] => true_conditions.iter().find_map(|true_condition| { if condition == true_condition { Some(if_target) - } else if *condition == negate_condition(true_condition.clone()) { + } else if *condition == negate_condition(true_condition.to_owned()) { Some(else_target) } else { None } - } + }), _ => None, } } -/// Check whether the given node in the control flow graph has exactly on incoming edge -/// and if that edge stems from a conditional jump. -/// If both are true, return the condition expression that needs to evaluate to true whenever this edge is taken. -fn check_if_single_conditional_incoming(graph: &Graph, node: NodeIndex) -> Option { +/// Returns a condition that we know to be true before the execution of the +/// block. +/// +/// Checks whether all edges incoming to the given block are conditioned on the +/// same condition. If true, the shared condition is returned. +fn get_precondition_from_incoming_edges(graph: &Graph, node: NodeIndex) -> Option { let incoming_edges: Vec<_> = graph .edges_directed(node, petgraph::Direction::Incoming) .collect(); - if incoming_edges.len() == 1 { - match incoming_edges[0].weight() { + let mut first_condition: Option = None; + + for edge in incoming_edges.iter() { + let condition = match edge.weight() { Edge::Jump( Term { term: Jmp::CBranch { condition, .. }, .. }, None, - ) => Some(condition.clone()), + ) => condition.clone(), Edge::Jump( Term { term: Jmp::Branch(_), @@ -187,51 +275,67 @@ fn check_if_single_conditional_incoming(graph: &Graph, node: NodeIndex) -> Optio term: Jmp::CBranch { condition, .. }, .. }), - ) => Some(negate_condition(condition.clone())), - _ => None, + ) => negate_condition(condition.clone()), + _ => return None, + }; + + match &mut first_condition { + // First iteration. + None => first_condition = Some(condition), + // Same condition as first incoming edge. + Some(first_condition) if *first_condition == condition => continue, + // A different condition implies that we can not make a definitive + // statement. + _ => return None, } - } else { - None } + + first_condition } -/// Check if the block at the given `BlkStart` node only has one input edge stemming from a conditional jump. -/// If yes, check whether the conditional expression for that jump will still evaluate to true at the end of the block. +/// Returns a condition that we know to be true after the execution of all DEFs +/// in the block. +/// +/// Check if all incoming edges of the given `BlkStart` node are conditioned on +/// the same condition. +/// If yes, check whether the conditional expression will still evaluate to true +/// after the execution of all DEFs of the block. /// If yes, return the conditional expression. -fn get_known_conditional_at_end_of_block(cfg: &Graph, node: NodeIndex) -> Option { - if let Node::BlkStart(block, sub) = cfg[node] { - // Check whether we know the result of a conditional at the start of the block - let mut known_conditional_result: Option = - if block.tid != sub.term.blocks[0].tid { - check_if_single_conditional_incoming(cfg, node) - } else { - // Function start blocks always have incoming caller edges - // even if these edges are missing in the CFG because we do not know the callers. - None - }; - // If we have a known conditional result at the start of the block, - // check whether it will still hold true at the end of the block. - if let Some(conditional) = &known_conditional_result { - let input_vars = conditional.input_vars(); - for def in block.term.defs.iter() { - match &def.term { - Def::Assign { var, .. } | Def::Load { var, .. } => { - if input_vars.contains(&var) { - known_conditional_result = None; - break; - } - } - Def::Store { .. } => (), +fn get_block_precondition_after_defs(cfg: &Graph, node: NodeIndex) -> Option { + let Node::BlkStart(block, sub) = cfg[node] else { + return None; + }; + + if block.tid == sub.term.blocks[0].tid { + // Function start blocks always have incoming caller edges, even if + // these edges are missing in the CFG because we do not know the + // callers. + return None; + } + + // Check whether we know the result of a conditional at the start of the + // block. + let block_precondition = get_precondition_from_incoming_edges(cfg, node)?; + + // If we have a known conditional result at the start of the block, + // check whether it will still hold true at the end of the block. + let input_vars = block_precondition.input_vars(); + for def in block.term.defs.iter() { + match &def.term { + Def::Assign { var, .. } | Def::Load { var, .. } => { + if input_vars.contains(&var) { + return None; } } + Def::Store { .. } => (), } - known_conditional_result - } else { - None } + + Some(block_precondition) } -/// Negate the given boolean condition expression, removing double negations in the process. +/// Negate the given boolean condition expression, removing double negations in +/// the process. fn negate_condition(expr: Expression) -> Expression { if let Expression::UnOp { op: UnOpType::BoolNegate, @@ -247,18 +351,21 @@ fn negate_condition(expr: Expression) -> Expression { } } -/// Iterates the CFG and returns all node's blocks, that do not have an incoming edge. +/// Iterates the CFG and returns all nodes that do not have an incoming edge. fn get_nodes_without_incoming_edge(cfg: &Graph) -> HashSet { - let mut nodes_without_incoming_edges = HashSet::new(); - for node in cfg.node_indices() { - if cfg.neighbors_directed(node, Incoming).next().is_none() { - nodes_without_incoming_edges.insert(cfg[node].get_block().tid.clone()); - } - } - nodes_without_incoming_edges + cfg.node_indices() + .filter_map(|node| { + if cfg.neighbors_directed(node, Incoming).next().is_none() { + Some(cfg[node].get_block().tid.clone()) + } else { + None + } + }) + .collect() } -/// Calculates the difference of the orphaned blocks and removes them from the project. +/// Calculates the difference of the orphaned blocks and removes them from the +/// project. fn remove_new_orphaned_blocks( project: &mut Project, orphaned_blocks_before: HashSet, @@ -280,10 +387,15 @@ pub mod tests { use crate::{def, expr}; use std::collections::BTreeMap; - fn mock_condition_block(name: &str, if_target: &str, else_target: &str) -> Term { + fn mock_condition_block_custom( + name: &str, + if_target: &str, + else_target: &str, + condition: &str, + ) -> Term { let if_jmp = Jmp::CBranch { target: Tid::new(if_target), - condition: expr!("ZF:1"), + condition: expr!(condition), }; let if_jmp = Term { tid: Tid::new(name.to_string() + "_jmp_if"), @@ -305,6 +417,44 @@ pub mod tests { } } + fn mock_condition_block(name: &str, if_target: &str, else_target: &str) -> Term { + mock_condition_block_custom(name, if_target, else_target, "ZF:1") + } + + fn mock_jump_only_block(name: &str, return_target: &str) -> Term { + let jmp = Jmp::Branch(Tid::new(return_target)); + let jmp = Term { + tid: Tid::new(name.to_string() + "_jmp"), + term: jmp, + }; + let blk = Blk { + defs: Vec::new(), + jmps: vec![jmp], + indirect_jmp_targets: Vec::new(), + }; + Term { + tid: Tid::new(name), + term: blk, + } + } + + fn mock_ret_only_block(name: &str) -> Term { + let ret = Jmp::Return(expr!("0x0:8")); + let ret = Term { + tid: Tid::new(name.to_string() + "_ret"), + term: ret, + }; + let blk = Blk { + defs: Vec::new(), + jmps: vec![ret], + indirect_jmp_targets: Vec::new(), + }; + Term { + tid: Tid::new(name), + term: blk, + } + } + fn mock_block_with_defs(name: &str, return_target: &str) -> Term { let def = def![format!("{name}_def: r0:4 = r1:4")]; let jmp = Jmp::Branch(Tid::new(return_target)); @@ -323,6 +473,31 @@ pub mod tests { } } + fn mock_block_with_defs_and_call( + name: &str, + call_target: &str, + return_target: &str, + ) -> Term { + let def = def![format!("{name}_def: r0:4 = r1:4")]; + let call = Jmp::Call { + target: Tid::new(call_target), + return_: Some(Tid::new(return_target)), + }; + let call = Term { + tid: Tid::new(name.to_string() + "_call"), + term: call, + }; + let blk = Blk { + defs: vec![def], + jmps: vec![call], + indirect_jmp_targets: Vec::new(), + }; + Term { + tid: Tid::new(name), + term: blk, + } + } + #[test] fn test_propagate_control_flow() { let sub = Sub { @@ -360,4 +535,266 @@ pub mod tests { &expected_blocks[..] ); } + + #[test] + fn call_return_to_jump() { + let sub_1 = Sub { + name: "sub_1".to_string(), + calling_convention: None, + blocks: vec![ + mock_block_with_defs_and_call("call_blk", "sub_2", "jump_blk"), + mock_jump_only_block("jump_blk", "end_blk"), + mock_block_with_defs("end_blk", "end_blk"), + ], + }; + let sub_1 = Term { + tid: Tid::new("sub_1"), + term: sub_1, + }; + let sub_2 = Sub { + name: "sub_2".to_string(), + calling_convention: None, + blocks: vec![mock_ret_only_block("ret_blk")], + }; + let sub_2 = Term { + tid: Tid::new("sub_2"), + term: sub_2, + }; + let mut project = Project::mock_arm32(); + project.program.term.subs = + BTreeMap::from([(Tid::new("sub_1"), sub_1), (Tid::new("sub_2"), sub_2)]); + + propagate_control_flow(&mut project); + let expected_blocks = vec![ + mock_block_with_defs_and_call("call_blk", "sub_2", "end_blk"), + // jump_blk removed since it has no incoming edges + mock_block_with_defs("end_blk", "end_blk"), + ]; + assert_eq!( + &project.program.term.subs[&Tid::new("sub_1")].term.blocks[..], + &expected_blocks[..] + ); + } + + #[test] + fn call_return_to_cond_jump() { + let sub_1 = Sub { + name: "sub_1".to_string(), + calling_convention: None, + blocks: vec![ + mock_condition_block("cond_blk_1", "call_blk", "end_blk_1"), + mock_block_with_defs_and_call("call_blk", "sub_2", "cond_blk_2"), + mock_condition_block("cond_blk_2", "end_blk_2", "end_blk_1"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ], + }; + let sub_1 = Term { + tid: Tid::new("sub_1"), + term: sub_1, + }; + let sub_2 = Sub { + name: "sub_2".to_string(), + calling_convention: None, + blocks: vec![mock_ret_only_block("ret_blk")], + }; + let sub_2 = Term { + tid: Tid::new("sub_2"), + term: sub_2, + }; + let mut project = Project::mock_arm32(); + project.program.term.subs = + BTreeMap::from([(Tid::new("sub_1"), sub_1), (Tid::new("sub_2"), sub_2)]); + + propagate_control_flow(&mut project); + let expected_blocks = vec![ + mock_condition_block("cond_blk_1", "call_blk", "end_blk_1"), + mock_block_with_defs_and_call("call_blk", "sub_2", "cond_blk_2"), + // cond_blk_2 can not be skipped as call may modify the inputs to + // the conditional expresion. + mock_condition_block("cond_blk_2", "end_blk_2", "end_blk_1"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ]; + assert_eq!( + &project.program.term.subs[&Tid::new("sub_1")].term.blocks[..], + &expected_blocks[..] + ); + } + + #[test] + fn call_return_to_cond_jump_removed() { + let sub_1 = Sub { + name: "sub_1".to_string(), + calling_convention: None, + blocks: vec![ + mock_condition_block("cond_blk_1", "cond_blk_2", "end_blk_1"), + mock_block_with_defs_and_call("call_blk", "sub_2", "cond_blk_2"), + mock_condition_block("cond_blk_2", "end_blk_2", "end_blk_1"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ], + }; + let sub_1 = Term { + tid: Tid::new("sub_1"), + term: sub_1, + }; + let sub_2 = Sub { + name: "sub_2".to_string(), + calling_convention: None, + blocks: vec![mock_block_with_defs("loop_block", "loop_block")], + }; + let sub_2 = Term { + tid: Tid::new("sub_2"), + term: sub_2, + }; + let mut project = Project::mock_arm32(); + project.program.term.subs = + BTreeMap::from([(Tid::new("sub_1"), sub_1), (Tid::new("sub_2"), sub_2)]); + + let mut log_msg_non_returning = project.retarget_non_returning_calls_to_artificial_sink(); + assert_eq!( + log_msg_non_returning.pop().unwrap().text, + "Call @ call_blk_call to sub_2 does not return to cond_blk_2.".to_owned() + ); + assert_eq!( + log_msg_non_returning.pop().unwrap().text, + "sub_2 is non-returning.".to_owned() + ); + assert_eq!( + log_msg_non_returning.pop().unwrap().text, + "sub_1 is non-returning.".to_owned() + ); + + propagate_control_flow(&mut project); + + // Does not panic even though original call return block was removed. + let _ = graph::get_program_cfg(&project.program); + + let expected_blocks = vec![ + // `cond_blk_1` can be retarget. + mock_condition_block("cond_blk_1", "end_blk_2", "end_blk_1"), + // `call_blk` was re-targeted to artificial sink as it was deemed + // non-returning. + mock_block_with_defs_and_call("call_blk", "sub_2", "Artificial Sink Block_sub_1"), + // `cond_blk_2` was removed since cond_blk_1 was re-targeted. + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + // Artificial sink block was added to sub_1 since it did not exist. + Term::::artificial_sink("_sub_1"), + ]; + + assert_eq!( + &project.program.term.subs[&Tid::new("sub_1")].term.blocks[..], + &expected_blocks[..] + ); + } + + #[test] + fn multiple_incoming_same_condition() { + let sub = Sub { + name: "sub".to_string(), + calling_convention: None, + blocks: vec![ + mock_condition_block("cond_blk_1_1", "def_blk_1", "end_blk_1"), + mock_condition_block("cond_blk_1_2", "def_blk_1", "end_blk_1"), + mock_block_with_defs("def_blk_1", "cond_blk_2"), + mock_condition_block("cond_blk_2", "end_blk_2", "end_blk_1"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ], + }; + let sub = Term { + tid: Tid::new("sub"), + term: sub, + }; + let mut project = Project::mock_arm32(); + project.program.term.subs = BTreeMap::from([(Tid::new("sub"), sub)]); + + propagate_control_flow(&mut project); + let expected_blocks = vec![ + mock_condition_block("cond_blk_1_1", "def_blk_1", "end_blk_1"), + mock_condition_block("cond_blk_1_2", "def_blk_1", "end_blk_1"), + mock_block_with_defs("def_blk_1", "end_blk_2"), + // cond_blk_2 removed, since no incoming edge anymore + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ]; + assert_eq!( + &project.program.term.subs[&Tid::new("sub")].term.blocks[..], + &expected_blocks[..] + ); + } + + #[test] + fn multiple_incoming_different_condition() { + let sub = Sub { + name: "sub".to_string(), + calling_convention: None, + blocks: vec![ + mock_condition_block("cond_blk_1_1", "def_blk_1", "end_blk_1"), + mock_condition_block("cond_blk_1_2", "end_blk_1", "def_blk_1"), + mock_block_with_defs("def_blk_1", "cond_blk_2"), + mock_condition_block("cond_blk_2", "end_blk_2", "end_blk_1"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ], + }; + let sub = Term { + tid: Tid::new("sub"), + term: sub, + }; + let mut project = Project::mock_arm32(); + project.program.term.subs = BTreeMap::from([(Tid::new("sub"), sub)]); + + propagate_control_flow(&mut project); + let expected_blocks = vec![ + mock_condition_block("cond_blk_1_1", "def_blk_1", "end_blk_1"), + mock_condition_block("cond_blk_1_2", "end_blk_1", "def_blk_1"), + mock_block_with_defs("def_blk_1", "cond_blk_2"), + mock_condition_block("cond_blk_2", "end_blk_2", "end_blk_1"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ]; + assert_eq!( + &project.program.term.subs[&Tid::new("sub")].term.blocks[..], + &expected_blocks[..] + ); + } + + #[test] + fn multiple_known_conditions() { + let sub = Sub { + name: "sub".to_string(), + calling_convention: None, + blocks: vec![ + mock_condition_block("cond1_blk_1", "cond2_blk", "end_blk_1"), + mock_condition_block_custom("cond2_blk", "cond1_blk_2", "end_blk_1", "CF:1"), + mock_condition_block("cond1_blk_2", "def_blk", "end_blk_1"), + mock_block_with_defs("def_blk", "end_blk_2"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ], + }; + let sub = Term { + tid: Tid::new("sub"), + term: sub, + }; + let mut project = Project::mock_arm32(); + project.program.term.subs = BTreeMap::from([(Tid::new("sub"), sub)]); + + propagate_control_flow(&mut project); + let expected_blocks = vec![ + mock_condition_block("cond1_blk_1", "cond2_blk", "end_blk_1"), + mock_condition_block_custom("cond2_blk", "def_blk", "end_blk_1", "CF:1"), + // removed since no incoming edges + mock_block_with_defs("def_blk", "end_blk_2"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ]; + assert_eq!( + &project.program.term.subs[&Tid::new("sub")].term.blocks[..], + &expected_blocks[..] + ); + } } diff --git a/src/cwe_checker_lib/src/intermediate_representation/sub.rs b/src/cwe_checker_lib/src/intermediate_representation/sub.rs index 06ce8247a..7c8b71926 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/sub.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/sub.rs @@ -18,6 +18,52 @@ pub struct Sub { pub calling_convention: Option, } +impl Term { + /// Returns the ID suffix for this function. + pub fn id_suffix(&self) -> String { + format!("_{}", self.tid) + } + + /// Returns true iff the function has an artificial sink block. + pub fn has_artifical_sink(&self) -> bool { + let id_suffix = self.id_suffix(); + + self.term + .blocks + .iter() + .any(|blk| blk.tid.is_artificial_sink_block(&id_suffix)) + } + + /// Returns a new artificial sink sub. + pub fn artificial_sink() -> Self { + Self { + tid: Tid::artificial_sink_sub(), + term: Sub { + name: "Artificial Sink Sub".to_string(), + blocks: vec![Term::::artificial_sink("")], + calling_convention: None, + }, + } + } + + /// Adds an artificial sink block if there is none. + /// + /// Returns true iff the artificial sink block was added. + pub fn add_artifical_sink(&mut self) -> bool { + if self.has_artifical_sink() { + false + } else { + let id_suffix = self.id_suffix(); + + self.term + .blocks + .push(Term::::artificial_sink(&id_suffix)); + + true + } + } +} + /// A parameter or return argument of a function. #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] pub enum Arg { diff --git a/src/cwe_checker_lib/src/intermediate_representation/term.rs b/src/cwe_checker_lib/src/intermediate_representation/term.rs index 91b26b2c6..d98acc76a 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/term.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/term.rs @@ -14,12 +14,25 @@ pub struct Tid { } impl Tid { + /// Prefix for IDs of artificial sinks in the control flow graph. + /// + /// Dummy blocks with such TIDs are added for different purposes, e.g., as + /// targets for jumps to non-existing targets or return targets for calls to + /// non-returning functions. + const ARTIFICIAL_SINK_BLOCK_ID_PREFIX: &'static str = "Artificial Sink Block"; + /// The ID of the artificial sink sub. + /// + /// This is used as the target for calls to non-existing functions. + const ARTIFICIAL_SINK_SUB_ID: &'static str = "Artificial Sink Sub"; + /// Address for use in IDs of terms that do not have an address. + const UNKNOWN_ADDRESS: &'static str = "UNKNOWN"; + /// Generate a new term identifier with the given ID string /// and with unknown address. pub fn new(val: T) -> Tid { Tid { id: val.to_string(), - address: "UNKNOWN".to_string(), + address: Self::UNKNOWN_ADDRESS.to_string(), } } @@ -47,6 +60,35 @@ impl Tid { address: address.to_string(), } } + + /// Returns a new ID for an artificial sink block with the given suffix. + pub fn artificial_sink_block(suffix: &str) -> Self { + Self { + id: format!("{}{}", Self::ARTIFICIAL_SINK_BLOCK_ID_PREFIX, suffix), + address: Self::UNKNOWN_ADDRESS.to_string(), + } + } + + /// Returns a new ID for the artificial sink sub. + pub fn artificial_sink_sub() -> Self { + Self { + id: Self::ARTIFICIAL_SINK_SUB_ID.to_string(), + address: Self::UNKNOWN_ADDRESS.to_string(), + } + } + + /// Returns true iff the ID is for the artificial sink block with the given + /// suffix. + pub fn is_artificial_sink_block(&self, suffix: &str) -> bool { + self.id.starts_with(Self::ARTIFICIAL_SINK_BLOCK_ID_PREFIX) + && self.has_id_suffix(suffix) + && self.address == Self::UNKNOWN_ADDRESS + } + + /// Returns true iff the ID is for the artificial sink sub. + pub fn is_artificial_sink_sub(&self) -> bool { + self.id == Self::ARTIFICIAL_SINK_SUB_ID && self.address == Self::UNKNOWN_ADDRESS + } } impl std::fmt::Display for Tid {