From 33cfdb60b85f4420fab4aa3e9b65403b8ca1c090 Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Thu, 2 May 2024 10:00:50 +0200 Subject: [PATCH 01/11] lib/ir/project: propagate control flow for call returns This patch does two things: 1. It allows the re-targeting of jumps for which no known true condition is available. Without a known condition, only blocks that consist of a single, unconditional jump can be skipped. 2. It allows the re-targeting of call returns in the same way that we already do it for unconditional jumps. For calls we never have a known condition as side-effects may invalidate any knowledge we have after the execution of all DEFs in the block. Example: Before the optimization we might have code like this: BLK [blk_0040a9c4] DEF [instr_0040a9c4_0] ra:4 = 0x40a9cc:4 JMP [instr_0040a9c4_1] call sub_00403f80 ret blk_0040a9cc BLK [blk_0040a9cc] JMP [instr_0040a9cc_1] Jump to blk_0040a9d0 BLK [blk_0040a9d0] DEF [instr_0040a9d0_0] a0:4 = ((0x43:4 << 0x10:4) + 0xffffb730:4) JMP [instr_0040a9d0_1] Jump to blk_0040a9d4 whereas after the optimization it becomes: BLK [blk_0040a9c4] DEF [instr_0040a9c4_0] ra:4 = 0x40a9cc:4 JMP [instr_0040a9c4_1] call sub_00403f80 ret blk_0040a9d0 BLK [blk_0040a9d0] DEF [instr_0040a9d0_0] a0:4 = ((0x43:4 << 0x10:4) + 0xffffb730:4) JMP [instr_0040a9d0_1] Jump to blk_0040a9d4 Fixes: 2487aac ("remove dead code originating from control flow propagation (#384)") Closes: https://github.com/fkie-cad/cwe_checker/issues/461 Reported-by: https://github.com/ElDavoo Signed-off-by: Valentin Obst --- .../project/propagate_control_flow.rs | 110 +++++++++++++----- 1 file changed, 82 insertions(+), 28 deletions(-) diff --git a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs index 2f50f55a3..fd9cab664 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs @@ -27,15 +27,50 @@ pub fn propagate_control_flow(project: &mut Project) { // Check whether we can propagate the control flow for outgoing jumps match &block.term.jmps[..] { [Term { - term: Jmp::Branch(target), + tid: call_tid, + term: + Jmp::Call { + target: _, + return_: Some(return_target), + }, + }] + | [Term { + tid: call_tid, + term: + Jmp::CallInd { + target: _, + return_: Some(return_target), + }, + }] + | [Term { + tid: call_tid, + term: + Jmp::CallOther { + description: _, + return_: Some(return_target), + }, + }] => { + if let Some(new_target) = find_target_for_retargetable_jump( + return_target, + &sub.term, + // Call may have side-effects that invalidate our + // knowledge about any condition we know to be true + // after execution of all DEFs in a block. + None, + ) { + jmps_to_retarget.insert(call_tid.clone(), new_target); + } + } + [Term { tid: jump_tid, + term: Jmp::Branch(target), }] => { - if let Some(true_condition) = &known_conditional_result { - if let Some(new_target) = - find_target_for_retargetable_jump(target, &sub.term, true_condition) - { - jmps_to_retarget.insert(jump_tid.clone(), new_target); - } + if let Some(new_target) = find_target_for_retargetable_jump( + target, + &sub.term, + known_conditional_result.as_ref(), + ) { + jmps_to_retarget.insert(jump_tid.clone(), new_target); } } [Term { @@ -50,14 +85,14 @@ pub fn propagate_control_flow(project: &mut Project) { tid: jump_tid_else, }] => { if let Some(new_target) = - find_target_for_retargetable_jump(if_target, &sub.term, condition) + find_target_for_retargetable_jump(if_target, &sub.term, Some(condition)) { jmps_to_retarget.insert(jump_tid_if.clone(), new_target); } if let Some(new_target) = find_target_for_retargetable_jump( else_target, &sub.term, - &negate_condition(condition.clone()), + Some(&negate_condition(condition.clone())), ) { jmps_to_retarget.insert(jump_tid_else.clone(), new_target); } @@ -85,7 +120,20 @@ fn retarget_jumps(project: &mut Project, mut jmps_to_retarget: HashMap for jmp in blk.term.jmps.iter_mut() { if let Some(new_target) = jmps_to_retarget.remove(&jmp.tid) { match &mut jmp.term { - Jmp::Branch(target) | Jmp::CBranch { target, .. } => *target = new_target, + Jmp::Branch(target) + | Jmp::CBranch { target, .. } + | Jmp::Call { + target: _, + return_: Some(target), + } + | Jmp::CallInd { + target: _, + return_: Some(target), + } + | Jmp::CallOther { + description: _, + return_: Some(target), + } => *target = new_target, _ => panic!("Unexpected type of jump encountered."), } } @@ -101,7 +149,7 @@ fn retarget_jumps(project: &mut Project, mut jmps_to_retarget: HashMap fn find_target_for_retargetable_jump( target: &Tid, sub: &Sub, - true_condition: &Expression, + true_condition: Option<&Expression>, ) -> Option { let mut visited_tids = BTreeSet::from([target.clone()]); let mut new_target = target; @@ -129,27 +177,33 @@ fn find_target_for_retargetable_jump( /// If it can be predicted, return the target of the jump. fn check_for_retargetable_block<'a>( block: &'a Term, - true_condition: &Expression, + true_condition: Option<&Expression>, ) -> Option<&'a Tid> { if !block.term.defs.is_empty() { return None; } - match &block.term.jmps[..] { - [Term { - term: Jmp::Branch(target), - .. - }] => Some(target), - [Term { - term: - Jmp::CBranch { - target: if_target, - condition, - }, - .. - }, Term { - term: Jmp::Branch(else_target), - .. - }] => { + match (&block.term.jmps[..], true_condition) { + ( + [Term { + term: Jmp::Branch(target), + .. + }], + _, + ) => Some(target), + ( + [Term { + term: + Jmp::CBranch { + target: if_target, + condition, + }, + .. + }, Term { + term: Jmp::Branch(else_target), + .. + }], + Some(true_condition), + ) => { if condition == true_condition { Some(if_target) } else if *condition == negate_condition(true_condition.clone()) { From cf92361c35417077aba8be5cabd5eb7e96d247f4 Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Thu, 2 May 2024 12:00:58 +0200 Subject: [PATCH 02/11] lib/ir/project: derive CF-propagation condition from multiple incoming edges If a basic block has multiple incoming edges that are all conditioned on the same condition, use this condition when retargeting the control flow transfer at the end of the block. Signed-off-by: Valentin Obst --- .../project/propagate_control_flow.rs | 119 +++++++++++++++--- 1 file changed, 105 insertions(+), 14 deletions(-) diff --git a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs index fd9cab664..327fe567b 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs @@ -216,22 +216,26 @@ fn check_for_retargetable_block<'a>( } } -/// Check whether the given node in the control flow graph has exactly on incoming edge -/// and if that edge stems from a conditional jump. -/// If both are true, return the condition expression that needs to evaluate to true whenever this edge is taken. -fn check_if_single_conditional_incoming(graph: &Graph, node: NodeIndex) -> Option { +/// Returns a condition that we know to be true before the execution of the +/// block. +/// +/// Checks whether all edges incoming to the given block are conditioned on the +/// same condition. If true, the shared condition is returned. +fn check_if_same_conditional_incoming(graph: &Graph, node: NodeIndex) -> Option { let incoming_edges: Vec<_> = graph .edges_directed(node, petgraph::Direction::Incoming) .collect(); - if incoming_edges.len() == 1 { - match incoming_edges[0].weight() { + let mut first_condition: Option = None; + + for edge in incoming_edges.iter() { + let condition = match edge.weight() { Edge::Jump( Term { term: Jmp::CBranch { condition, .. }, .. }, None, - ) => Some(condition.clone()), + ) => condition.clone(), Edge::Jump( Term { term: Jmp::Branch(_), @@ -241,23 +245,38 @@ fn check_if_single_conditional_incoming(graph: &Graph, node: NodeIndex) -> Optio term: Jmp::CBranch { condition, .. }, .. }), - ) => Some(negate_condition(condition.clone())), - _ => None, + ) => negate_condition(condition.clone()), + _ => return None, + }; + + match &mut first_condition { + // First iteration. + None => first_condition = Some(condition), + // Same condition as first incoming edge. + Some(prev_condition) if *prev_condition == condition => continue, + // A different condition implies that we can not make a definitive + // statement. + _ => return None, } - } else { - None } + + first_condition } -/// Check if the block at the given `BlkStart` node only has one input edge stemming from a conditional jump. -/// If yes, check whether the conditional expression for that jump will still evaluate to true at the end of the block. +/// Returns a condition that we know to be true after the execution of all DEFs +/// in the block. +/// +/// Check if all incoming edges of the given `BlkStart` node are conditioned on +/// the same condition. +/// If yes, check whether the conditional expression will still evaluate to true +/// after the execution of all DEFs of the block. /// If yes, return the conditional expression. fn get_known_conditional_at_end_of_block(cfg: &Graph, node: NodeIndex) -> Option { if let Node::BlkStart(block, sub) = cfg[node] { // Check whether we know the result of a conditional at the start of the block let mut known_conditional_result: Option = if block.tid != sub.term.blocks[0].tid { - check_if_single_conditional_incoming(cfg, node) + check_if_same_conditional_incoming(cfg, node) } else { // Function start blocks always have incoming caller edges // even if these edges are missing in the CFG because we do not know the callers. @@ -414,4 +433,76 @@ pub mod tests { &expected_blocks[..] ); } + + #[test] + fn multiple_incoming_same_condition() { + let sub = Sub { + name: "sub".to_string(), + calling_convention: None, + blocks: vec![ + mock_condition_block("cond_blk_1_1", "def_blk_1", "end_blk_1"), + mock_condition_block("cond_blk_1_2", "def_blk_1", "end_blk_1"), + mock_block_with_defs("def_blk_1", "cond_blk_2"), + mock_condition_block("cond_blk_2", "end_blk_2", "end_blk_1"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ], + }; + let sub = Term { + tid: Tid::new("sub"), + term: sub, + }; + let mut project = Project::mock_arm32(); + project.program.term.subs = BTreeMap::from([(Tid::new("sub"), sub)]); + + propagate_control_flow(&mut project); + let expected_blocks = vec![ + mock_condition_block("cond_blk_1_1", "def_blk_1", "end_blk_1"), + mock_condition_block("cond_blk_1_2", "def_blk_1", "end_blk_1"), + mock_block_with_defs("def_blk_1", "end_blk_2"), + // cond_blk_2 removed, since no incoming edge anymore + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ]; + assert_eq!( + &project.program.term.subs[&Tid::new("sub")].term.blocks[..], + &expected_blocks[..] + ); + } + + #[test] + fn multiple_incoming_different_condition() { + let sub = Sub { + name: "sub".to_string(), + calling_convention: None, + blocks: vec![ + mock_condition_block("cond_blk_1_1", "def_blk_1", "end_blk_1"), + mock_condition_block("cond_blk_1_2", "end_blk_1", "def_blk_1"), + mock_block_with_defs("def_blk_1", "cond_blk_2"), + mock_condition_block("cond_blk_2", "end_blk_2", "end_blk_1"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ], + }; + let sub = Term { + tid: Tid::new("sub"), + term: sub, + }; + let mut project = Project::mock_arm32(); + project.program.term.subs = BTreeMap::from([(Tid::new("sub"), sub)]); + + propagate_control_flow(&mut project); + let expected_blocks = vec![ + mock_condition_block("cond_blk_1_1", "def_blk_1", "end_blk_1"), + mock_condition_block("cond_blk_1_2", "end_blk_1", "def_blk_1"), + mock_block_with_defs("def_blk_1", "cond_blk_2"), + mock_condition_block("cond_blk_2", "end_blk_2", "end_blk_1"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ]; + assert_eq!( + &project.program.term.subs[&Tid::new("sub")].term.blocks[..], + &expected_blocks[..] + ); + } } From c990563e0d9ee93940b6ac3f95fc73ec3239bbdc Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Thu, 2 May 2024 13:14:18 +0200 Subject: [PATCH 03/11] lib/ir/project: test control flow propagation for call returns Signed-off-by: Valentin Obst --- .../project/propagate_control_flow.rs | 145 ++++++++++++++++++ 1 file changed, 145 insertions(+) diff --git a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs index 327fe567b..d2eee3a53 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs @@ -378,6 +378,40 @@ pub mod tests { } } + fn mock_jump_only_block(name: &str, return_target: &str) -> Term { + let jmp = Jmp::Branch(Tid::new(return_target)); + let jmp = Term { + tid: Tid::new(name.to_string() + "_jmp"), + term: jmp, + }; + let blk = Blk { + defs: Vec::new(), + jmps: vec![jmp], + indirect_jmp_targets: Vec::new(), + }; + Term { + tid: Tid::new(name), + term: blk, + } + } + + fn mock_ret_only_block(name: &str) -> Term { + let ret = Jmp::Return(expr!("0x0:8")); + let ret = Term { + tid: Tid::new(name.to_string() + "_ret"), + term: ret, + }; + let blk = Blk { + defs: Vec::new(), + jmps: vec![ret], + indirect_jmp_targets: Vec::new(), + }; + Term { + tid: Tid::new(name), + term: blk, + } + } + fn mock_block_with_defs(name: &str, return_target: &str) -> Term { let def = def![format!("{name}_def: r0:4 = r1:4")]; let jmp = Jmp::Branch(Tid::new(return_target)); @@ -396,6 +430,31 @@ pub mod tests { } } + fn mock_block_with_defs_and_call( + name: &str, + call_target: &str, + return_target: &str, + ) -> Term { + let def = def![format!("{name}_def: r0:4 = r1:4")]; + let call = Jmp::Call { + target: Tid::new(call_target), + return_: Some(Tid::new(return_target)), + }; + let call = Term { + tid: Tid::new(name.to_string() + "_call"), + term: call, + }; + let blk = Blk { + defs: vec![def], + jmps: vec![call], + indirect_jmp_targets: Vec::new(), + }; + Term { + tid: Tid::new(name), + term: blk, + } + } + #[test] fn test_propagate_control_flow() { let sub = Sub { @@ -434,6 +493,92 @@ pub mod tests { ); } + #[test] + fn call_return_to_jump() { + let sub_1 = Sub { + name: "sub_1".to_string(), + calling_convention: None, + blocks: vec![ + mock_block_with_defs_and_call("call_blk", "sub_2", "jump_blk"), + mock_jump_only_block("jump_blk", "end_blk"), + mock_block_with_defs("end_blk", "end_blk"), + ], + }; + let sub_1 = Term { + tid: Tid::new("sub_1"), + term: sub_1, + }; + let sub_2 = Sub { + name: "sub_2".to_string(), + calling_convention: None, + blocks: vec![mock_ret_only_block("ret_blk")], + }; + let sub_2 = Term { + tid: Tid::new("sub_2"), + term: sub_2, + }; + let mut project = Project::mock_arm32(); + project.program.term.subs = + BTreeMap::from([(Tid::new("sub_1"), sub_1), (Tid::new("sub_2"), sub_2)]); + + propagate_control_flow(&mut project); + let expected_blocks = vec![ + mock_block_with_defs_and_call("call_blk", "sub_2", "end_blk"), + // jump_blk removed since it has no incoming edges + mock_block_with_defs("end_blk", "end_blk"), + ]; + assert_eq!( + &project.program.term.subs[&Tid::new("sub_1")].term.blocks[..], + &expected_blocks[..] + ); + } + + #[test] + fn call_return_to_cond_jump() { + let sub_1 = Sub { + name: "sub_1".to_string(), + calling_convention: None, + blocks: vec![ + mock_condition_block("cond_blk_1", "call_blk", "end_blk_1"), + mock_block_with_defs_and_call("call_blk", "sub_2", "cond_blk_2"), + mock_condition_block("cond_blk_2", "end_blk_2", "end_blk_1"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ], + }; + let sub_1 = Term { + tid: Tid::new("sub_1"), + term: sub_1, + }; + let sub_2 = Sub { + name: "sub_2".to_string(), + calling_convention: None, + blocks: vec![mock_ret_only_block("ret_blk")], + }; + let sub_2 = Term { + tid: Tid::new("sub_2"), + term: sub_2, + }; + let mut project = Project::mock_arm32(); + project.program.term.subs = + BTreeMap::from([(Tid::new("sub_1"), sub_1), (Tid::new("sub_2"), sub_2)]); + + propagate_control_flow(&mut project); + let expected_blocks = vec![ + mock_condition_block("cond_blk_1", "call_blk", "end_blk_1"), + mock_block_with_defs_and_call("call_blk", "sub_2", "cond_blk_2"), + // cond_blk_2 can not be skipped as call may modify the inputs to + // the conditional expresion. + mock_condition_block("cond_blk_2", "end_blk_2", "end_blk_1"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ]; + assert_eq!( + &project.program.term.subs[&Tid::new("sub_1")].term.blocks[..], + &expected_blocks[..] + ); + } + #[test] fn multiple_incoming_same_condition() { let sub = Sub { From 7a98d2d47c0eadbc40d380731842c25d86ad595b Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Thu, 2 May 2024 16:32:22 +0200 Subject: [PATCH 04/11] lib/ir/project: CF-propagation with multiple conditions Remember precondition and branch condition when retrageting a block that ends with a conditional jump. Signed-off-by: Valentin Obst --- .../project/propagate_control_flow.rs | 131 ++++++++++++------ 1 file changed, 87 insertions(+), 44 deletions(-) diff --git a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs index d2eee3a53..11c3c979f 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs @@ -22,9 +22,12 @@ pub fn propagate_control_flow(project: &mut Project) { let mut jmps_to_retarget = HashMap::new(); for node in cfg.node_indices() { if let Node::BlkStart(block, sub) = cfg[node] { - // Check whether we already know the result of a conditional at the end of the block - let known_conditional_result = get_known_conditional_at_end_of_block(&cfg, node); - // Check whether we can propagate the control flow for outgoing jumps + // Conditions that we know to be true "on" a particular outgoing + // edge. + let mut true_conditions = Vec::new(); + if let Some(block_precondition) = get_known_conditional_at_end_of_block(&cfg, node) { + true_conditions.push(block_precondition); + } match &block.term.jmps[..] { [Term { tid: call_tid, @@ -56,7 +59,7 @@ pub fn propagate_control_flow(project: &mut Project) { // Call may have side-effects that invalidate our // knowledge about any condition we know to be true // after execution of all DEFs in a block. - None, + &Vec::new(), ) { jmps_to_retarget.insert(call_tid.clone(), new_target); } @@ -65,11 +68,9 @@ pub fn propagate_control_flow(project: &mut Project) { tid: jump_tid, term: Jmp::Branch(target), }] => { - if let Some(new_target) = find_target_for_retargetable_jump( - target, - &sub.term, - known_conditional_result.as_ref(), - ) { + if let Some(new_target) = + find_target_for_retargetable_jump(target, &sub.term, &true_conditions) + { jmps_to_retarget.insert(jump_tid.clone(), new_target); } } @@ -84,16 +85,18 @@ pub fn propagate_control_flow(project: &mut Project) { term: Jmp::Branch(else_target), tid: jump_tid_else, }] => { + true_conditions.push(condition.clone()); if let Some(new_target) = - find_target_for_retargetable_jump(if_target, &sub.term, Some(condition)) + find_target_for_retargetable_jump(if_target, &sub.term, &true_conditions) { jmps_to_retarget.insert(jump_tid_if.clone(), new_target); } - if let Some(new_target) = find_target_for_retargetable_jump( - else_target, - &sub.term, - Some(&negate_condition(condition.clone())), - ) { + + let condition = true_conditions.pop().unwrap(); + true_conditions.push(negate_condition(condition)); + if let Some(new_target) = + find_target_for_retargetable_jump(else_target, &sub.term, &true_conditions) + { jmps_to_retarget.insert(jump_tid_else.clone(), new_target); } } @@ -149,12 +152,12 @@ fn retarget_jumps(project: &mut Project, mut jmps_to_retarget: HashMap fn find_target_for_retargetable_jump( target: &Tid, sub: &Sub, - true_condition: Option<&Expression>, + true_conditions: &[Expression], ) -> Option { let mut visited_tids = BTreeSet::from([target.clone()]); let mut new_target = target; while let Some(block) = sub.blocks.iter().find(|blk| blk.tid == *new_target) { - if let Some(retarget) = check_for_retargetable_block(block, true_condition) { + if let Some(retarget) = check_for_retargetable_block(block, true_conditions) { if !visited_tids.insert(retarget.clone()) { // The target was already visited, so we abort the search to avoid infinite loops. break; @@ -177,41 +180,36 @@ fn find_target_for_retargetable_jump( /// If it can be predicted, return the target of the jump. fn check_for_retargetable_block<'a>( block: &'a Term, - true_condition: Option<&Expression>, + true_conditions: &[Expression], ) -> Option<&'a Tid> { if !block.term.defs.is_empty() { return None; } - match (&block.term.jmps[..], true_condition) { - ( - [Term { - term: Jmp::Branch(target), - .. - }], - _, - ) => Some(target), - ( - [Term { - term: - Jmp::CBranch { - target: if_target, - condition, - }, - .. - }, Term { - term: Jmp::Branch(else_target), - .. - }], - Some(true_condition), - ) => { + + match &block.term.jmps[..] { + [Term { + term: Jmp::Branch(target), + .. + }] => Some(target), + [Term { + term: + Jmp::CBranch { + target: if_target, + condition, + }, + .. + }, Term { + term: Jmp::Branch(else_target), + .. + }] => true_conditions.iter().find_map(|true_condition| { if condition == true_condition { Some(if_target) - } else if *condition == negate_condition(true_condition.clone()) { + } else if *condition == negate_condition(true_condition.to_owned()) { Some(else_target) } else { None } - } + }), _ => None, } } @@ -353,10 +351,15 @@ pub mod tests { use crate::{def, expr}; use std::collections::BTreeMap; - fn mock_condition_block(name: &str, if_target: &str, else_target: &str) -> Term { + fn mock_condition_block_custom( + name: &str, + if_target: &str, + else_target: &str, + condition: &str, + ) -> Term { let if_jmp = Jmp::CBranch { target: Tid::new(if_target), - condition: expr!("ZF:1"), + condition: expr!(condition), }; let if_jmp = Term { tid: Tid::new(name.to_string() + "_jmp_if"), @@ -378,6 +381,10 @@ pub mod tests { } } + fn mock_condition_block(name: &str, if_target: &str, else_target: &str) -> Term { + mock_condition_block_custom(name, if_target, else_target, "ZF:1") + } + fn mock_jump_only_block(name: &str, return_target: &str) -> Term { let jmp = Jmp::Branch(Tid::new(return_target)); let jmp = Term { @@ -650,4 +657,40 @@ pub mod tests { &expected_blocks[..] ); } + + #[test] + fn multiple_known_conditions() { + let sub = Sub { + name: "sub".to_string(), + calling_convention: None, + blocks: vec![ + mock_condition_block("cond1_blk_1", "cond2_blk", "end_blk_1"), + mock_condition_block_custom("cond2_blk", "cond1_blk_2", "end_blk_1", "CF:1"), + mock_condition_block("cond1_blk_2", "def_blk", "end_blk_1"), + mock_block_with_defs("def_blk", "end_blk_2"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ], + }; + let sub = Term { + tid: Tid::new("sub"), + term: sub, + }; + let mut project = Project::mock_arm32(); + project.program.term.subs = BTreeMap::from([(Tid::new("sub"), sub)]); + + propagate_control_flow(&mut project); + let expected_blocks = vec![ + mock_condition_block("cond1_blk_1", "cond2_blk", "end_blk_1"), + mock_condition_block_custom("cond2_blk", "def_blk", "end_blk_1", "CF:1"), + // removed since no incoming edges + mock_block_with_defs("def_blk", "end_blk_2"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ]; + assert_eq!( + &project.program.term.subs[&Tid::new("sub")].term.blocks[..], + &expected_blocks[..] + ); + } } From 58a951d5dfeb4e553a5fa2076c82a1a3ec917feb Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Fri, 3 May 2024 11:43:43 +0200 Subject: [PATCH 05/11] lib/analysis/graph: impl `ToJsonCompact` for `Graph` Signed-off-by: Valentin Obst --- src/cwe_checker_lib/src/analysis/graph.rs | 102 +++++++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) diff --git a/src/cwe_checker_lib/src/analysis/graph.rs b/src/cwe_checker_lib/src/analysis/graph.rs index 25087c366..8ce3d3699 100644 --- a/src/cwe_checker_lib/src/analysis/graph.rs +++ b/src/cwe_checker_lib/src/analysis/graph.rs @@ -44,11 +44,14 @@ use crate::intermediate_representation::*; use crate::prelude::*; -use crate::utils::log::LogMessage; -use petgraph::graph::DiGraph; +use crate::utils::{debug::ToJsonCompact, log::LogMessage}; use std::collections::{HashMap, HashSet}; pub use petgraph::graph::NodeIndex; +use petgraph::{ + graph::DiGraph, + visit::{EdgeRef, IntoNodeReferences}, +}; /// The graph type of an interprocedural control flow graph pub type Graph<'a> = DiGraph, Edge<'a>>; @@ -182,6 +185,25 @@ pub enum Edge<'a> { ReturnCombine(&'a Term), } +impl<'a> std::fmt::Display for Edge<'a> { + fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::Block => { + write!(formatter, "Block") + } + Self::Jump(..) => { + write!(formatter, "Jump") + } + Self::Call { .. } => write!(formatter, "Call",), + Self::ExternCallStub { .. } => write!(formatter, "ExternCallStub",), + Self::CrCallStub => write!(formatter, "CrCallStub"), + Self::CrReturnStub => write!(formatter, "CrReturnStub"), + Self::CallCombine(..) => write!(formatter, "CallCombine"), + Self::ReturnCombine(..) => write!(formatter, "ReturnCombine"), + } + } +} + /// A builder struct for building graphs struct GraphBuilder<'a> { program: &'a Term, @@ -531,6 +553,82 @@ pub fn get_entry_nodes_of_subs(graph: &Graph) -> HashMap { sub_to_entry_node_map } +impl ToJsonCompact for Graph<'_> { + fn to_json_compact(&self) -> serde_json::Value { + let mut map = serde_json::Map::new(); + let mut node_counts_map = serde_json::Map::new(); + let mut edge_counts_map = serde_json::Map::new(); + let mut nodes_map = serde_json::Map::new(); + let mut edges_map = serde_json::Map::new(); + + let total_nodes = self.node_count(); + let mut blk_start_nodes = 0u64; + let mut blk_end_nodes = 0u64; + let mut call_return_nodes = 0u64; + let mut call_source_nodes = 0u64; + + for (idx, node) in self.node_references() { + nodes_map.insert(idx.index().to_string(), node.to_string().into()); + match node { + Node::BlkStart(..) => blk_start_nodes += 1, + Node::BlkEnd(..) => blk_end_nodes += 1, + Node::CallReturn { .. } => call_return_nodes += 1, + Node::CallSource { .. } => call_source_nodes += 1, + } + } + + node_counts_map.insert("total".into(), total_nodes.into()); + node_counts_map.insert("blk_start".into(), blk_start_nodes.into()); + node_counts_map.insert("blk_end".into(), blk_end_nodes.into()); + node_counts_map.insert("call_return".into(), call_return_nodes.into()); + node_counts_map.insert("call_source".into(), call_source_nodes.into()); + + let total_edges = self.edge_count(); + let mut block_edges = 0u64; + let mut jump_edges = 0u64; + let mut call_edges = 0u64; + let mut extern_call_stub_edges = 0u64; + let mut cr_call_stub_edges = 0u64; + let mut cr_return_stub_edges = 0u64; + let mut call_combine_edges = 0u64; + let mut return_combine_edges = 0u64; + + for edge in self.edge_references() { + edges_map.insert( + format!("{} -> {}", edge.source().index(), edge.target().index()), + edge.weight().to_string().into(), + ); + match edge.weight() { + Edge::Block => block_edges += 1, + Edge::Jump(..) => jump_edges += 1, + Edge::Call(..) => call_edges += 1, + Edge::ExternCallStub(..) => extern_call_stub_edges += 1, + Edge::CrCallStub => cr_call_stub_edges += 1, + Edge::CrReturnStub => cr_return_stub_edges += 1, + Edge::CallCombine(..) => call_combine_edges += 1, + Edge::ReturnCombine(..) => return_combine_edges += 1, + } + } + + edge_counts_map.insert("total".into(), total_edges.into()); + edge_counts_map.insert("block".into(), block_edges.into()); + edge_counts_map.insert("jump".into(), jump_edges.into()); + edge_counts_map.insert("call".into(), call_edges.into()); + edge_counts_map.insert("extern_call_stub".into(), extern_call_stub_edges.into()); + edge_counts_map.insert("cr_call_stub".into(), cr_call_stub_edges.into()); + edge_counts_map.insert("cr_return_stub".into(), cr_return_stub_edges.into()); + edge_counts_map.insert("call_combine".into(), call_combine_edges.into()); + edge_counts_map.insert("return_combine".into(), return_combine_edges.into()); + + map.insert("node_counts".into(), node_counts_map.into()); + map.insert("edge_counts".into(), edge_counts_map.into()); + map.insert("nodes".into(), nodes_map.into()); + map.insert("edges".into(), edges_map.into()); + + serde_json::Value::Object(map) + } +} + #[cfg(test)] mod tests { use crate::expr; From e9bd03d773a629d69a957c4d29f32398482d6493 Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Fri, 3 May 2024 15:57:55 +0200 Subject: [PATCH 06/11] lib/ir/project: code cleanup of CF-propagation pass No functional changes. Hopefully. Signed-off-by: Valentin Obst --- .../project/propagate_control_flow.rs | 295 +++++++++--------- 1 file changed, 153 insertions(+), 142 deletions(-) diff --git a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs index 11c3c979f..dbac0da1c 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs @@ -1,8 +1,10 @@ -use crate::analysis::graph::{Edge, Graph, Node}; +use crate::analysis::graph::{self, Edge, Graph, Node}; use crate::intermediate_representation::*; + +use std::collections::{BTreeSet, HashMap, HashSet}; + use petgraph::graph::NodeIndex; use petgraph::Direction::Incoming; -use std::collections::{BTreeSet, HashMap, HashSet}; /// The `propagate_control_flow` normalization pass tries to simplify the representation of /// sequences of if-else blocks that all have the same condition @@ -16,98 +18,103 @@ use std::collections::{BTreeSet, HashMap, HashSet}; /// For such a sequence we then retarget the destination of the first jump to the final jump destination of the sequence. /// Lastly, the newly bypassed blocks are considered dead code and are removed. pub fn propagate_control_flow(project: &mut Project) { - let cfg = crate::analysis::graph::get_program_cfg(&project.program); - let nodes_without_incoming_edges_at_beginning = get_nodes_without_incoming_edge(&cfg); + let cfg_before_normalization = graph::get_program_cfg(&project.program); + let nodes_without_incoming_edges_at_beginning = + get_nodes_without_incoming_edge(&cfg_before_normalization); let mut jmps_to_retarget = HashMap::new(); - for node in cfg.node_indices() { - if let Node::BlkStart(block, sub) = cfg[node] { - // Conditions that we know to be true "on" a particular outgoing - // edge. - let mut true_conditions = Vec::new(); - if let Some(block_precondition) = get_known_conditional_at_end_of_block(&cfg, node) { - true_conditions.push(block_precondition); + for node in cfg_before_normalization.node_indices() { + let Node::BlkStart(block, sub) = cfg_before_normalization[node] else { + continue; + }; + // Conditions that we know to be true "on" a particular outgoing + // edge. + let mut true_conditions = Vec::new(); + if let Some(block_precondition) = + get_block_precondition_after_defs(&cfg_before_normalization, node) + { + true_conditions.push(block_precondition); + } + match &block.term.jmps[..] { + [Term { + tid: call_tid, + term: + Jmp::Call { + target: _, + return_: Some(return_target), + }, + }] + | [Term { + tid: call_tid, + term: + Jmp::CallInd { + target: _, + return_: Some(return_target), + }, + }] + | [Term { + tid: call_tid, + term: + Jmp::CallOther { + description: _, + return_: Some(return_target), + }, + }] => { + if let Some(new_target) = find_target_for_retargetable_jump( + return_target, + &sub.term, + // Call may have side-effects that invalidate our + // knowledge about any condition we know to be true + // after execution of all DEFs in a block. + &Vec::new(), + ) { + jmps_to_retarget.insert(call_tid.clone(), new_target); + } } - match &block.term.jmps[..] { - [Term { - tid: call_tid, - term: - Jmp::Call { - target: _, - return_: Some(return_target), - }, - }] - | [Term { - tid: call_tid, - term: - Jmp::CallInd { - target: _, - return_: Some(return_target), - }, - }] - | [Term { - tid: call_tid, - term: - Jmp::CallOther { - description: _, - return_: Some(return_target), - }, - }] => { - if let Some(new_target) = find_target_for_retargetable_jump( - return_target, - &sub.term, - // Call may have side-effects that invalidate our - // knowledge about any condition we know to be true - // after execution of all DEFs in a block. - &Vec::new(), - ) { - jmps_to_retarget.insert(call_tid.clone(), new_target); - } + [Term { + tid: jump_tid, + term: Jmp::Branch(target), + }] => { + if let Some(new_target) = + find_target_for_retargetable_jump(target, &sub.term, &true_conditions) + { + jmps_to_retarget.insert(jump_tid.clone(), new_target); } - [Term { - tid: jump_tid, - term: Jmp::Branch(target), - }] => { - if let Some(new_target) = - find_target_for_retargetable_jump(target, &sub.term, &true_conditions) - { - jmps_to_retarget.insert(jump_tid.clone(), new_target); - } + } + [Term { + term: + Jmp::CBranch { + condition, + target: if_target, + }, + tid: jump_tid_if, + }, Term { + term: Jmp::Branch(else_target), + tid: jump_tid_else, + }] => { + true_conditions.push(condition.clone()); + if let Some(new_target) = + find_target_for_retargetable_jump(if_target, &sub.term, &true_conditions) + { + jmps_to_retarget.insert(jump_tid_if.clone(), new_target); } - [Term { - term: - Jmp::CBranch { - condition, - target: if_target, - }, - tid: jump_tid_if, - }, Term { - term: Jmp::Branch(else_target), - tid: jump_tid_else, - }] => { - true_conditions.push(condition.clone()); - if let Some(new_target) = - find_target_for_retargetable_jump(if_target, &sub.term, &true_conditions) - { - jmps_to_retarget.insert(jump_tid_if.clone(), new_target); - } - let condition = true_conditions.pop().unwrap(); - true_conditions.push(negate_condition(condition)); - if let Some(new_target) = - find_target_for_retargetable_jump(else_target, &sub.term, &true_conditions) - { - jmps_to_retarget.insert(jump_tid_else.clone(), new_target); - } + let condition = true_conditions.pop().unwrap(); + true_conditions.push(negate_condition(condition)); + if let Some(new_target) = + find_target_for_retargetable_jump(else_target, &sub.term, &true_conditions) + { + jmps_to_retarget.insert(jump_tid_else.clone(), new_target); } - _ => (), } + _ => (), } } retarget_jumps(project, jmps_to_retarget); - let cfg = crate::analysis::graph::get_program_cfg(&project.program); - let nodes_without_incoming_edges_at_end = get_nodes_without_incoming_edge(&cfg); + let cfg_after_normalization = graph::get_program_cfg(&project.program); + let nodes_without_incoming_edges_at_end = + get_nodes_without_incoming_edge(&cfg_after_normalization); remove_new_orphaned_blocks( project, @@ -121,24 +128,25 @@ fn retarget_jumps(project: &mut Project, mut jmps_to_retarget: HashMap for sub in project.program.term.subs.values_mut() { for blk in sub.term.blocks.iter_mut() { for jmp in blk.term.jmps.iter_mut() { - if let Some(new_target) = jmps_to_retarget.remove(&jmp.tid) { - match &mut jmp.term { - Jmp::Branch(target) - | Jmp::CBranch { target, .. } - | Jmp::Call { - target: _, - return_: Some(target), - } - | Jmp::CallInd { - target: _, - return_: Some(target), - } - | Jmp::CallOther { - description: _, - return_: Some(target), - } => *target = new_target, - _ => panic!("Unexpected type of jump encountered."), + let Some(new_target) = jmps_to_retarget.remove(&jmp.tid) else { + continue; + }; + match &mut jmp.term { + Jmp::Branch(target) + | Jmp::CBranch { target, .. } + | Jmp::Call { + target: _, + return_: Some(target), } + | Jmp::CallInd { + target: _, + return_: Some(target), + } + | Jmp::CallOther { + description: _, + return_: Some(target), + } => *target = new_target, + _ => panic!("Unexpected type of jump encountered."), } } } @@ -156,17 +164,20 @@ fn find_target_for_retargetable_jump( ) -> Option { let mut visited_tids = BTreeSet::from([target.clone()]); let mut new_target = target; + while let Some(block) = sub.blocks.iter().find(|blk| blk.tid == *new_target) { - if let Some(retarget) = check_for_retargetable_block(block, true_conditions) { - if !visited_tids.insert(retarget.clone()) { - // The target was already visited, so we abort the search to avoid infinite loops. - break; - } - new_target = retarget; - } else { + let Some(retarget) = check_for_retargetable_block(block, true_conditions) else { + break; + }; + + if !visited_tids.insert(retarget.clone()) { + // The target was already visited, so we abort the search to avoid infinite loops. break; } + + new_target = retarget; } + if new_target != target { Some(new_target.clone()) } else { @@ -219,7 +230,7 @@ fn check_for_retargetable_block<'a>( /// /// Checks whether all edges incoming to the given block are conditioned on the /// same condition. If true, the shared condition is returned. -fn check_if_same_conditional_incoming(graph: &Graph, node: NodeIndex) -> Option { +fn get_precondition_from_incoming_edges(graph: &Graph, node: NodeIndex) -> Option { let incoming_edges: Vec<_> = graph .edges_directed(node, petgraph::Direction::Incoming) .collect(); @@ -251,7 +262,7 @@ fn check_if_same_conditional_incoming(graph: &Graph, node: NodeIndex) -> Option< // First iteration. None => first_condition = Some(condition), // Same condition as first incoming edge. - Some(prev_condition) if *prev_condition == condition => continue, + Some(first_condition) if *first_condition == condition => continue, // A different condition implies that we can not make a definitive // statement. _ => return None, @@ -269,37 +280,35 @@ fn check_if_same_conditional_incoming(graph: &Graph, node: NodeIndex) -> Option< /// If yes, check whether the conditional expression will still evaluate to true /// after the execution of all DEFs of the block. /// If yes, return the conditional expression. -fn get_known_conditional_at_end_of_block(cfg: &Graph, node: NodeIndex) -> Option { - if let Node::BlkStart(block, sub) = cfg[node] { - // Check whether we know the result of a conditional at the start of the block - let mut known_conditional_result: Option = - if block.tid != sub.term.blocks[0].tid { - check_if_same_conditional_incoming(cfg, node) - } else { - // Function start blocks always have incoming caller edges - // even if these edges are missing in the CFG because we do not know the callers. - None - }; - // If we have a known conditional result at the start of the block, - // check whether it will still hold true at the end of the block. - if let Some(conditional) = &known_conditional_result { - let input_vars = conditional.input_vars(); - for def in block.term.defs.iter() { - match &def.term { - Def::Assign { var, .. } | Def::Load { var, .. } => { - if input_vars.contains(&var) { - known_conditional_result = None; - break; - } - } - Def::Store { .. } => (), +fn get_block_precondition_after_defs(cfg: &Graph, node: NodeIndex) -> Option { + let Node::BlkStart(block, sub) = cfg[node] else { + return None; + }; + + if block.tid == sub.term.blocks[0].tid { + // Function start blocks always have incoming caller edges + // even if these edges are missing in the CFG because we do not know the callers. + return None; + } + + // Check whether we know the result of a conditional at the start of the block + let block_precondition = get_precondition_from_incoming_edges(cfg, node)?; + + // If we have a known conditional result at the start of the block, + // check whether it will still hold true at the end of the block. + let input_vars = block_precondition.input_vars(); + for def in block.term.defs.iter() { + match &def.term { + Def::Assign { var, .. } | Def::Load { var, .. } => { + if input_vars.contains(&var) { + return None; } } + Def::Store { .. } => (), } - known_conditional_result - } else { - None } + + Some(block_precondition) } /// Negate the given boolean condition expression, removing double negations in the process. @@ -320,13 +329,15 @@ fn negate_condition(expr: Expression) -> Expression { /// Iterates the CFG and returns all node's blocks, that do not have an incoming edge. fn get_nodes_without_incoming_edge(cfg: &Graph) -> HashSet { - let mut nodes_without_incoming_edges = HashSet::new(); - for node in cfg.node_indices() { - if cfg.neighbors_directed(node, Incoming).next().is_none() { - nodes_without_incoming_edges.insert(cfg[node].get_block().tid.clone()); - } - } - nodes_without_incoming_edges + cfg.node_indices() + .filter_map(|node| { + if cfg.neighbors_directed(node, Incoming).next().is_none() { + Some(cfg[node].get_block().tid.clone()) + } else { + None + } + }) + .collect() } /// Calculates the difference of the orphaned blocks and removes them from the project. From 5d2a31c5b4446069f741c5f1811ffa8e962beb52 Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Fri, 3 May 2024 16:41:31 +0200 Subject: [PATCH 07/11] lib/ir/project: update docs of CF-propagation pass No functional changes. Signed-off-by: Valentin Obst --- .../intermediate_representation/project.rs | 1 - .../project/propagate_control_flow.rs | 75 ++++++++++++------- 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/src/cwe_checker_lib/src/intermediate_representation/project.rs b/src/cwe_checker_lib/src/intermediate_representation/project.rs index 31e3694e3..5f3cd165c 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project.rs @@ -5,7 +5,6 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; /// Contains implementation of the block duplication normalization pass. mod block_duplication_normalization; use block_duplication_normalization::*; -/// Contains implementation of the propagate control flow normalization pass. mod propagate_control_flow; use propagate_control_flow::*; diff --git a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs index dbac0da1c..89f0b2f59 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs @@ -1,3 +1,22 @@ +//! Control Flow Propagation Normalization Pass +//! +//! The `propagate_control_flow` normalization pass tries to simplify the +//! representation of sequences of if-else blocks that all have the same +//! condition. After this transformation the program should be of a form where +//! either all or none of the blocks are executed. Such sequences are often +//! generated by sequences of conditional assignment assembly instructions. +//! +//! In addition to the above, the pass also removes blocks that consist of a +//! single, unconditional jump. +//! +//! For each "re-targetalbe" intraprocedural control flow transfer, i.e., +//! call-returns and (conditional) jumps, the pass computes a new target that is +//! equivalent to the old target but skips zero or more intermediate blocks. +//! Knowledge about conditions that are always true when a particular branch is +//! executed are used to resolve the target of intermediate conditional jumps. +//! +//! Lastly, the newly bypassed blocks are considered dead code and are removed. + use crate::analysis::graph::{self, Edge, Graph, Node}; use crate::intermediate_representation::*; @@ -6,17 +25,10 @@ use std::collections::{BTreeSet, HashMap, HashSet}; use petgraph::graph::NodeIndex; use petgraph::Direction::Incoming; -/// The `propagate_control_flow` normalization pass tries to simplify the representation of -/// sequences of if-else blocks that all have the same condition -/// so that they are either all executed or none of the blocks are executed. -/// Such sequences are often generated by sequences of conditional assignment assembly instructions. +/// Performs the Control Flow Propagation normalization pass. /// -/// To simplify the generated control flow graph -/// (and thus propagate the knowledge that either all or none of these blocks are executed to the control flow graph) -/// we look for sequences of (conditional) jumps where the final jump target is determined by the source of the first jump -/// (because we know that the conditionals for all jumps evaluate to the same value along the sequence). -/// For such a sequence we then retarget the destination of the first jump to the final jump destination of the sequence. -/// Lastly, the newly bypassed blocks are considered dead code and are removed. +/// See the module-level documentation for more information on what this pass +/// does. pub fn propagate_control_flow(project: &mut Project) { let cfg_before_normalization = graph::get_program_cfg(&project.program); let nodes_without_incoming_edges_at_beginning = @@ -30,6 +42,8 @@ pub fn propagate_control_flow(project: &mut Project) { // Conditions that we know to be true "on" a particular outgoing // edge. let mut true_conditions = Vec::new(); + // Check if some condition must be true at the beginning of the block, + // and still holds after all DEFs are executed. if let Some(block_precondition) = get_block_precondition_after_defs(&cfg_before_normalization, node) { @@ -123,7 +137,8 @@ pub fn propagate_control_flow(project: &mut Project) { ); } -/// Insert the new target TIDs into jump instructions for which a new target was computed. +/// Inserts the new target TIDs into jump instructions for which a new target +/// was computed. fn retarget_jumps(project: &mut Project, mut jmps_to_retarget: HashMap) { for sub in project.program.term.subs.values_mut() { for blk in sub.term.blocks.iter_mut() { @@ -153,10 +168,14 @@ fn retarget_jumps(project: &mut Project, mut jmps_to_retarget: HashMap } } -/// Under the assumption that the given `true_condition` expression evaluates to `true`, -/// check whether we can retarget jumps to the given target to another final jump target. -/// I.e. we follow sequences of jumps that are not interrupted by [`Def`] instructions to their final jump target -/// using the `true_condition` to resolve the targets of conditional jumps if possible. +/// Under the assumption that the given `true_conditions` expressions all +/// evaluate to `true`, check whether we can retarget jumps to the given target +/// to another final jump target. +/// +/// In other words, we follow sequences of jumps that are not interrupted by +/// [`Def`] instructions (or other things that may have side-effects) to their +/// final jump target using the `true_condition` to resolve the targets of +/// conditional jumps if possible. fn find_target_for_retargetable_jump( target: &Tid, sub: &Sub, @@ -171,7 +190,8 @@ fn find_target_for_retargetable_jump( }; if !visited_tids.insert(retarget.clone()) { - // The target was already visited, so we abort the search to avoid infinite loops. + // The target was already visited, so we abort the search to avoid + // infinite loops. break; } @@ -186,9 +206,10 @@ fn find_target_for_retargetable_jump( } /// Check whether the given block does not contain any [`Def`] instructions. -/// If yes, check whether the target of the jump at the end of the block is predictable -/// under the assumption that the given `true_condition` expression evaluates to true. -/// If it can be predicted, return the target of the jump. +/// If yes, check whether the target of the jump at the end of the block is +/// predictable under the assumption that the given `true_conditions` +/// expressions all evaluate to true. If it can be predicted, return the target +/// of the jump. fn check_for_retargetable_block<'a>( block: &'a Term, true_conditions: &[Expression], @@ -286,12 +307,14 @@ fn get_block_precondition_after_defs(cfg: &Graph, node: NodeIndex) -> Option Option Expression { if let Expression::UnOp { op: UnOpType::BoolNegate, @@ -327,7 +351,7 @@ fn negate_condition(expr: Expression) -> Expression { } } -/// Iterates the CFG and returns all node's blocks, that do not have an incoming edge. +/// Iterates the CFG and returns all nodes that do not have an incoming edge. fn get_nodes_without_incoming_edge(cfg: &Graph) -> HashSet { cfg.node_indices() .filter_map(|node| { @@ -340,7 +364,8 @@ fn get_nodes_without_incoming_edge(cfg: &Graph) -> HashSet { .collect() } -/// Calculates the difference of the orphaned blocks and removes them from the project. +/// Calculates the difference of the orphaned blocks and removes them from the +/// project. fn remove_new_orphaned_blocks( project: &mut Project, orphaned_blocks_before: HashSet, From 15c2ff9e518e800fe118b4a03e65b646d6b2cc94 Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Tue, 7 May 2024 23:29:03 +0200 Subject: [PATCH 08/11] lib/ir/project: add pass to retarget returns from non-returning calls The original fix for Issue #461 in Commit ("lib/ir/project: propagate control flow for call returns") was incomplete. The original problem was due to a call to a function without a return instruction "returning" to a block that could be optimized away in the propagate control flow pass. Retargeting the call return can only solve the issue when the return block can be retargeted (and the retarget is not optimized away), which is not the case for condition blocks. Thus, always retarget returns from calls to functions without a ret to the artificial sink. Link: https://github.com/fkie-cad/cwe_checker/pull/462#issuecomment-2093656674 Signed-off-by: Valentin Obst --- .../intermediate_representation/project.rs | 152 +++++++++++++----- 1 file changed, 108 insertions(+), 44 deletions(-) diff --git a/src/cwe_checker_lib/src/intermediate_representation/project.rs b/src/cwe_checker_lib/src/intermediate_representation/project.rs index 5f3cd165c..503782f03 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project.rs @@ -108,15 +108,68 @@ impl Project { } } - /// Replace the return-to TID of calls to extern symbols that are marked as non-returning (e.g. `exit(..)`) - /// with the provided TID of an artificial sink block. + /// Replaces the return-to TID of calls to non-returning functions with the + /// TID of the artificial sink block. /// - /// Returns `true` if at least one return target has been replaced this way. - fn retarget_calls_to_non_returning_symbols_to_artificial_sink( - &mut self, - dummy_blk_tid: &Tid, - ) -> bool { - let mut has_at_least_one_jmp_been_retargeted = false; + /// We distinguish two kinds of non-returning functions: + /// + /// - extern symbols that are marked as non-returning (e.g. `exit(..)`), + /// - functions without a return instruction. + /// + /// For calls to the latter functions, no [`CallReturn`] nodes and + /// corresponding edges will be generated in the CFG. This implies that no + /// interprocedural analysis will happen for those calls. Furthermore, the + /// missing incoming edge to the return node implies that the node may be + /// optimized away by the [control flow propagation pass]. The reference to + /// the return site is now "dangling" and may lead to panics when + /// constructing the CFG (Issue #461). + /// + /// Thus, we lose nothing if we retarget the return block, even if our + /// analysis is incorrect and the callee in fact returns to the originally + /// indicated site. Cases where we misclassify a callee include: + /// + /// - functions ending in an indirect tail jump, + /// - epilogs like `lrd pc, [sp], #0x04` that are essentially a ret but + /// Ghidra sometimes thinks its an indirect jump, + /// - cases where the callee code that we get from Gidra is incomplete. + /// + /// This heuristic works better when the sub-to-block mapping is unique + /// since this pass may inline return site into callees that end in a tail + /// jump, i.e., call this after [`make_block_to_sub_mapping_unique`]. + /// + /// [`CallReturn`]: crate::analysis::graph::Node::CallReturn + /// [control flow propagation pass]: mod@propagate_control_flow + #[must_use] + fn retarget_non_returning_calls_to_artificial_sink(&mut self) -> Vec { + let mut log_messages = Vec::new(); + let dummy_sub_tid = Tid::new("Artificial Sink Sub"); + let dummy_blk_tid = Tid::new("Artificial Sink Block"); + let non_returning_subs: HashSet<_> = self + .program + .term + .subs + .values() + .filter_map(|sub| { + let sub_returns = sub.term.blocks.iter().any(|block| { + block + .term + .jmps + .iter() + .any(|jmp| matches!(jmp.term, Jmp::Return(..))) + }); + + if sub_returns || sub.tid == dummy_sub_tid { + None + } else { + log_messages.push(LogMessage::new_info(format!( + "{} is non-returning.", + sub.tid + ))); + + Some(sub.tid.clone()) + } + }) + .collect(); for sub in self.program.term.subs.values_mut() { for block in sub.term.blocks.iter_mut() { @@ -128,30 +181,61 @@ impl Project { { if let Some(extern_symbol) = self.program.term.extern_symbols.get(target) { if extern_symbol.no_return { - // Call to an extern symbol that does not return. *return_tid = dummy_blk_tid.clone(); - has_at_least_one_jmp_been_retargeted = true; } + } else if non_returning_subs.contains(target) { + log_messages.push(LogMessage::new_info(format!( + "Call @ {} to {} does not return to {}.", + jmp.tid, target, return_tid + ))); + *return_tid = dummy_blk_tid.clone(); } } } } } - has_at_least_one_jmp_been_retargeted + + log_messages + } + + /// Adds an artificial target for returns from non-returning functions and + /// jumps to non-existing TIDs. + fn add_artifical_sinks(&mut self) { + let dummy_sub_tid = Tid::new("Artificial Sink Sub"); + let dummy_blk_tid = Tid::new("Artificial Sink Block"); + let dummy_sub: Term = Term { + tid: dummy_sub_tid, + term: Sub { + name: "Artificial Sink Sub".to_string(), + blocks: vec![Term { + tid: dummy_blk_tid, + term: Blk { + defs: Vec::new(), + jmps: Vec::new(), + indirect_jmp_targets: Vec::new(), + }, + }], + calling_convention: None, + }, + }; + + self.program + .term + .subs + .insert(dummy_sub.tid.clone(), dummy_sub); } /// Replace jumps to nonexisting TIDs with jumps to a dummy target /// representing an artificial sink in the control flow graph. /// Return a log message for each replaced jump target. - /// Also retarget the return address of extern symbol calls that are marked as non-returning to the artificial sink. /// /// Nonexisting jump targets may be generated by the Ghidra backend /// if the data at the target address is not a valid assembly instruction. #[must_use] - fn remove_references_to_nonexisting_tids_and_retarget_non_returning_calls( - &mut self, - ) -> Vec { - // Gather all existing jump targets + fn remove_references_to_nonexisting_tids(&mut self) -> Vec { + let mut log_messages = Vec::new(); + + // Gather all existing jump targets. let mut jump_target_tids = HashSet::new(); for sub in self.program.term.subs.values() { jump_target_tids.insert(sub.tid.clone()); @@ -162,10 +246,10 @@ impl Project { for symbol_tid in self.program.term.extern_symbols.keys() { jump_target_tids.insert(symbol_tid.clone()); } + // Replace all jumps to non-existing jump targets with jumps to dummy targets let dummy_sub_tid = Tid::new("Artificial Sink Sub"); let dummy_blk_tid = Tid::new("Artificial Sink Block"); - let mut log_messages = Vec::new(); for sub in self.program.term.subs.values_mut() { for block in sub.term.blocks.iter_mut() { if let Err(mut logs) = @@ -184,31 +268,7 @@ impl Project { } } } - // Also replace return targets of calls to non-returning extern symbols - let dummy_blk_needs_to_be_added = - self.retarget_calls_to_non_returning_symbols_to_artificial_sink(&dummy_blk_tid); - // If at least one dummy jump was inserted, add the corresponding dummy sub and block to the program. - if dummy_blk_needs_to_be_added || !log_messages.is_empty() { - let dummy_sub: Term = Term { - tid: dummy_sub_tid, - term: Sub { - name: "Artificial Sink Sub".to_string(), - blocks: vec![Term { - tid: dummy_blk_tid, - term: Blk { - defs: Vec::new(), - jmps: Vec::new(), - indirect_jmp_targets: Vec::new(), - }, - }], - calling_convention: None, - }, - }; - self.program - .term - .subs - .insert(dummy_sub.tid.clone(), dummy_sub); - } + log_messages } @@ -288,10 +348,14 @@ impl Project { #[must_use] pub fn normalize(&mut self) -> Vec { let mut logs = self.remove_duplicate_tids(); + self.add_artifical_sinks(); + logs.append(self.remove_references_to_nonexisting_tids().as_mut()); + make_block_to_sub_mapping_unique(self); logs.append( - &mut self.remove_references_to_nonexisting_tids_and_retarget_non_returning_calls(), + self.retarget_non_returning_calls_to_artificial_sink() + .as_mut(), ); - make_block_to_sub_mapping_unique(self); + crate::analysis::expression_propagation::propagate_input_expression(self); self.substitute_trivial_expressions(); crate::analysis::dead_variable_elimination::remove_dead_var_assignments(self); From 86f3c123f6c8b0c85d18736eea5db3a5460689fb Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Sat, 4 May 2024 11:36:53 +0200 Subject: [PATCH 09/11] lib/ir/project: test `call_return_to_cond_jump_removed` in CF-propagation Add a test to verify that retargeting returns from calls to non-returning functions is indeed solving the problem this pass has with "dangling" references to return sites. Link: https://github.com/fkie-cad/cwe_checker/pull/462#issuecomment-2093656674 Signed-off-by: Valentin Obst --- .../project/propagate_control_flow.rs | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs index 89f0b2f59..02a2aedef 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs @@ -622,6 +622,72 @@ pub mod tests { ); } + #[test] + fn call_return_to_cond_jump_removed() { + let sub_1 = Sub { + name: "sub_1".to_string(), + calling_convention: None, + blocks: vec![ + mock_condition_block("cond_blk_1", "cond_blk_2", "end_blk_1"), + mock_block_with_defs_and_call("call_blk", "sub_2", "cond_blk_2"), + mock_condition_block("cond_blk_2", "end_blk_2", "end_blk_1"), + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ], + }; + let sub_1 = Term { + tid: Tid::new("sub_1"), + term: sub_1, + }; + let sub_2 = Sub { + name: "sub_2".to_string(), + calling_convention: None, + blocks: vec![mock_block_with_defs("loop_block", "loop_block")], + }; + let sub_2 = Term { + tid: Tid::new("sub_2"), + term: sub_2, + }; + let mut project = Project::mock_arm32(); + project.program.term.subs = + BTreeMap::from([(Tid::new("sub_1"), sub_1), (Tid::new("sub_2"), sub_2)]); + + project.add_artifical_sinks(); + let mut log_msg_non_returning = project.retarget_non_returning_calls_to_artificial_sink(); + assert_eq!( + log_msg_non_returning.pop().unwrap().text, + "Call @ call_blk_call to sub_2 does not return to cond_blk_2.".to_owned() + ); + assert_eq!( + log_msg_non_returning.pop().unwrap().text, + "sub_2 is non-returning.".to_owned() + ); + assert_eq!( + log_msg_non_returning.pop().unwrap().text, + "sub_1 is non-returning.".to_owned() + ); + + propagate_control_flow(&mut project); + + // Does not panic even though original call return block was removed. + let _ = graph::get_program_cfg(&project.program); + + let expected_blocks = vec![ + // `cond_blk_1` can be retarget. + mock_condition_block("cond_blk_1", "end_blk_2", "end_blk_1"), + // `call_blk` was re-targeted to artificial sink. + mock_block_with_defs_and_call("call_blk", "sub_2", "Artificial Sink Block"), + // `cond_blk_2` was removed since cond_blk_1 was re-targeted. + mock_block_with_defs("end_blk_1", "end_blk_1"), + mock_block_with_defs("end_blk_2", "end_blk_2"), + ]; + + assert_eq!( + &project.program.term.subs[&Tid::new("sub_1")].term.blocks[..], + &expected_blocks[..] + ); + } + #[test] fn multiple_incoming_same_condition() { let sub = Sub { From 818e4ef9f31608559aa09b6e24c606d552e73116 Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Tue, 14 May 2024 12:45:55 +0200 Subject: [PATCH 10/11] lib/ir/project: keep uniq. block-to-sub map in non-ret. calls pass The pass that retargets "returns" from non-returning functions runs after the block-to-sub mapping has been made unique. This invariant is relied upon by later analyses. Currently, the pass does not uphold this invariant since it always retargets to the same global artificial sink block. Modify the pass s.t. it preserves a unique block-to-sub mapping by retargeting returns directly to the Sub's artificial sink. Signed-off-by: Valentin Obst --- .../src/intermediate_representation/blk.rs | 25 +- .../intermediate_representation/project.rs | 221 ++++++++++-------- .../project/propagate_control_flow.rs | 12 +- .../src/intermediate_representation/sub.rs | 46 ++++ .../src/intermediate_representation/term.rs | 44 +++- 5 files changed, 238 insertions(+), 110 deletions(-) diff --git a/src/cwe_checker_lib/src/intermediate_representation/blk.rs b/src/cwe_checker_lib/src/intermediate_representation/blk.rs index 6f3f11b0f..75e32c6ba 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/blk.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/blk.rs @@ -39,19 +39,22 @@ pub struct Blk { } impl Term { - /// Remove indirect jump target addresses for which no corresponding target block exists. - /// Return an error message for each removed address. + /// Remove indirect jump target addresses for which no corresponding target + /// block exists. + /// + /// Returns an error message for each removed address. pub fn remove_nonexisting_indirect_jump_targets( &mut self, - known_block_tids: &HashSet, + all_jump_targets: &HashSet, ) -> Result<(), Vec> { let mut logs = Vec::new(); + self.term.indirect_jmp_targets = self .term .indirect_jmp_targets .iter() .filter_map(|target| { - if known_block_tids.get(target).is_some() { + if all_jump_targets.contains(target) { Some(target.clone()) } else { let error_msg = @@ -61,12 +64,26 @@ impl Term { } }) .collect(); + if logs.is_empty() { Ok(()) } else { Err(logs) } } + + /// Returns a new artificial sink block with the given suffix attached to + /// its ID. + pub fn artificial_sink(id_suffix: &str) -> Self { + Self { + tid: Tid::artificial_sink_block(id_suffix), + term: Blk { + defs: Vec::with_capacity(0), + jmps: Vec::with_capacity(0), + indirect_jmp_targets: Vec::with_capacity(0), + }, + } + } } impl fmt::Display for Blk { diff --git a/src/cwe_checker_lib/src/intermediate_representation/project.rs b/src/cwe_checker_lib/src/intermediate_representation/project.rs index 503782f03..3829aebb4 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project.rs @@ -109,11 +109,11 @@ impl Project { } /// Replaces the return-to TID of calls to non-returning functions with the - /// TID of the artificial sink block. + /// TID of the artificial sink block in the caller. /// /// We distinguish two kinds of non-returning functions: /// - /// - extern symbols that are marked as non-returning (e.g. `exit(..)`), + /// - extern symbols that are marked as non-returning, e.g.,`exit(..)`, /// - functions without a return instruction. /// /// For calls to the latter functions, no [`CallReturn`] nodes and @@ -133,18 +133,79 @@ impl Project { /// Ghidra sometimes thinks its an indirect jump, /// - cases where the callee code that we get from Gidra is incomplete. /// - /// This heuristic works better when the sub-to-block mapping is unique + /// This heuristic works better when the block-to-sub mapping is unique /// since this pass may inline return site into callees that end in a tail /// jump, i.e., call this after [`make_block_to_sub_mapping_unique`]. + /// This pass preserves a unique block-to-sub mapping. /// /// [`CallReturn`]: crate::analysis::graph::Node::CallReturn /// [control flow propagation pass]: mod@propagate_control_flow #[must_use] fn retarget_non_returning_calls_to_artificial_sink(&mut self) -> Vec { + let (non_returning_subs, mut log_messages) = self.find_non_returning_subs(); + + // INVARIANT: A unique block-to-sub mapping is preserved. + for sub in self + .program + .term + .subs + .values_mut() + .filter(|sub| !sub.tid.is_artificial_sink_sub()) + { + let sub_id_suffix = sub.id_suffix(); + let mut one_or_more_call_retargeted = false; + + for block in sub.term.blocks.iter_mut() { + for jmp in block.term.jmps.iter_mut() { + let Jmp::Call { + target, + return_: Some(return_tid), + } = &mut jmp.term + else { + continue; + }; + + if return_tid.is_artificial_sink_block(&sub_id_suffix) { + // The call is already returning to the function's + // artificial sink so there is nothing to do. + continue; + } else if let Some(extern_symbol) = self.program.term.extern_symbols.get(target) + { + if extern_symbol.no_return { + // Reroute returns from calls to non-returning + // library functions. + *return_tid = Tid::artificial_sink_block(&sub_id_suffix); + + one_or_more_call_retargeted = true; + } + } else if non_returning_subs.contains(target) { + // Reroute returns from calls to non-returning + // functions within the program. + log_messages.push(LogMessage::new_info(format!( + "Call @ {} to {} does not return to {}.", + jmp.tid, target, return_tid + ))); + + *return_tid = Tid::artificial_sink_block(&sub_id_suffix); + + one_or_more_call_retargeted = true; + } + } + } + + // Add artificial sink block if required. + if one_or_more_call_retargeted { + sub.add_artifical_sink(); + } + } + + log_messages + } + + /// Returns the set of all subs without a return instruction. + fn find_non_returning_subs(&self) -> (HashSet, Vec) { let mut log_messages = Vec::new(); - let dummy_sub_tid = Tid::new("Artificial Sink Sub"); - let dummy_blk_tid = Tid::new("Artificial Sink Block"); - let non_returning_subs: HashSet<_> = self + let non_returning_subs = self .program .term .subs @@ -158,7 +219,7 @@ impl Project { .any(|jmp| matches!(jmp.term, Jmp::Return(..))) }); - if sub_returns || sub.tid == dummy_sub_tid { + if sub_returns || sub.tid.is_artificial_sink_sub() { None } else { log_messages.push(LogMessage::new_info(format!( @@ -171,98 +232,59 @@ impl Project { }) .collect(); - for sub in self.program.term.subs.values_mut() { - for block in sub.term.blocks.iter_mut() { - for jmp in block.term.jmps.iter_mut() { - if let Jmp::Call { - target, - return_: Some(return_tid), - } = &mut jmp.term - { - if let Some(extern_symbol) = self.program.term.extern_symbols.get(target) { - if extern_symbol.no_return { - *return_tid = dummy_blk_tid.clone(); - } - } else if non_returning_subs.contains(target) { - log_messages.push(LogMessage::new_info(format!( - "Call @ {} to {} does not return to {}.", - jmp.tid, target, return_tid - ))); - *return_tid = dummy_blk_tid.clone(); - } - } - } - } - } - - log_messages + (non_returning_subs, log_messages) } - /// Adds an artificial target for returns from non-returning functions and - /// jumps to non-existing TIDs. - fn add_artifical_sinks(&mut self) { - let dummy_sub_tid = Tid::new("Artificial Sink Sub"); - let dummy_blk_tid = Tid::new("Artificial Sink Block"); - let dummy_sub: Term = Term { - tid: dummy_sub_tid, - term: Sub { - name: "Artificial Sink Sub".to_string(), - blocks: vec![Term { - tid: dummy_blk_tid, - term: Blk { - defs: Vec::new(), - jmps: Vec::new(), - indirect_jmp_targets: Vec::new(), - }, - }], - calling_convention: None, - }, - }; - + /// Adds a function that serves as an artificial sink in the CFG. + fn add_artifical_sink(&mut self) { self.program .term .subs - .insert(dummy_sub.tid.clone(), dummy_sub); + .insert(Tid::artificial_sink_sub(), Term::::artificial_sink()); } - /// Replace jumps to nonexisting TIDs with jumps to a dummy target - /// representing an artificial sink in the control flow graph. - /// Return a log message for each replaced jump target. - /// - /// Nonexisting jump targets may be generated by the Ghidra backend - /// if the data at the target address is not a valid assembly instruction. - #[must_use] - fn remove_references_to_nonexisting_tids(&mut self) -> Vec { - let mut log_messages = Vec::new(); - - // Gather all existing jump targets. + /// Returns the set of all valid jump targets. + fn find_all_jump_targets(&self) -> HashSet { let mut jump_target_tids = HashSet::new(); + for sub in self.program.term.subs.values() { jump_target_tids.insert(sub.tid.clone()); for block in sub.term.blocks.iter() { jump_target_tids.insert(block.tid.clone()); } } + for symbol_tid in self.program.term.extern_symbols.keys() { jump_target_tids.insert(symbol_tid.clone()); } - // Replace all jumps to non-existing jump targets with jumps to dummy targets - let dummy_sub_tid = Tid::new("Artificial Sink Sub"); - let dummy_blk_tid = Tid::new("Artificial Sink Block"); + jump_target_tids + } + + /// Replace jumps to nonexisting TIDs with jumps to a dummy target + /// representing an artificial sink in the control flow graph. + /// Return a log message for each replaced jump target. + /// + /// Nonexisting jump targets may be generated by the Ghidra backend + /// if the data at the target address is not a valid assembly instruction. + #[must_use] + fn remove_references_to_nonexisting_tids(&mut self) -> Vec { + let mut log_messages = Vec::new(); + let all_jump_targets = self.find_all_jump_targets(); + + // Replace all jumps to non-existing jump targets with jumps to dummy + // targets. for sub in self.program.term.subs.values_mut() { for block in sub.term.blocks.iter_mut() { if let Err(mut logs) = - block.remove_nonexisting_indirect_jump_targets(&jump_target_tids) + block.remove_nonexisting_indirect_jump_targets(&all_jump_targets) { log_messages.append(&mut logs); } for jmp in block.term.jmps.iter_mut() { - if let Err(log_msg) = jmp.retarget_nonexisting_jump_targets_to_dummy_tid( - &jump_target_tids, - &dummy_sub_tid, - &dummy_blk_tid, - ) { + if let Err(log_msg) = + jmp.retarget_nonexisting_jump_targets_to_artificial_sink(&all_jump_targets) + { log_messages.push(log_msg); } } @@ -348,14 +370,13 @@ impl Project { #[must_use] pub fn normalize(&mut self) -> Vec { let mut logs = self.remove_duplicate_tids(); - self.add_artifical_sinks(); + self.add_artifical_sink(); logs.append(self.remove_references_to_nonexisting_tids().as_mut()); make_block_to_sub_mapping_unique(self); logs.append( self.retarget_non_returning_calls_to_artificial_sink() .as_mut(), ); - crate::analysis::expression_propagation::propagate_input_expression(self); self.substitute_trivial_expressions(); crate::analysis::dead_variable_elimination::remove_dead_var_assignments(self); @@ -370,29 +391,32 @@ impl Project { } impl Term { - /// If the TID of a jump target or return target is not contained in `known_tids` - /// replace it with a dummy TID and return an error message. - fn retarget_nonexisting_jump_targets_to_dummy_tid( + /// If the TID of a jump target, call target, or return target is not + /// contained in in the known jump targets, replace it with a dummy TID and + /// return an error message. + fn retarget_nonexisting_jump_targets_to_artificial_sink( &mut self, - known_tids: &HashSet, - dummy_sub_tid: &Tid, - dummy_blk_tid: &Tid, + all_jump_targets: &HashSet, ) -> Result<(), LogMessage> { use Jmp::*; match &mut self.term { - BranchInd(_) => (), - Branch(tid) | CBranch { target: tid, .. } if known_tids.get(tid).is_none() => { + BranchInd(_) => Ok(()), + Branch(tid) | CBranch { target: tid, .. } if !all_jump_targets.contains(tid) => { let error_msg = format!("Jump target at {} does not exist", tid.address); let error_log = LogMessage::new_error(error_msg).location(self.tid.clone()); - *tid = dummy_blk_tid.clone(); - return Err(error_log); + + *tid = Tid::artificial_sink_block(""); + + Err(error_log) } - Call { target, return_ } if known_tids.get(target).is_none() => { + Call { target, return_ } if !all_jump_targets.contains(target) => { let error_msg = format!("Call target at {} does not exist", target.address); let error_log = LogMessage::new_error(error_msg).location(self.tid.clone()); - *target = dummy_sub_tid.clone(); + + *target = Tid::artificial_sink_sub(); *return_ = None; - return Err(error_log); + + Err(error_log) } Call { return_: Some(return_tid), @@ -405,15 +429,16 @@ impl Term { | CallOther { return_: Some(return_tid), .. - } if known_tids.get(return_tid).is_none() => { + } if !all_jump_targets.contains(return_tid) => { let error_msg = format!("Return target at {} does not exist", return_tid.address); let error_log = LogMessage::new_error(error_msg).location(self.tid.clone()); - *return_tid = dummy_blk_tid.clone(); - return Err(error_log); + + *return_tid = Tid::artificial_sink_block(""); + + Err(error_log) } - _ => (), + _ => Ok(()), } - Ok(()) } } @@ -429,12 +454,8 @@ mod tests { }; assert_eq!(jmp_term.term, Jmp::Branch(Tid::new("nonexisting_target"))); assert!(jmp_term - .retarget_nonexisting_jump_targets_to_dummy_tid( - &HashSet::new(), - &Tid::new("dummy_sub"), - &Tid::new("dummy_blk") - ) + .retarget_nonexisting_jump_targets_to_artificial_sink(&HashSet::new(),) .is_err()); - assert_eq!(jmp_term.term, Jmp::Branch(Tid::new("dummy_blk"))); + assert_eq!(jmp_term.term, Jmp::Branch(Tid::artificial_sink_block(""))); } } diff --git a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs index 02a2aedef..eed888509 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/project/propagate_control_flow.rs @@ -41,7 +41,7 @@ pub fn propagate_control_flow(project: &mut Project) { }; // Conditions that we know to be true "on" a particular outgoing // edge. - let mut true_conditions = Vec::new(); + let mut true_conditions = Vec::with_capacity(2); // Check if some condition must be true at the beginning of the block, // and still holds after all DEFs are executed. if let Some(block_precondition) = @@ -80,7 +80,7 @@ pub fn propagate_control_flow(project: &mut Project) { // Call may have side-effects that invalidate our // knowledge about any condition we know to be true // after execution of all DEFs in a block. - &Vec::new(), + &Vec::with_capacity(0), ) { jmps_to_retarget.insert(call_tid.clone(), new_target); } @@ -652,7 +652,6 @@ pub mod tests { project.program.term.subs = BTreeMap::from([(Tid::new("sub_1"), sub_1), (Tid::new("sub_2"), sub_2)]); - project.add_artifical_sinks(); let mut log_msg_non_returning = project.retarget_non_returning_calls_to_artificial_sink(); assert_eq!( log_msg_non_returning.pop().unwrap().text, @@ -675,11 +674,14 @@ pub mod tests { let expected_blocks = vec![ // `cond_blk_1` can be retarget. mock_condition_block("cond_blk_1", "end_blk_2", "end_blk_1"), - // `call_blk` was re-targeted to artificial sink. - mock_block_with_defs_and_call("call_blk", "sub_2", "Artificial Sink Block"), + // `call_blk` was re-targeted to artificial sink as it was deemed + // non-returning. + mock_block_with_defs_and_call("call_blk", "sub_2", "Artificial Sink Block_sub_1"), // `cond_blk_2` was removed since cond_blk_1 was re-targeted. mock_block_with_defs("end_blk_1", "end_blk_1"), mock_block_with_defs("end_blk_2", "end_blk_2"), + // Artificial sink block was added to sub_1 since it did not exist. + Term::::artificial_sink("_sub_1"), ]; assert_eq!( diff --git a/src/cwe_checker_lib/src/intermediate_representation/sub.rs b/src/cwe_checker_lib/src/intermediate_representation/sub.rs index 06ce8247a..7c8b71926 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/sub.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/sub.rs @@ -18,6 +18,52 @@ pub struct Sub { pub calling_convention: Option, } +impl Term { + /// Returns the ID suffix for this function. + pub fn id_suffix(&self) -> String { + format!("_{}", self.tid) + } + + /// Returns true iff the function has an artificial sink block. + pub fn has_artifical_sink(&self) -> bool { + let id_suffix = self.id_suffix(); + + self.term + .blocks + .iter() + .any(|blk| blk.tid.is_artificial_sink_block(&id_suffix)) + } + + /// Returns a new artificial sink sub. + pub fn artificial_sink() -> Self { + Self { + tid: Tid::artificial_sink_sub(), + term: Sub { + name: "Artificial Sink Sub".to_string(), + blocks: vec![Term::::artificial_sink("")], + calling_convention: None, + }, + } + } + + /// Adds an artificial sink block if there is none. + /// + /// Returns true iff the artificial sink block was added. + pub fn add_artifical_sink(&mut self) -> bool { + if self.has_artifical_sink() { + false + } else { + let id_suffix = self.id_suffix(); + + self.term + .blocks + .push(Term::::artificial_sink(&id_suffix)); + + true + } + } +} + /// A parameter or return argument of a function. #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Clone)] pub enum Arg { diff --git a/src/cwe_checker_lib/src/intermediate_representation/term.rs b/src/cwe_checker_lib/src/intermediate_representation/term.rs index 91b26b2c6..d98acc76a 100644 --- a/src/cwe_checker_lib/src/intermediate_representation/term.rs +++ b/src/cwe_checker_lib/src/intermediate_representation/term.rs @@ -14,12 +14,25 @@ pub struct Tid { } impl Tid { + /// Prefix for IDs of artificial sinks in the control flow graph. + /// + /// Dummy blocks with such TIDs are added for different purposes, e.g., as + /// targets for jumps to non-existing targets or return targets for calls to + /// non-returning functions. + const ARTIFICIAL_SINK_BLOCK_ID_PREFIX: &'static str = "Artificial Sink Block"; + /// The ID of the artificial sink sub. + /// + /// This is used as the target for calls to non-existing functions. + const ARTIFICIAL_SINK_SUB_ID: &'static str = "Artificial Sink Sub"; + /// Address for use in IDs of terms that do not have an address. + const UNKNOWN_ADDRESS: &'static str = "UNKNOWN"; + /// Generate a new term identifier with the given ID string /// and with unknown address. pub fn new(val: T) -> Tid { Tid { id: val.to_string(), - address: "UNKNOWN".to_string(), + address: Self::UNKNOWN_ADDRESS.to_string(), } } @@ -47,6 +60,35 @@ impl Tid { address: address.to_string(), } } + + /// Returns a new ID for an artificial sink block with the given suffix. + pub fn artificial_sink_block(suffix: &str) -> Self { + Self { + id: format!("{}{}", Self::ARTIFICIAL_SINK_BLOCK_ID_PREFIX, suffix), + address: Self::UNKNOWN_ADDRESS.to_string(), + } + } + + /// Returns a new ID for the artificial sink sub. + pub fn artificial_sink_sub() -> Self { + Self { + id: Self::ARTIFICIAL_SINK_SUB_ID.to_string(), + address: Self::UNKNOWN_ADDRESS.to_string(), + } + } + + /// Returns true iff the ID is for the artificial sink block with the given + /// suffix. + pub fn is_artificial_sink_block(&self, suffix: &str) -> bool { + self.id.starts_with(Self::ARTIFICIAL_SINK_BLOCK_ID_PREFIX) + && self.has_id_suffix(suffix) + && self.address == Self::UNKNOWN_ADDRESS + } + + /// Returns true iff the ID is for the artificial sink sub. + pub fn is_artificial_sink_sub(&self) -> bool { + self.id == Self::ARTIFICIAL_SINK_SUB_ID && self.address == Self::UNKNOWN_ADDRESS + } } impl std::fmt::Display for Tid { From 395e0c2ff42da5f833de5bef646f6442a293e5bb Mon Sep 17 00:00:00 2001 From: Valentin Obst Date: Tue, 14 May 2024 21:06:20 +0200 Subject: [PATCH 11/11] CHANGES: improved Control Flow Propagation pass Link: https://github.com/fkie-cad/cwe_checker/pull/462 Signed-off-by: Valentin Obst --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 65ba2ab95..bfd56cc07 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,7 @@ 0.9-dev === +- Improve Control Flow Propagation normalization pass (PR #462) - Improve taint analysis abstraction to simplify interprocedural bottom-up analysis of memory taint (PR #451) - Added check for CWE-252: Unchecked Return Value (PR #451)