Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 469: Semantically invalid IR statements due to bad Subregister Substitution #470

Merged
merged 7 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
0.9-dev
===

- Fixed an issue in the pcode to IR translation (PR #470)
- Added initial benchmarking infrastructure (PR #464)
- Improve Control Flow Propagation normalization pass (PR #462)
- Improve taint analysis abstraction to simplify interprocedural bottom-up
Expand Down
73 changes: 69 additions & 4 deletions src/caller/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,47 @@ extern crate cwe_checker_lib; // Needed for the docstring-link to work

use anyhow::Context;
use anyhow::Error;
use clap::Parser;
use clap::{Parser, ValueEnum};

use cwe_checker_lib::analysis::graph;
use cwe_checker_lib::pipeline::{disassemble_binary, AnalysisResults};
use cwe_checker_lib::utils::binary::BareMetalConfig;
use cwe_checker_lib::utils::debug;
use cwe_checker_lib::utils::log::{print_all_messages, LogLevel};
use cwe_checker_lib::utils::read_config_file;

use std::collections::{BTreeSet, HashSet};
use std::convert::From;
use std::path::PathBuf;

#[derive(ValueEnum, Clone, Debug, Copy)]
/// Selects which kind of debug output is displayed.
pub enum CliDebugMode {
/// Result of the Pointer Inference computation.
Pi,
/// Unnormalized IR form of the program.
IrRaw,
/// Normalized IR form of the program.
IrNorm,
/// Optimized IR form of the program.
IrOpt,
/// Output of the Ghidra plugin.
PcodeRaw,
}

impl From<&CliDebugMode> for debug::Stage {
fn from(mode: &CliDebugMode) -> Self {
use CliDebugMode::*;
match mode {
Pi => debug::Stage::Pi,
IrRaw => debug::Stage::Ir(debug::IrForm::Raw),
IrNorm => debug::Stage::Ir(debug::IrForm::Normalized),
IrOpt => debug::Stage::Ir(debug::IrForm::Optimized),
PcodeRaw => debug::Stage::Pcode(debug::PcodeForm::Raw),
}
}
}

#[derive(Debug, Parser)]
#[command(version, about)]
/// Find vulnerable patterns in binary executables
Expand Down Expand Up @@ -67,7 +99,39 @@ struct CmdlineArgs {
/// Output for debugging purposes.
/// The current behavior of this flag is unstable and subject to change.
#[arg(long, hide(true))]
debug: bool,
debug: Option<CliDebugMode>,

/// Read the saved output of the Pcode Extractor plugin from a file instead
/// of invoking Ghidra.
#[arg(long, hide(true))]
pcode_raw: Option<String>,
}

impl From<&CmdlineArgs> for debug::Settings {
fn from(args: &CmdlineArgs) -> Self {
let stage = match &args.debug {
None => debug::Stage::default(),
Some(mode) => mode.into(),
};
let verbosity = if args.verbose {
debug::Verbosity::Verbose
} else if args.quiet {
debug::Verbosity::Quiet
} else {
debug::Verbosity::default()
};

let mut builder = debug::SettingsBuilder::default()
.set_stage(stage)
.set_verbosity(verbosity)
.set_termination_policy(debug::TerminationPolicy::EarlyExit);

if let Some(pcode_raw) = &args.pcode_raw {
builder = builder.set_saved_pcode_raw(PathBuf::from(pcode_raw.clone()));
}

builder.build()
}
}

fn main() -> Result<(), Error> {
Expand All @@ -90,6 +154,7 @@ fn check_file_existence(file_path: &str) -> Result<String, String> {

/// Run the cwe_checker with Ghidra as its backend.
fn run_with_ghidra(args: &CmdlineArgs) -> Result<(), Error> {
let debug_settings = args.into();
let mut modules = cwe_checker_lib::get_modules();
if args.module_versions {
// Only print the module versions and then quit.
Expand All @@ -111,7 +176,7 @@ fn run_with_ghidra(args: &CmdlineArgs) -> Result<(), Error> {
let binary_file_path = PathBuf::from(args.binary.clone().unwrap());

let (binary, project, mut all_logs) =
disassemble_binary(&binary_file_path, bare_metal_config_opt, args.verbose)?;
disassemble_binary(&binary_file_path, bare_metal_config_opt, &debug_settings)?;

// Filter the modules to be executed.
if let Some(ref partial_module_list) = args.partial {
Expand Down Expand Up @@ -186,7 +251,7 @@ fn run_with_ghidra(args: &CmdlineArgs) -> Result<(), Error> {
// Print debug and then return.
// Right now there is only one debug printing function.
// When more debug printing modes exist, this behaviour will change!
if args.debug {
if debug_settings.should_debug(debug::Stage::Pi) {
cwe_checker_lib::analysis::pointer_inference::run(
&analysis_results,
serde_json::from_value(config["Memory"].clone()).unwrap(),
Expand Down
6 changes: 6 additions & 0 deletions src/cwe_checker_lib/src/abstract_domain/data/arithmetics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,12 @@ impl<T: RegisterDomain> RegisterDomain for DataDomain<T> {

/// extract a sub-bitvector
fn subpiece(&self, low_byte: ByteSize, size: ByteSize) -> Self {
// Extracting zero-sized subpieces is an semantically invalid operation.
debug_assert_ne!(
size,
ByteSize::new(0),
"Attempt to extract zero-sized subpiece."
);
if low_byte == ByteSize::new(0) && size == self.bytesize() {
// The operation is a no-op
self.clone()
Expand Down
5 changes: 4 additions & 1 deletion src/cwe_checker_lib/src/abstract_domain/interval.rs
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,10 @@ impl IntervalDomain {
interval,
widening_lower_bound: lower_bound,
widening_upper_bound: upper_bound,
widening_delay: self.widening_delay >> low_byte.as_bit_length(),
widening_delay: self
.widening_delay
.overflowing_shr(low_byte.as_bit_length() as u32)
.0,
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ fn mark_values_in_caller_global_mem_as_potentially_overwritten(
} else {
caller_global_mem_region.mark_interval_values_as_top(
*index as i64,
std::i64::MAX - 1,
i64::MAX - 1,
ByteSize::new(1),
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ impl State {
self.store_value(
&Data::from_target(
parent_id,
Bitvector::from_u64(address + offset as u64)
Bitvector::from_u64(address.wrapping_add(offset as u64))
.into_resize_signed(self.stack_id.bytesize())
.into(),
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,22 @@ use crate::{expr, intermediate_representation::*, variable};

#[cfg(test)]
impl Expression {
/// Shortcut for creating a cast expression
/// Shortcut for creating a cast expression.
#[cfg(test)]
pub fn cast(self, op: CastOpType) -> Expression {
pub fn cast_to_size(self, op: CastOpType, result_size: ByteSize) -> Expression {
Expression::Cast {
op,
size: ByteSize::new(8),
size: result_size,
arg: Box::new(self),
}
}

/// Shortcut for creating a cast expression with target size 8.
#[cfg(test)]
pub fn cast(self, op: CastOpType) -> Expression {
self.cast_to_size(op, ByteSize::new(8))
}

/// Shortcut for creating a subpiece expression
#[cfg(test)]
pub fn subpiece(self, low_byte: ByteSize, size: ByteSize) -> Expression {
Expand Down
59 changes: 51 additions & 8 deletions src/cwe_checker_lib/src/pcode/subregister_substitution/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,23 @@ impl<'a> SubregisterSubstitutionBuilder<'a> {
fn replace_output_subregister(&mut self, def: Term<Def>) {
match &def.term {
Def::Assign { var, value } => {
// At this point, the code should be in a form where variables
// being assigned and values that are assigned are of the same
// size.
debug_assert_eq!(
var.size,
value.bytesize(),
"Encountered invalid assignment: variable {} has size {} vs. value {} has size {}.",
var,
var.size,
value,
value.bytesize()
);
if let Some(register) = self.register_map.get(&var.name) {
if var.name != register.base_register || var.size < register.size {
let base_register: &RegisterProperties =
self.register_map.get(&register.base_register).unwrap();
// Check if the variable being assigned is a subregister.
if is_subregister_assignment(var, base_register) {
// The register is not a base register and should be replaced.
if self.is_next_def_cast_to_base_register(var) {
let mut output = self.input_iter.next().unwrap().clone();
Expand All @@ -128,14 +143,13 @@ impl<'a> SubregisterSubstitutionBuilder<'a> {
self.output_defs.push(output);
return;
} else {
let base_register: &RegisterProperties =
self.register_map.get(&register.base_register).unwrap();
let output_var: Variable = base_register.into();
let register = into_subregister(register, var);
let output_expression =
piece_base_register_assignment_expression_together(
value,
base_register,
register,
&register,
);
self.output_defs.push(Term {
tid: def.tid.clone(),
Expand All @@ -151,7 +165,9 @@ impl<'a> SubregisterSubstitutionBuilder<'a> {
}
Def::Load { var, address } => {
if let Some(register) = self.register_map.get(&var.name) {
if var.name != register.base_register || var.size < register.size {
let base_register: &RegisterProperties =
self.register_map.get(&register.base_register).unwrap();
if is_subregister_assignment(var, base_register) {
// The register is not a base register and should be replaced.
// We need two replacement defs: One is a load into a temporary register
// and the second is a cast to the base register.
Expand All @@ -176,16 +192,15 @@ impl<'a> SubregisterSubstitutionBuilder<'a> {
}
self.output_defs.push(cast_to_base_def);
} else {
let base_register: &RegisterProperties =
self.register_map.get(&register.base_register).unwrap();
let register = into_subregister(register, var);
self.output_defs.push(Term {
tid: def.tid.clone().with_id_suffix("_cast_to_base"),
term: Def::Assign {
var: base_register.into(),
value: piece_base_register_assignment_expression_together(
&Expression::Var(temp_reg),
base_register,
register,
&register,
),
},
});
Expand Down Expand Up @@ -359,5 +374,33 @@ fn piece_base_register_assignment_expression_together(
}
}

/// Helper to check wheter an assignment to `var` is a subregister assignment.
///
/// Assumes that `var` is held in a register and that `base_register` is the
/// corresponding full-size register.
fn is_subregister_assignment(var: &Variable, base_register: &RegisterProperties) -> bool {
var.name != base_register.register || var.size < base_register.size
}

/// Helper to create a subregister based on a variable.
///
/// Assumes that `var` is held in a register whose full-size version is equal
/// to, or a superregister of, `register`.
fn into_subregister(register: &RegisterProperties, var: &Variable) -> RegisterProperties {
// Background: If a subregister being assigned has the same
// name as the full-size register (but a different size), `register`
// will be the full register at this point. Thus, we correct the
// size manually and create a register with the full-size name but a
// smaller size. This should be a no-op if `register` is already the
// correct subregister.
// FIXME: This is incomplete if the LSB of the subregister and the full
// register are different. Until now this has not been observed as in those
// cases the register names are different.
let mut register = register.clone();
register.size = var.size;

register
}

#[cfg(test)]
mod tests;
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ fn piecing_or_zero_extending() {
tid: Tid::new("zext_ah_to_eax"),
term: Def::Assign {
var: variable!("EAX:4"),
value: Expression::cast(expr!("AH:1"), CastOpType::IntZExt),
value: Expression::cast_to_size(expr!("AH:1"), CastOpType::IntZExt, ByteSize::new(4)),
},
};
let zext_ah_to_rax = Term {
Expand Down
1 change: 0 additions & 1 deletion src/cwe_checker_lib/src/pcode/term.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use std::collections::{BTreeSet, HashMap};
use std::usize;

use super::subregister_substitution::replace_input_subregister;
use super::{Expression, ExpressionType, RegisterProperties, Variable};
Expand Down
18 changes: 15 additions & 3 deletions src/cwe_checker_lib/src/pipeline/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub use results::AnalysisResults;

use crate::intermediate_representation::{Project, RuntimeMemoryImage};
use crate::prelude::*;
use crate::utils::debug;
use crate::utils::log::LogMessage;
use crate::utils::{binary::BareMetalConfig, ghidra::get_project_from_ghidra};
use std::path::Path;
Expand All @@ -17,18 +18,29 @@ use std::path::Path;
pub fn disassemble_binary(
binary_file_path: &Path,
bare_metal_config_opt: Option<BareMetalConfig>,
verbose_flag: bool,
debug_settings: &debug::Settings,
) -> Result<(Vec<u8>, Project, Vec<LogMessage>), Error> {
let binary: Vec<u8> =
std::fs::read(binary_file_path).context("Could not read from binary file path {}")?;
let (mut project, mut all_logs) = get_project_from_ghidra(
binary_file_path,
&binary[..],
bare_metal_config_opt.clone(),
verbose_flag,
debug_settings,
)?;

// Normalize the project and gather log messages generated from it.
all_logs.append(&mut project.normalize());
debug_settings.print(&project.program.term, debug::Stage::Ir(debug::IrForm::Raw));
all_logs.append(&mut project.normalize_basic());
debug_settings.print(
&project.program.term,
debug::Stage::Ir(debug::IrForm::Normalized),
);
all_logs.append(&mut project.normalize_optimize());
debug_settings.print(
&project.program.term,
debug::Stage::Ir(debug::IrForm::Optimized),
);

// Generate the representation of the runtime memory image of the binary
let mut runtime_memory_image = if let Some(bare_metal_config) = bare_metal_config_opt.as_ref() {
Expand Down
Loading
Loading