Skip to content

Commit

Permalink
lib/utils/debug: introduce infrastructure for per-stage debugging
Browse files Browse the repository at this point in the history
While debugging analysis issues I found myself reinserting the
same print statements at the same places for each new investigation.

Therefore, introduce an extensible infrastructure to debug different
parts of the analysis. The `utils::debug::Settings` object is derived
from the command line and an immutable reference to it is the sole thing
that is being passed around to convey the current debug environment to
different parts of the code. It also provides helper methods to perform
debug operations based on the runtime environment.

The related command-line options and the output format are entirely
unstable and only meant for development purposes.

Signed-off-by: Valentin Obst <[email protected]>
  • Loading branch information
Valentin Obst committed Jun 14, 2024
1 parent 1d1bda9 commit 563f9d7
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 13 deletions.
59 changes: 55 additions & 4 deletions src/caller/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,47 @@ extern crate cwe_checker_lib; // Needed for the docstring-link to work

use anyhow::Context;
use anyhow::Error;
use clap::Parser;
use clap::{Parser, ValueEnum};

use cwe_checker_lib::analysis::graph;
use cwe_checker_lib::pipeline::{disassemble_binary, AnalysisResults};
use cwe_checker_lib::utils::binary::BareMetalConfig;
use cwe_checker_lib::utils::debug;
use cwe_checker_lib::utils::log::{print_all_messages, LogLevel};
use cwe_checker_lib::utils::read_config_file;

use std::collections::{BTreeSet, HashSet};
use std::convert::From;
use std::path::PathBuf;

#[derive(ValueEnum, Clone, Debug, Copy)]
/// Selects which kind of debug output is displayed.
// Each variant is translated into a `debug::Stage` by the
// `From<&CliDebugMode>` impl that follows this enum.
// NOTE(review): clap's derive macros usually surface the `///` text on the
// variants as command-line help, so treat that wording as user-facing and
// confirm before rewording it.
pub enum CliDebugMode {
/// Result of the Pointer Inference computation.
Pi,
/// Unnormalized IR form of the program.
IrRaw,
/// Normalized IR form of the program.
IrNorm,
/// Optimized IR form of the program.
IrOpt,
/// Output of the Ghidra plugin.
PcodeRaw,
}

impl From<&CliDebugMode> for debug::Stage {
fn from(mode: &CliDebugMode) -> Self {
use CliDebugMode::*;
match mode {
Pi => debug::Stage::Pi,
IrRaw => debug::Stage::Ir(debug::IrForm::Raw),
IrNorm => debug::Stage::Ir(debug::IrForm::Normalized),
IrOpt => debug::Stage::Ir(debug::IrForm::Optimized),
PcodeRaw => debug::Stage::Pcode(debug::PcodeForm::Raw),
}
}
}

#[derive(Debug, Parser)]
#[command(version, about)]
/// Find vulnerable patterns in binary executables
Expand Down Expand Up @@ -67,7 +99,25 @@ struct CmdlineArgs {
/// Output for debugging purposes.
/// The current behavior of this flag is unstable and subject to change.
#[arg(long, hide(true))]
debug: bool,
debug: Option<CliDebugMode>,
}

/// Derives the debug settings from the parsed command-line arguments.
impl From<&CmdlineArgs> for debug::Settings {
    fn from(args: &CmdlineArgs) -> Self {
        // Without a debug mode on the command line, use the default stage.
        let stage = args
            .debug
            .as_ref()
            .map_or_else(debug::Stage::default, |mode| debug::Stage::from(mode));

        // `verbose` takes precedence over `quiet` if both flags are set.
        let verbosity = match (args.verbose, args.quiet) {
            (true, _) => debug::Verbosity::Verbose,
            (false, true) => debug::Verbosity::Quiet,
            (false, false) => debug::Verbosity::default(),
        };

        Self::new(stage, verbosity)
    }
}

fn main() -> Result<(), Error> {
Expand All @@ -90,6 +140,7 @@ fn check_file_existence(file_path: &str) -> Result<String, String> {

/// Run the cwe_checker with Ghidra as its backend.
fn run_with_ghidra(args: &CmdlineArgs) -> Result<(), Error> {
let debug_settings = args.into();
let mut modules = cwe_checker_lib::get_modules();
if args.module_versions {
// Only print the module versions and then quit.
Expand All @@ -111,7 +162,7 @@ fn run_with_ghidra(args: &CmdlineArgs) -> Result<(), Error> {
let binary_file_path = PathBuf::from(args.binary.clone().unwrap());

let (binary, project, mut all_logs) =
disassemble_binary(&binary_file_path, bare_metal_config_opt, args.verbose)?;
disassemble_binary(&binary_file_path, bare_metal_config_opt, &debug_settings)?;

// Filter the modules to be executed.
if let Some(ref partial_module_list) = args.partial {
Expand Down Expand Up @@ -186,7 +237,7 @@ fn run_with_ghidra(args: &CmdlineArgs) -> Result<(), Error> {
// Print debug and then return.
// Right now there is only one debug printing function.
// When more debug printing modes exist, this behaviour will change!
if args.debug {
if debug_settings.should_debug(debug::Stage::Pi) {
cwe_checker_lib::analysis::pointer_inference::run(
&analysis_results,
serde_json::from_value(config["Memory"].clone()).unwrap(),
Expand Down
18 changes: 15 additions & 3 deletions src/cwe_checker_lib/src/pipeline/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub use results::AnalysisResults;

use crate::intermediate_representation::{Project, RuntimeMemoryImage};
use crate::prelude::*;
use crate::utils::debug;
use crate::utils::log::LogMessage;
use crate::utils::{binary::BareMetalConfig, ghidra::get_project_from_ghidra};
use std::path::Path;
Expand All @@ -17,18 +18,29 @@ use std::path::Path;
pub fn disassemble_binary(
binary_file_path: &Path,
bare_metal_config_opt: Option<BareMetalConfig>,
verbose_flag: bool,
debug_settings: &debug::Settings,
) -> Result<(Vec<u8>, Project, Vec<LogMessage>), Error> {
let binary: Vec<u8> =
std::fs::read(binary_file_path).context("Could not read from binary file path {}")?;
let (mut project, mut all_logs) = get_project_from_ghidra(
binary_file_path,
&binary[..],
bare_metal_config_opt.clone(),
verbose_flag,
debug_settings,
)?;

// Normalize the project and gather log messages generated from it.
all_logs.append(&mut project.normalize());
debug_settings.print(&project.program.term, debug::Stage::Ir(debug::IrForm::Raw));
all_logs.append(&mut project.normalize_basic());
debug_settings.print(
&project.program.term,
debug::Stage::Ir(debug::IrForm::Normalized),
);
all_logs.append(&mut project.normalize_optimize());
debug_settings.print(
&project.program.term,
debug::Stage::Ir(debug::IrForm::Optimized),
);

// Generate the representation of the runtime memory image of the binary
let mut runtime_memory_image = if let Some(bare_metal_config) = bare_metal_config_opt.as_ref() {
Expand Down
105 changes: 105 additions & 0 deletions src/cwe_checker_lib/src/utils/debug.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,111 @@
//! Little helpers for developers that try to understand what their code is
//! doing.
#![allow(dead_code)]
#![allow(missing_docs)]

/// Stages of the analysis that can be debugged separately.
#[derive(PartialEq, Eq, Copy, Clone, Debug, Default)]
#[non_exhaustive]
pub enum Stage {
    /// Debugging is disabled; no stage is selected. This is the default.
    #[default]
    No,
    /// Every stage is selected for debugging.
    All,
    /// The Pointer Inference computation.
    Pi,
    /// The IR of the program in the given form.
    Ir(IrForm),
    /// The Pcode of the program in the given form.
    Pcode(PcodeForm),
    /// NOTE(review): presumably the CWE check modules; confirm against the
    /// callers that use this variant.
    Cwe,
}

/// Substages of the IR generation that can be debugged separately.
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
#[non_exhaustive]
pub enum IrForm {
    /// IR before any normalization pass has run.
    Raw,
    /// IR after the basic normalization pass.
    Normalized,
    /// IR after the optimizing normalization pass.
    Optimized,
}

/// Substages of the Pcode transformation that can be debugged separately.
#[derive(PartialEq, Eq, Copy, Clone, Debug)]
#[non_exhaustive]
pub enum PcodeForm {
    /// Pcode exactly as it was received from the Ghidra plugin.
    Raw,
    /// NOTE(review): presumably Pcode after some preprocessing step; confirm
    /// against the code that prints this form.
    Processed,
}

/// Controls generation of log messages.
#[derive(PartialEq, Eq, Copy, Clone, Debug, Default)]
#[non_exhaustive]
pub enum Verbosity {
    /// Reduced output.
    Quiet,
    /// Regular output. This is the default.
    #[default]
    Normal,
    /// Additional output; see [`Settings::verbose`].
    Verbose,
}

/// Selects whether the analysis is aborted after reaching the point of
/// interest.
#[derive(PartialEq, Eq, Copy, Clone, Debug, Default)]
#[non_exhaustive]
enum TerminationPolicy {
    /// Continue with the analysis after the debug output was produced.
    KeepRunning,
    /// Exit the process with status code 0. This is the default.
    #[default]
    EarlyExit,
    /// Abort via a panic.
    Panic,
}

/// Configuration of the debugging behavior.
#[derive(PartialEq, Eq, Copy, Clone, Default, Debug)]
pub struct Settings {
    /// Stage whose debug output was requested.
    stage: Stage,
    /// Verbosity of the log output.
    verbose: Verbosity,
    /// What happens after debug output was produced by [`Settings::print`].
    terminate: TerminationPolicy,
}

impl Settings {
    /// Returns a new settings object.
    ///
    /// The termination policy is always set to its default.
    pub fn new(stage: Stage, verbose: Verbosity) -> Self {
        Self {
            stage,
            verbose,
            terminate: TerminationPolicy::default(),
        }
    }

    /// Returns true iff the `stage` is being debugged.
    ///
    /// A stage is being debugged if it is the configured stage, or if the
    /// configured stage is [`Stage::All`]. With the default configuration
    /// ([`Stage::No`]) no stage is being debugged.
    ///
    /// `stage` must not be [`Stage::No`]; this is checked in debug builds.
    pub fn should_debug(&self, stage: Stage) -> bool {
        debug_assert_ne!(stage, Stage::No);

        // The wildcard lives in the *configuration*, not in the query:
        // checking the argument for `All` would leave a configured `All`
        // without effect and make a queried `All` match unconditionally.
        self.stage == stage || self.stage == Stage::All
    }

    /// Displays the `obj`ect if the stage is being debugged.
    ///
    /// This is a possible cancellation point depending on the termination
    /// policy. With the default policy the process exits right after the
    /// first object that is actually printed.
    pub fn print<T: std::fmt::Display + ?Sized>(&self, obj: &T, stage: Stage) {
        if self.should_debug(stage) {
            println!("{}", obj);
            self.maybe_terminate();
        }
    }

    /// Terminates the process according to the termination policy.
    fn maybe_terminate(&self) {
        match self.terminate {
            TerminationPolicy::KeepRunning => {}
            TerminationPolicy::EarlyExit => std::process::exit(0),
            TerminationPolicy::Panic => {
                panic!("terminating after debug output as requested by the termination policy")
            }
        }
    }

    /// Returns true if the logging level is at least verbose.
    pub fn verbose(&self) -> bool {
        matches!(self.verbose, Verbosity::Verbose)
    }
}

/// Central utility for debug printing in the `cwe_checker`.
///
/// The canonical way to do printf-debugging in `cwe_checker` development is to
Expand Down
21 changes: 15 additions & 6 deletions src/cwe_checker_lib/src/utils/ghidra.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@ use crate::utils::binary::BareMetalConfig;
use crate::utils::{get_ghidra_plugin_path, read_config_file};
use crate::{
intermediate_representation::{Project, RuntimeMemoryImage},
utils::debug,
utils::log::LogMessage,
};

use directories::ProjectDirs;
use nix::{sys::stat, unistd};

use std::io::Read;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::thread;
Expand All @@ -20,7 +24,7 @@ pub fn get_project_from_ghidra(
file_path: &Path,
binary: &[u8],
bare_metal_config_opt: Option<BareMetalConfig>,
verbose_flag: bool,
debug_settings: &debug::Settings,
) -> Result<(Project, Vec<LogMessage>), Error> {
let tmp_folder = get_tmp_folder()?;
// We add a timestamp suffix to file names
Expand All @@ -41,7 +45,7 @@ pub fn get_project_from_ghidra(
&timestamp_suffix,
&bare_metal_config_opt,
)?;
let pcode_project = execute_ghidra(ghidra_command, &fifo_path, verbose_flag)?;
let pcode_project = execute_ghidra(ghidra_command, &fifo_path, debug_settings)?;

parse_pcode_project_to_ir_project(pcode_project, binary, &bare_metal_config_opt)
}
Expand Down Expand Up @@ -84,8 +88,9 @@ pub fn parse_pcode_project_to_ir_project(
fn execute_ghidra(
mut ghidra_command: Command,
fifo_path: &PathBuf,
verbose_flag: bool,
debug_settings: &debug::Settings,
) -> Result<crate::pcode::Project, Error> {
let should_print_ghidra_error = debug_settings.verbose();
// Create a new fifo and give read and write rights to the owner
unistd::mkfifo(fifo_path, stat::Mode::from_bits(0o600).unwrap())
.context("Error creating FIFO pipe")?;
Expand All @@ -106,7 +111,7 @@ fn execute_ghidra(
return;
}
}
if verbose_flag {
if should_print_ghidra_error {
eprintln!("{}", String::from_utf8(output.stdout).unwrap());
eprintln!("{}", String::from_utf8(output.stderr).unwrap());
if let Some(code) = output.status.code() {
Expand All @@ -120,9 +125,13 @@ fn execute_ghidra(
});

// Open the FIFO
let file = std::fs::File::open(fifo_path.clone()).expect("Could not open FIFO.");
let mut file = std::fs::File::open(fifo_path.clone()).expect("Could not open FIFO.");
let mut buf = String::new();
file.read_to_string(&mut buf)
.expect("Error while reading from FIFO.");
debug_settings.print(&buf, debug::Stage::Pcode(debug::PcodeForm::Raw));
let pcode_parsing_result = serde_json::from_str(&buf);

let pcode_parsing_result = serde_json::from_reader(std::io::BufReader::new(file));
ghidra_subprocess
.join()
.expect("The Ghidra thread to be joined has panicked!");
Expand Down

0 comments on commit 563f9d7

Please sign in to comment.