From 135d21b76fc62b1dc10598c7c0b8ea5e10604d0b Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Wed, 27 Nov 2024 21:05:07 -0500 Subject: [PATCH 01/10] Fix missing dependency feature for objdiff-gui --- objdiff-gui/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/objdiff-gui/Cargo.toml b/objdiff-gui/Cargo.toml index aac8873..555872f 100644 --- a/objdiff-gui/Cargo.toml +++ b/objdiff-gui/Cargo.toml @@ -95,7 +95,7 @@ exec = "0.3" # native: [target.'cfg(not(target_arch = "wasm32"))'.dependencies] -tracing-subscriber = "0.3" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } # web: [target.'cfg(target_arch = "wasm32")'.dependencies] From 774676ec8a11d3a5758f53b095fe51bd3f4d2ad4 Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Thu, 28 Nov 2024 16:19:17 -0500 Subject: [PATCH 02/10] Update .gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c46c7cc..e4a9258 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,4 @@ android.keystore *.frag *.vert *.metal -.vscode/launch.json +.vscode/ From 70b460649cac32a76d90f938c761d52673ac2664 Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Sun, 1 Dec 2024 17:08:20 -0500 Subject: [PATCH 03/10] PPC: Display data values on hover for pools as well --- objdiff-core/src/arch/arm.rs | 2 + objdiff-core/src/arch/arm64.rs | 3 + objdiff-core/src/arch/mips.rs | 2 + objdiff-core/src/arch/mod.rs | 21 ++- objdiff-core/src/arch/ppc.rs | 211 ++++++++++++++++++++++--- objdiff-core/src/arch/x86.rs | 3 + objdiff-core/src/diff/code.rs | 1 + objdiff-core/src/obj/mod.rs | 1 + objdiff-gui/src/views/function_diff.rs | 28 ++-- 9 files changed, 237 insertions(+), 35 deletions(-) diff --git a/objdiff-core/src/arch/arm.rs b/objdiff-core/src/arch/arm.rs index b36b5b7..2126608 100644 --- a/objdiff-core/src/arch/arm.rs +++ b/objdiff-core/src/arch/arm.rs @@ -113,6 +113,7 @@ impl ObjArch for ObjArchArm { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, + _sections: &[ObjSection], ) -> Result { let start_addr = address as u32; let end_addr = start_addr + code.len() as u32; @@ -216,6 +217,7 @@ impl ObjArch for ObjArchArm { mnemonic: Cow::Borrowed(parsed_ins.mnemonic), args, reloc, + fake_pool_reloc: None, branch_dest, line, formatted: parsed_ins.display(display_options).to_string(), diff --git a/objdiff-core/src/arch/arm64.rs b/objdiff-core/src/arch/arm64.rs index 7e396fa..b803252 100644 --- a/objdiff-core/src/arch/arm64.rs +++ b/objdiff-core/src/arch/arm64.rs @@ -29,6 +29,7 @@ impl ObjArch for ObjArchArm64 { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, + _sections: &[ObjSection], ) -> Result { let start_address = address; let end_address = address + code.len() as u64; @@ -59,6 +60,7 @@ impl ObjArch for ObjArchArm64 { mnemonic: Cow::Borrowed(""), args: vec![], reloc: None, + fake_pool_reloc: None, branch_dest: None, line: None, formatted: "".to_string(), @@ -121,6 +123,7 @@ impl ObjArch for ObjArchArm64 { mnemonic: Cow::Borrowed(mnemonic), args, reloc, + fake_pool_reloc: None, branch_dest, line, formatted: ins.to_string(), diff --git a/objdiff-core/src/arch/mips.rs b/objdiff-core/src/arch/mips.rs index 4bb3be0..754d119 100644 --- a/objdiff-core/src/arch/mips.rs +++ b/objdiff-core/src/arch/mips.rs @@ -87,6 +87,7 @@ impl ObjArch for ObjArchMips { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, + _sections: &[ObjSection], ) -> Result { let _guard = RABBITIZER_MUTEX.lock().map_err(|e| anyhow!("Failed to lock mutex: {e}"))?; configure_rabbitizer(match config.mips_abi { @@ -205,6 +206,7 @@ impl ObjArch for ObjArchMips { mnemonic: Cow::Borrowed(mnemonic), args, reloc: reloc.cloned(), + fake_pool_reloc: None, branch_dest, line, formatted, diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 9799e14..2f8e58c 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -36,13 +36,22 @@ pub enum DataType { impl DataType { pub fn display_bytes(&self, bytes: &[u8]) -> Option { - // TODO: Attempt to interpret large symbols as arrays of a smaller type, - // fallback to intrepreting it as bytes. - // https://github.com/encounter/objdiff/issues/124 - if self.required_len().is_some_and(|l| bytes.len() != l) { - log::warn!("Failed to display a symbol value for a symbol whose size doesn't match the instruction referencing it."); + if self.required_len().is_some_and(|l| bytes.len() < l) { + log::warn!("Failed to display a symbol value for a symbol whose size is too small for instruction referencing it."); return None; } + let mut bytes = bytes; + if self.required_len().is_some_and(|l| bytes.len() > l) { + // If the symbol's size is larger a single instance of this data type, we take just the + // bytes necessary for one of them in order to display the first element of the array. + bytes = &bytes[0..self.required_len().unwrap()]; + // TODO: Attempt to interpret large symbols as arrays of a smaller type and show all + // elements of the array instead. https://github.com/encounter/objdiff/issues/124 + // However, note that the stride of an array can not always be determined just by the + // data type guessed by the single instruction accessing it. There can also be arrays of + // structs that contain multiple elements of different types, so if other elements after + // the first one were to be displayed in this manner, they may be inaccurate. + } match self { DataType::Int8 => { @@ -117,6 +126,7 @@ impl DataType { } pub trait ObjArch: Send + Sync { + #[expect(clippy::too_many_arguments)] fn process_code( &self, address: u64, @@ -125,6 +135,7 @@ pub trait ObjArch: Send + Sync { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, + sections: &[ObjSection], ) -> Result; fn implcit_addend( diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc.rs index 6c487fc..7e5d424 100644 --- a/objdiff-core/src/arch/ppc.rs +++ b/objdiff-core/src/arch/ppc.rs @@ -1,4 +1,7 @@ -use std::{borrow::Cow, collections::BTreeMap}; +use std::{ + borrow::Cow, + collections::{BTreeMap, HashMap}, +}; use anyhow::{bail, ensure, Result}; use byteorder::BigEndian; @@ -7,7 +10,7 @@ use object::{ elf, File, Object, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, RelocationTarget, Symbol, SymbolKind, }; -use ppc750cl::{Argument, InsIter, Opcode, GPR}; +use ppc750cl::{Argument, InsIter, Opcode, ParsedIns, GPR}; use crate::{ arch::{DataType, ObjArch, ProcessCodeResult}, @@ -27,6 +30,180 @@ fn is_rel_abs_arg(arg: &Argument) -> bool { fn is_offset_arg(arg: &Argument) -> bool { matches!(arg, Argument::Offset(_)) } +fn guess_data_type_from_load_store_inst_op(inst_op: Opcode) -> Option { + match inst_op { + Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8), + Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16), + Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16), + Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32), + Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float), + Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double), + + Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8), + Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16), + Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32), + Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float), + Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double), + _ => None, + } +} + +// Given an instruction, determine if it could accessing data at the address in a register. +// If so, return the offset added to the register's address, the register containing that address, +// and (optionally) which destination register the address is being copied into. +fn get_offset_and_addr_gpr_for_possible_pool_reference( + opcode: Opcode, + simplified: &ParsedIns, +) -> Option<(i16, GPR, Option)> { + let args = &simplified.args; + if guess_data_type_from_load_store_inst_op(opcode).is_some() { + match (args[1], args[2]) { + (Argument::Offset(offset), Argument::GPR(addr_src_gpr)) => { + // e.g. lwz. Immediate offset. + Some((offset.0, addr_src_gpr, None)) + } + (Argument::GPR(addr_src_gpr), Argument::GPR(_offset_gpr)) => { + // e.g. lwzx. The offset is in a register and was likely calculated from an index. + // Treat the offset as being 0 in this case to show the first element of the array. + // It may be possible to show all elements by figuring out the stride of the array + // from the calculations performed on the index before it's put into offset_gpr, but + // this would be much more complicated, so it's not currently done. + Some((0, addr_src_gpr, None)) + } + _ => None, + } + } else { + // If it's not a load/store instruction, there's two more possibilities we need to handle. + // 1. It could be a reference to @stringBase. + // 2. It could be moving the relocation address plus an offset into a different register to + // load from later. + // If either of these match, we also want to return the destination register that the + // address is being copied into so that we can detect any future references to that new + // register as well. + match (opcode, args[0], args[1], args[2]) { + ( + Opcode::Addi, + Argument::GPR(addr_dst_gpr), + Argument::GPR(addr_src_gpr), + Argument::Simm(simm), + ) => Some((simm.0, addr_src_gpr, Some(addr_dst_gpr))), + ( + Opcode::Or, + Argument::GPR(addr_dst_gpr), + Argument::GPR(addr_src_gpr), + Argument::None, + ) => Some((0, addr_src_gpr, Some(addr_dst_gpr))), // `mr` or `mr.` + _ => None, + } + } +} + +// We create a fake relocation for an instruction, vaguely simulating what the actual relocation +// might have looked like if it wasn't pooled. This is so minimal changes are needed to display +// pooled accesses vs non-pooled accesses. We set the relocation type to R_PPC_NONE to indicate that +// there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense. +// Also, if this instruction is accessing the middle of a symbol instead of the start, we add an +// addend to indicate that. +fn make_fake_pool_reloc( + offset: i16, + cur_addr: u32, + pool_reloc: &ObjReloc, + sections: &[ObjSection], +) -> Option { + let offset_from_pool = pool_reloc.addend + offset as i64; + let target_address = pool_reloc.target.address.checked_add_signed(offset_from_pool)?; + let orig_section_index = pool_reloc.target.orig_section_index?; + let section = sections.iter().find(|s| s.orig_index == orig_section_index)?; + let target_symbol = section + .symbols + .iter() + .find(|s| s.size > 0 && (s.address..s.address + s.size).contains(&target_address))?; + let addend = (target_address - target_symbol.address) as i64; + Some(ObjReloc { + flags: RelocationFlags::Elf { r_type: elf::R_PPC_NONE }, + address: cur_addr as u64, + target: target_symbol.clone(), + addend, + }) +} + +// Searches through all instructions in a function, determining which registers have the addresses +// of pooled data relocations in them, finding which instructions load data from those addresses, +// and constructing a mapping of the address of that instruction to a "fake pool relocation" that +// simulates what that instruction's relocation would look like if data hadn't been pooled. +// Limitations: This method currently only goes through the instructions in a function in linear +// order, from start to finish. It does *not* follow any branches. This means that it could have +// false positives or false negatives in determining which relocation is currently loaded in which +// register at any given point in the function, as control flow is not respected. +// There are currently no known examples of this method producing inaccurate results in reality, but +// if examples are found, it may be possible to update this method to also follow all branches so +// that it produces more accurate results. +fn generate_fake_pool_reloc_for_addr_mapping( + address: u64, + code: &[u8], + relocations: &[ObjReloc], + sections: &[ObjSection], +) -> HashMap { + let mut active_pool_relocs = HashMap::new(); + let mut pool_reloc_for_addr = HashMap::new(); + for (cur_addr, ins) in InsIter::new(code, address as u32) { + let simplified = ins.simplified(); + let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr); + + if let Some(reloc) = reloc { + // This instruction has a real relocation, so it may be a pool load we want to keep + // track of. + let args = &simplified.args; + match (ins.op, args[0], args[1], args[2]) { + ( + Opcode::Addi, + Argument::GPR(addr_dst_gpr), + Argument::GPR(_addr_src_gpr), + Argument::Simm(_simm), + ) => { + active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `addi` + } + ( + Opcode::Ori, + Argument::GPR(addr_dst_gpr), + Argument::GPR(_addr_src_gpr), + Argument::Uimm(_uimm), + ) => { + active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `ori` + } + _ => {} + } + } else if let Some((offset, addr_src_gpr, addr_dst_gpr)) = + get_offset_and_addr_gpr_for_possible_pool_reference(ins.op, &simplified) + { + // This instruction doesn't have a real relocation, so it may be a reference to one of + // the already-loaded pools. + if let Some(pool_reloc) = active_pool_relocs.get(&addr_src_gpr.0) { + if let Some(fake_pool_reloc) = + make_fake_pool_reloc(offset, cur_addr, pool_reloc, sections) + { + pool_reloc_for_addr.insert(cur_addr, fake_pool_reloc); + } + if let Some(addr_dst_gpr) = addr_dst_gpr { + // If the address of the pool relocation got copied into another register, we + // need to keep track of it in that register too as future instructions may + // reference the symbol indirectly via this new register, instead of the + // register the symbol's address was originally loaded into. + // For example, the start of the function might `lis` + `addi` the start of the + // ...data pool into r25, and then later the start of a loop will `addi` r25 + // with the offset within the .data section of an array variable into r21. + // Then the body of the loop will `lwzx` one of the array elements from r21. + let mut new_reloc = pool_reloc.clone(); + new_reloc.addend += offset as i64; + active_pool_relocs.insert(addr_dst_gpr.0, new_reloc); + } + } + } + } + + pool_reloc_for_addr +} + pub struct ObjArchPpc { /// Exception info pub extab: Option>, @@ -45,10 +222,13 @@ impl ObjArch for ObjArchPpc { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, + sections: &[ObjSection], ) -> Result { let ins_count = code.len() / 4; let mut ops = Vec::::with_capacity(ins_count); let mut insts = Vec::::with_capacity(ins_count); + let fake_pool_reloc_for_addr = + generate_fake_pool_reloc_for_addr_mapping(address, code, relocations, sections); for (cur_addr, mut ins) in InsIter::new(code, address as u32) { let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr); if let Some(reloc) = reloc { @@ -146,6 +326,7 @@ impl ObjArch for ObjArchPpc { mnemonic: Cow::Borrowed(simplified.mnemonic), args, reloc: reloc.cloned(), + fake_pool_reloc: fake_pool_reloc_for_addr.get(&cur_addr).cloned(), op: ins.op as u16, branch_dest, line, @@ -173,6 +354,7 @@ impl ObjArch for ObjArchPpc { fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str> { match flags { RelocationFlags::Elf { r_type } => match r_type { + elf::R_PPC_NONE => Cow::Borrowed("R_PPC_NONE"), // We use this for fake pool relocs elf::R_PPC_ADDR16_LO => Cow::Borrowed("R_PPC_ADDR16_LO"), elf::R_PPC_ADDR16_HI => Cow::Borrowed("R_PPC_ADDR16_HI"), elf::R_PPC_ADDR16_HA => Cow::Borrowed("R_PPC_ADDR16_HA"), @@ -188,27 +370,16 @@ impl ObjArch for ObjArchPpc { } fn guess_data_type(&self, instruction: &ObjIns) -> Option { - // Always shows the first string of the table. Not ideal, but it's really hard to find - // the actual string being referenced. - if instruction.reloc.as_ref().is_some_and(|r| r.target.name.starts_with("@stringBase")) { + if instruction + .reloc + .as_ref() + .or(instruction.fake_pool_reloc.as_ref()) + .is_some_and(|r| r.target.name.starts_with("@stringBase")) + { return Some(DataType::String); } - match Opcode::from(instruction.op as u8) { - Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8), - Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16), - Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16), - Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32), - Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float), - Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double), - - Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8), - Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16), - Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32), - Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float), - Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double), - _ => None, - } + guess_data_type_from_load_store_inst_op(Opcode::from(instruction.op as u8)) } fn display_data_type(&self, ty: DataType, bytes: &[u8]) -> Option { diff --git a/objdiff-core/src/arch/x86.rs b/objdiff-core/src/arch/x86.rs index 82399fb..a92e09f 100644 --- a/objdiff-core/src/arch/x86.rs +++ b/objdiff-core/src/arch/x86.rs @@ -34,6 +34,7 @@ impl ObjArch for ObjArchX86 { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, + _sections: &[ObjSection], ) -> Result { let mut result = ProcessCodeResult { ops: Vec::new(), insts: Vec::new() }; let mut decoder = Decoder::with_ip(self.bits, code, address, DecoderOptions::NONE); @@ -54,6 +55,7 @@ impl ObjArch for ObjArchX86 { mnemonic: Cow::Borrowed(""), args: vec![], reloc: None, + fake_pool_reloc: None, branch_dest: None, line: None, formatted: String::new(), @@ -79,6 +81,7 @@ impl ObjArch for ObjArchX86 { mnemonic: Cow::Borrowed(""), args: vec![], reloc: reloc.cloned(), + fake_pool_reloc: None, branch_dest: None, line, formatted: String::new(), diff --git a/objdiff-core/src/diff/code.rs b/objdiff-core/src/diff/code.rs index 3d3273a..9e2fb88 100644 --- a/objdiff-core/src/diff/code.rs +++ b/objdiff-core/src/diff/code.rs @@ -28,6 +28,7 @@ pub fn process_code_symbol( §ion.relocations, §ion.line_info, config, + &obj.sections, ) } diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs index 1ef6d0f..800259d 100644 --- a/objdiff-core/src/obj/mod.rs +++ b/objdiff-core/src/obj/mod.rs @@ -106,6 +106,7 @@ pub struct ObjIns { pub mnemonic: Cow<'static, str>, pub args: Vec, pub reloc: Option, + pub fake_pool_reloc: Option, pub branch_dest: Option, /// Line number pub line: Option, diff --git a/objdiff-gui/src/views/function_diff.rs b/objdiff-gui/src/views/function_diff.rs index dde736e..d555065 100644 --- a/objdiff-gui/src/views/function_diff.rs +++ b/objdiff-gui/src/views/function_diff.rs @@ -1,4 +1,4 @@ -use std::default::Default; +use std::{cmp::Ordering, default::Default}; use egui::{text::LayoutJob, Id, Label, Response, RichText, Sense, Widget}; use egui_extras::TableRow; @@ -118,9 +118,17 @@ fn ins_hover_ui( } } - if let Some(reloc) = &ins.reloc { + if let Some(reloc) = ins.reloc.as_ref().or(ins.fake_pool_reloc.as_ref()) { ui.label(format!("Relocation type: {}", obj.arch.display_reloc(reloc.flags))); - ui.colored_label(appearance.highlight_color, format!("Name: {}", reloc.target.name)); + let addend_str = match reloc.addend.cmp(&0i64) { + Ordering::Greater => format!("+{:x}", reloc.addend), + Ordering::Less => format!("-{:x}", -reloc.addend), + _ => "".to_string(), + }; + ui.colored_label( + appearance.highlight_color, + format!("Name: {}{}", reloc.target.name, addend_str), + ); if let Some(orig_section_index) = reloc.target.orig_section_index { if let Some(section) = obj.sections.iter().find(|s| s.orig_index == orig_section_index) @@ -132,18 +140,18 @@ fn ins_hover_ui( } ui.colored_label( appearance.highlight_color, - format!("Address: {:x}", reloc.target.address), + format!("Address: {:x}{}", reloc.target.address, addend_str), ); ui.colored_label( appearance.highlight_color, format!("Size: {:x}", reloc.target.size), ); - if let Some(s) = obj - .arch - .guess_data_type(ins) - .and_then(|ty| obj.arch.display_data_type(ty, &reloc.target.bytes)) - { - ui.colored_label(appearance.highlight_color, s); + if reloc.addend >= 0 && reloc.target.bytes.len() > reloc.addend as usize { + if let Some(s) = obj.arch.guess_data_type(ins).and_then(|ty| { + obj.arch.display_data_type(ty, &reloc.target.bytes[reloc.addend as usize..]) + }) { + ui.colored_label(appearance.highlight_color, s); + } } } else { ui.colored_label(appearance.highlight_color, "Extern".to_string()); From cdf3fd178062f5beaf757f27f01feef2aa1ede41 Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Sun, 1 Dec 2024 17:17:28 -0500 Subject: [PATCH 04/10] Tooltip data display: Format floats and doubles better Floats and doubles will now always be displayed with a decimal point and one digit after it, even if they are whole numbers. Floats will also have the f suffix. This is so you can tell the data type just by glancing at the value. --- objdiff-core/src/arch/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 2f8e58c..5fc2d01 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -95,10 +95,10 @@ impl DataType { } } DataType::Float => { - format!("Float: {}", Endian::read_f32(bytes)) + format!("Float: {:?}f", Endian::read_f32(bytes)) } DataType::Double => { - format!("Double: {}", Endian::read_f64(bytes)) + format!("Double: {:?}", Endian::read_f64(bytes)) } DataType::Bytes => { format!("Bytes: {:#?}", bytes) From 9051f48731bf1536ff04f387b56d9e36866cefe0 Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Mon, 2 Dec 2024 01:00:39 -0500 Subject: [PATCH 05/10] Move big functions to bottom ppc.rs --- objdiff-core/src/arch/ppc.rs | 348 +++++++++++++++++------------------ 1 file changed, 174 insertions(+), 174 deletions(-) diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc.rs index 7e5d424..be84122 100644 --- a/objdiff-core/src/arch/ppc.rs +++ b/objdiff-core/src/arch/ppc.rs @@ -30,180 +30,6 @@ fn is_rel_abs_arg(arg: &Argument) -> bool { fn is_offset_arg(arg: &Argument) -> bool { matches!(arg, Argument::Offset(_)) } -fn guess_data_type_from_load_store_inst_op(inst_op: Opcode) -> Option { - match inst_op { - Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8), - Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16), - Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16), - Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32), - Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float), - Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double), - - Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8), - Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16), - Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32), - Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float), - Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double), - _ => None, - } -} - -// Given an instruction, determine if it could accessing data at the address in a register. -// If so, return the offset added to the register's address, the register containing that address, -// and (optionally) which destination register the address is being copied into. -fn get_offset_and_addr_gpr_for_possible_pool_reference( - opcode: Opcode, - simplified: &ParsedIns, -) -> Option<(i16, GPR, Option)> { - let args = &simplified.args; - if guess_data_type_from_load_store_inst_op(opcode).is_some() { - match (args[1], args[2]) { - (Argument::Offset(offset), Argument::GPR(addr_src_gpr)) => { - // e.g. lwz. Immediate offset. - Some((offset.0, addr_src_gpr, None)) - } - (Argument::GPR(addr_src_gpr), Argument::GPR(_offset_gpr)) => { - // e.g. lwzx. The offset is in a register and was likely calculated from an index. - // Treat the offset as being 0 in this case to show the first element of the array. - // It may be possible to show all elements by figuring out the stride of the array - // from the calculations performed on the index before it's put into offset_gpr, but - // this would be much more complicated, so it's not currently done. - Some((0, addr_src_gpr, None)) - } - _ => None, - } - } else { - // If it's not a load/store instruction, there's two more possibilities we need to handle. - // 1. It could be a reference to @stringBase. - // 2. It could be moving the relocation address plus an offset into a different register to - // load from later. - // If either of these match, we also want to return the destination register that the - // address is being copied into so that we can detect any future references to that new - // register as well. - match (opcode, args[0], args[1], args[2]) { - ( - Opcode::Addi, - Argument::GPR(addr_dst_gpr), - Argument::GPR(addr_src_gpr), - Argument::Simm(simm), - ) => Some((simm.0, addr_src_gpr, Some(addr_dst_gpr))), - ( - Opcode::Or, - Argument::GPR(addr_dst_gpr), - Argument::GPR(addr_src_gpr), - Argument::None, - ) => Some((0, addr_src_gpr, Some(addr_dst_gpr))), // `mr` or `mr.` - _ => None, - } - } -} - -// We create a fake relocation for an instruction, vaguely simulating what the actual relocation -// might have looked like if it wasn't pooled. This is so minimal changes are needed to display -// pooled accesses vs non-pooled accesses. We set the relocation type to R_PPC_NONE to indicate that -// there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense. -// Also, if this instruction is accessing the middle of a symbol instead of the start, we add an -// addend to indicate that. -fn make_fake_pool_reloc( - offset: i16, - cur_addr: u32, - pool_reloc: &ObjReloc, - sections: &[ObjSection], -) -> Option { - let offset_from_pool = pool_reloc.addend + offset as i64; - let target_address = pool_reloc.target.address.checked_add_signed(offset_from_pool)?; - let orig_section_index = pool_reloc.target.orig_section_index?; - let section = sections.iter().find(|s| s.orig_index == orig_section_index)?; - let target_symbol = section - .symbols - .iter() - .find(|s| s.size > 0 && (s.address..s.address + s.size).contains(&target_address))?; - let addend = (target_address - target_symbol.address) as i64; - Some(ObjReloc { - flags: RelocationFlags::Elf { r_type: elf::R_PPC_NONE }, - address: cur_addr as u64, - target: target_symbol.clone(), - addend, - }) -} - -// Searches through all instructions in a function, determining which registers have the addresses -// of pooled data relocations in them, finding which instructions load data from those addresses, -// and constructing a mapping of the address of that instruction to a "fake pool relocation" that -// simulates what that instruction's relocation would look like if data hadn't been pooled. -// Limitations: This method currently only goes through the instructions in a function in linear -// order, from start to finish. It does *not* follow any branches. This means that it could have -// false positives or false negatives in determining which relocation is currently loaded in which -// register at any given point in the function, as control flow is not respected. -// There are currently no known examples of this method producing inaccurate results in reality, but -// if examples are found, it may be possible to update this method to also follow all branches so -// that it produces more accurate results. -fn generate_fake_pool_reloc_for_addr_mapping( - address: u64, - code: &[u8], - relocations: &[ObjReloc], - sections: &[ObjSection], -) -> HashMap { - let mut active_pool_relocs = HashMap::new(); - let mut pool_reloc_for_addr = HashMap::new(); - for (cur_addr, ins) in InsIter::new(code, address as u32) { - let simplified = ins.simplified(); - let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr); - - if let Some(reloc) = reloc { - // This instruction has a real relocation, so it may be a pool load we want to keep - // track of. - let args = &simplified.args; - match (ins.op, args[0], args[1], args[2]) { - ( - Opcode::Addi, - Argument::GPR(addr_dst_gpr), - Argument::GPR(_addr_src_gpr), - Argument::Simm(_simm), - ) => { - active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `addi` - } - ( - Opcode::Ori, - Argument::GPR(addr_dst_gpr), - Argument::GPR(_addr_src_gpr), - Argument::Uimm(_uimm), - ) => { - active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `ori` - } - _ => {} - } - } else if let Some((offset, addr_src_gpr, addr_dst_gpr)) = - get_offset_and_addr_gpr_for_possible_pool_reference(ins.op, &simplified) - { - // This instruction doesn't have a real relocation, so it may be a reference to one of - // the already-loaded pools. - if let Some(pool_reloc) = active_pool_relocs.get(&addr_src_gpr.0) { - if let Some(fake_pool_reloc) = - make_fake_pool_reloc(offset, cur_addr, pool_reloc, sections) - { - pool_reloc_for_addr.insert(cur_addr, fake_pool_reloc); - } - if let Some(addr_dst_gpr) = addr_dst_gpr { - // If the address of the pool relocation got copied into another register, we - // need to keep track of it in that register too as future instructions may - // reference the symbol indirectly via this new register, instead of the - // register the symbol's address was originally loaded into. - // For example, the start of the function might `lis` + `addi` the start of the - // ...data pool into r25, and then later the start of a loop will `addi` r25 - // with the offset within the .data section of an array variable into r21. - // Then the body of the loop will `lwzx` one of the array elements from r21. - let mut new_reloc = pool_reloc.clone(); - new_reloc.addend += offset as i64; - active_pool_relocs.insert(addr_dst_gpr.0, new_reloc); - } - } - } - } - - pool_reloc_for_addr -} - pub struct ObjArchPpc { /// Exception info pub extab: Option>, @@ -552,3 +378,177 @@ fn make_symbol_ref(symbol: &Symbol) -> Result { let demangled_name = cwdemangle::demangle(&name, &cwdemangle::DemangleOptions::default()); Ok(ExtabSymbolRef { original_index: symbol.index().0, name, demangled_name }) } + +fn guess_data_type_from_load_store_inst_op(inst_op: Opcode) -> Option { + match inst_op { + Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8), + Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16), + Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16), + Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32), + Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float), + Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double), + + Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8), + Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16), + Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32), + Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float), + Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double), + _ => None, + } +} + +// Given an instruction, determine if it could accessing data at the address in a register. +// If so, return the offset added to the register's address, the register containing that address, +// and (optionally) which destination register the address is being copied into. +fn get_offset_and_addr_gpr_for_possible_pool_reference( + opcode: Opcode, + simplified: &ParsedIns, +) -> Option<(i16, GPR, Option)> { + let args = &simplified.args; + if guess_data_type_from_load_store_inst_op(opcode).is_some() { + match (args[1], args[2]) { + (Argument::Offset(offset), Argument::GPR(addr_src_gpr)) => { + // e.g. lwz. Immediate offset. + Some((offset.0, addr_src_gpr, None)) + } + (Argument::GPR(addr_src_gpr), Argument::GPR(_offset_gpr)) => { + // e.g. lwzx. The offset is in a register and was likely calculated from an index. + // Treat the offset as being 0 in this case to show the first element of the array. + // It may be possible to show all elements by figuring out the stride of the array + // from the calculations performed on the index before it's put into offset_gpr, but + // this would be much more complicated, so it's not currently done. + Some((0, addr_src_gpr, None)) + } + _ => None, + } + } else { + // If it's not a load/store instruction, there's two more possibilities we need to handle. + // 1. It could be a reference to @stringBase. + // 2. It could be moving the relocation address plus an offset into a different register to + // load from later. + // If either of these match, we also want to return the destination register that the + // address is being copied into so that we can detect any future references to that new + // register as well. + match (opcode, args[0], args[1], args[2]) { + ( + Opcode::Addi, + Argument::GPR(addr_dst_gpr), + Argument::GPR(addr_src_gpr), + Argument::Simm(simm), + ) => Some((simm.0, addr_src_gpr, Some(addr_dst_gpr))), + ( + Opcode::Or, + Argument::GPR(addr_dst_gpr), + Argument::GPR(addr_src_gpr), + Argument::None, + ) => Some((0, addr_src_gpr, Some(addr_dst_gpr))), // `mr` or `mr.` + _ => None, + } + } +} + +// We create a fake relocation for an instruction, vaguely simulating what the actual relocation +// might have looked like if it wasn't pooled. This is so minimal changes are needed to display +// pooled accesses vs non-pooled accesses. We set the relocation type to R_PPC_NONE to indicate that +// there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense. +// Also, if this instruction is accessing the middle of a symbol instead of the start, we add an +// addend to indicate that. +fn make_fake_pool_reloc( + offset: i16, + cur_addr: u32, + pool_reloc: &ObjReloc, + sections: &[ObjSection], +) -> Option { + let offset_from_pool = pool_reloc.addend + offset as i64; + let target_address = pool_reloc.target.address.checked_add_signed(offset_from_pool)?; + let orig_section_index = pool_reloc.target.orig_section_index?; + let section = sections.iter().find(|s| s.orig_index == orig_section_index)?; + let target_symbol = section + .symbols + .iter() + .find(|s| s.size > 0 && (s.address..s.address + s.size).contains(&target_address))?; + let addend = (target_address - target_symbol.address) as i64; + Some(ObjReloc { + flags: RelocationFlags::Elf { r_type: elf::R_PPC_NONE }, + address: cur_addr as u64, + target: target_symbol.clone(), + addend, + }) +} + +// Searches through all instructions in a function, determining which registers have the addresses +// of pooled data relocations in them, finding which instructions load data from those addresses, +// and constructing a mapping of the address of that instruction to a "fake pool relocation" that +// simulates what that instruction's relocation would look like if data hadn't been pooled. +// Limitations: This method currently only goes through the instructions in a function in linear +// order, from start to finish. It does *not* follow any branches. This means that it could have +// false positives or false negatives in determining which relocation is currently loaded in which +// register at any given point in the function, as control flow is not respected. +// There are currently no known examples of this method producing inaccurate results in reality, but +// if examples are found, it may be possible to update this method to also follow all branches so +// that it produces more accurate results. +fn generate_fake_pool_reloc_for_addr_mapping( + address: u64, + code: &[u8], + relocations: &[ObjReloc], + sections: &[ObjSection], +) -> HashMap { + let mut active_pool_relocs = HashMap::new(); + let mut pool_reloc_for_addr = HashMap::new(); + for (cur_addr, ins) in InsIter::new(code, address as u32) { + let simplified = ins.simplified(); + let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr); + + if let Some(reloc) = reloc { + // This instruction has a real relocation, so it may be a pool load we want to keep + // track of. + let args = &simplified.args; + match (ins.op, args[0], args[1], args[2]) { + ( + Opcode::Addi, + Argument::GPR(addr_dst_gpr), + Argument::GPR(_addr_src_gpr), + Argument::Simm(_simm), + ) => { + active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `addi` + } + ( + Opcode::Ori, + Argument::GPR(addr_dst_gpr), + Argument::GPR(_addr_src_gpr), + Argument::Uimm(_uimm), + ) => { + active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `ori` + } + _ => {} + } + } else if let Some((offset, addr_src_gpr, addr_dst_gpr)) = + get_offset_and_addr_gpr_for_possible_pool_reference(ins.op, &simplified) + { + // This instruction doesn't have a real relocation, so it may be a reference to one of + // the already-loaded pools. + if let Some(pool_reloc) = active_pool_relocs.get(&addr_src_gpr.0) { + if let Some(fake_pool_reloc) = + make_fake_pool_reloc(offset, cur_addr, pool_reloc, sections) + { + pool_reloc_for_addr.insert(cur_addr, fake_pool_reloc); + } + if let Some(addr_dst_gpr) = addr_dst_gpr { + // If the address of the pool relocation got copied into another register, we + // need to keep track of it in that register too as future instructions may + // reference the symbol indirectly via this new register, instead of the + // register the symbol's address was originally loaded into. + // For example, the start of the function might `lis` + `addi` the start of the + // ...data pool into r25, and then later the start of a loop will `addi` r25 + // with the offset within the .data section of an array variable into r21. + // Then the body of the loop will `lwzx` one of the array elements from r21. + let mut new_reloc = pool_reloc.clone(); + new_reloc.addend += offset as i64; + active_pool_relocs.insert(addr_dst_gpr.0, new_reloc); + } + } + } + } + + pool_reloc_for_addr +} From 507b988aa33b7acb391d24b1c69ad63d68a7d17d Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Mon, 2 Dec 2024 18:05:32 -0500 Subject: [PATCH 06/10] Clear pool relocs in volatile registers on function call This fixes some false positives. --- objdiff-core/src/arch/ppc.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc.rs index be84122..a14852a 100644 --- a/objdiff-core/src/arch/ppc.rs +++ b/objdiff-core/src/arch/ppc.rs @@ -520,6 +520,16 @@ fn generate_fake_pool_reloc_for_addr_mapping( ) => { active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone()); // `lis` + `ori` } + (Opcode::B, _, _, _) => { + if simplified.mnemonic == "bl" { + // When encountering a function call, clear any active pool relocations from + // the volatile registers (r0, r3-r12), but not the nonvolatile registers. + active_pool_relocs.remove(&0); + for gpr in 3..12 { + active_pool_relocs.remove(&gpr); + } + } + } _ => {} } } else if let Some((offset, addr_src_gpr, addr_dst_gpr)) = From 9b1205d9aae9c06ed737ab06cebb31b7ab42cd31 Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Tue, 3 Dec 2024 00:37:34 -0500 Subject: [PATCH 07/10] Revert ObjArch API changes, add fake target symbol hack Because we no longer have access to the actual symbol name via sections, guess_data_type can no longer detect the String data type for pooled references. --- objdiff-core/src/arch/arm.rs | 1 - objdiff-core/src/arch/arm64.rs | 1 - objdiff-core/src/arch/mips.rs | 1 - objdiff-core/src/arch/mod.rs | 2 -- objdiff-core/src/arch/ppc.rs | 48 +++++++++++++++----------- objdiff-core/src/arch/x86.rs | 1 - objdiff-core/src/diff/code.rs | 1 - objdiff-gui/src/views/function_diff.rs | 46 +++++++++++++++++------- 8 files changed, 62 insertions(+), 39 deletions(-) diff --git a/objdiff-core/src/arch/arm.rs b/objdiff-core/src/arch/arm.rs index 2126608..279cecb 100644 --- a/objdiff-core/src/arch/arm.rs +++ b/objdiff-core/src/arch/arm.rs @@ -113,7 +113,6 @@ impl ObjArch for ObjArchArm { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, - _sections: &[ObjSection], ) -> Result { let start_addr = address as u32; let end_addr = start_addr + code.len() as u32; diff --git a/objdiff-core/src/arch/arm64.rs b/objdiff-core/src/arch/arm64.rs index b803252..ae78f8a 100644 --- a/objdiff-core/src/arch/arm64.rs +++ b/objdiff-core/src/arch/arm64.rs @@ -29,7 +29,6 @@ impl ObjArch for ObjArchArm64 { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, - _sections: &[ObjSection], ) -> Result { let start_address = address; let end_address = address + code.len() as u64; diff --git a/objdiff-core/src/arch/mips.rs b/objdiff-core/src/arch/mips.rs index 754d119..c2c28d1 100644 --- a/objdiff-core/src/arch/mips.rs +++ b/objdiff-core/src/arch/mips.rs @@ -87,7 +87,6 @@ impl ObjArch for ObjArchMips { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, - _sections: &[ObjSection], ) -> Result { let _guard = RABBITIZER_MUTEX.lock().map_err(|e| anyhow!("Failed to lock mutex: {e}"))?; configure_rabbitizer(match config.mips_abi { diff --git a/objdiff-core/src/arch/mod.rs b/objdiff-core/src/arch/mod.rs index 5fc2d01..82579dd 100644 --- a/objdiff-core/src/arch/mod.rs +++ b/objdiff-core/src/arch/mod.rs @@ -126,7 +126,6 @@ impl DataType { } pub trait ObjArch: Send + Sync { - #[expect(clippy::too_many_arguments)] fn process_code( &self, address: u64, @@ -135,7 +134,6 @@ pub trait ObjArch: Send + Sync { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, - sections: &[ObjSection], ) -> Result; fn implcit_addend( diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc.rs index a14852a..7d90db9 100644 --- a/objdiff-core/src/arch/ppc.rs +++ b/objdiff-core/src/arch/ppc.rs @@ -48,13 +48,12 @@ impl ObjArch for ObjArchPpc { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, - sections: &[ObjSection], ) -> Result { let ins_count = code.len() / 4; let mut ops = Vec::::with_capacity(ins_count); let mut insts = Vec::::with_capacity(ins_count); let fake_pool_reloc_for_addr = - generate_fake_pool_reloc_for_addr_mapping(address, code, relocations, sections); + generate_fake_pool_reloc_for_addr_mapping(address, code, relocations); for (cur_addr, mut ins) in InsIter::new(code, address as u32) { let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr); if let Some(reloc) = reloc { @@ -453,26 +452,38 @@ fn get_offset_and_addr_gpr_for_possible_pool_reference( // there isn't really a relocation here, as copying the pool relocation's type wouldn't make sense. // Also, if this instruction is accessing the middle of a symbol instead of the start, we add an // addend to indicate that. -fn make_fake_pool_reloc( - offset: i16, - cur_addr: u32, - pool_reloc: &ObjReloc, - sections: &[ObjSection], -) -> Option { +fn make_fake_pool_reloc(offset: i16, cur_addr: u32, pool_reloc: &ObjReloc) -> Option { let offset_from_pool = pool_reloc.addend + offset as i64; let target_address = pool_reloc.target.address.checked_add_signed(offset_from_pool)?; let orig_section_index = pool_reloc.target.orig_section_index?; - let section = sections.iter().find(|s| s.orig_index == orig_section_index)?; - let target_symbol = section - .symbols - .iter() - .find(|s| s.size > 0 && (s.address..s.address + s.size).contains(&target_address))?; - let addend = (target_address - target_symbol.address) as i64; + // We also need to create a fake target symbol to go inside our fake relocation. + // This is because we don't have access to list of all symbols in this section, so we can't find + // the real symbol yet. Instead we make a placeholder that has the correct `orig_section_index` + // and `address` fields, and then later on when this information is displayed to the user, we + // can find the real symbol by searching through the object's section's symbols for one that + // contains this address. + let fake_target_symbol = ObjSymbol { + name: "".to_string(), + demangled_name: None, + address: target_address, + section_address: 0, + size: 0, + size_known: false, + kind: Default::default(), + flags: Default::default(), + orig_section_index: Some(orig_section_index), + virtual_address: None, + original_index: None, + bytes: vec![], + }; + // The addend is also fake because we don't know yet if the `target_address` here is the exact + // start of the symbol or if it's in the middle of it. + let fake_addend = 0; Some(ObjReloc { flags: RelocationFlags::Elf { r_type: elf::R_PPC_NONE }, address: cur_addr as u64, - target: target_symbol.clone(), - addend, + target: fake_target_symbol, + addend: fake_addend, }) } @@ -491,7 +502,6 @@ fn generate_fake_pool_reloc_for_addr_mapping( address: u64, code: &[u8], relocations: &[ObjReloc], - sections: &[ObjSection], ) -> HashMap { let mut active_pool_relocs = HashMap::new(); let mut pool_reloc_for_addr = HashMap::new(); @@ -538,9 +548,7 @@ fn generate_fake_pool_reloc_for_addr_mapping( // This instruction doesn't have a real relocation, so it may be a reference to one of // the already-loaded pools. if let Some(pool_reloc) = active_pool_relocs.get(&addr_src_gpr.0) { - if let Some(fake_pool_reloc) = - make_fake_pool_reloc(offset, cur_addr, pool_reloc, sections) - { + if let Some(fake_pool_reloc) = make_fake_pool_reloc(offset, cur_addr, pool_reloc) { pool_reloc_for_addr.insert(cur_addr, fake_pool_reloc); } if let Some(addr_dst_gpr) = addr_dst_gpr { diff --git a/objdiff-core/src/arch/x86.rs b/objdiff-core/src/arch/x86.rs index a92e09f..33c6675 100644 --- a/objdiff-core/src/arch/x86.rs +++ b/objdiff-core/src/arch/x86.rs @@ -34,7 +34,6 @@ impl ObjArch for ObjArchX86 { relocations: &[ObjReloc], line_info: &BTreeMap, config: &DiffObjConfig, - _sections: &[ObjSection], ) -> Result { let mut result = ProcessCodeResult { ops: Vec::new(), insts: Vec::new() }; let mut decoder = Decoder::with_ip(self.bits, code, address, DecoderOptions::NONE); diff --git a/objdiff-core/src/diff/code.rs b/objdiff-core/src/diff/code.rs index 9e2fb88..3d3273a 100644 --- a/objdiff-core/src/diff/code.rs +++ b/objdiff-core/src/diff/code.rs @@ -28,7 +28,6 @@ pub fn process_code_symbol( §ion.relocations, §ion.line_info, config, - &obj.sections, ) } diff --git a/objdiff-gui/src/views/function_diff.rs b/objdiff-gui/src/views/function_diff.rs index d555065..5f2d165 100644 --- a/objdiff-gui/src/views/function_diff.rs +++ b/objdiff-gui/src/views/function_diff.rs @@ -74,6 +74,19 @@ impl FunctionViewState { } } +fn find_symbol_matching_fake_symbol_in_sections( + fake_symbol: &ObjSymbol, + sections: &[ObjSection], +) -> Option { + let orig_section_index = fake_symbol.orig_section_index?; + let section = sections.iter().find(|s| s.orig_index == orig_section_index)?; + let real_symbol = section + .symbols + .iter() + .find(|s| s.size > 0 && (s.address..s.address + s.size).contains(&fake_symbol.address))?; + Some(real_symbol.clone()) +} + fn ins_hover_ui( ui: &mut egui::Ui, obj: &ObjInfo, @@ -119,17 +132,29 @@ fn ins_hover_ui( } if let Some(reloc) = ins.reloc.as_ref().or(ins.fake_pool_reloc.as_ref()) { + let mut target = reloc.target.clone(); + let mut addend = reloc.addend; + if target.size == 0 && target.name.is_empty() { + // Fake target symbol we added as a placeholder. We need to find the real one. + if let Some(real_target) = + find_symbol_matching_fake_symbol_in_sections(&target, &obj.sections) + { + target = real_target; + addend = (reloc.target.address - target.address) as i64; + } + } + ui.label(format!("Relocation type: {}", obj.arch.display_reloc(reloc.flags))); - let addend_str = match reloc.addend.cmp(&0i64) { - Ordering::Greater => format!("+{:x}", reloc.addend), - Ordering::Less => format!("-{:x}", -reloc.addend), + let addend_str = match addend.cmp(&0i64) { + Ordering::Greater => format!("+{:x}", addend), + Ordering::Less => format!("-{:x}", -addend), _ => "".to_string(), }; ui.colored_label( appearance.highlight_color, - format!("Name: {}{}", reloc.target.name, addend_str), + format!("Name: {}{}", target.name, addend_str), ); - if let Some(orig_section_index) = reloc.target.orig_section_index { + if let Some(orig_section_index) = target.orig_section_index { if let Some(section) = obj.sections.iter().find(|s| s.orig_index == orig_section_index) { @@ -140,15 +165,12 @@ fn ins_hover_ui( } ui.colored_label( appearance.highlight_color, - format!("Address: {:x}{}", reloc.target.address, addend_str), - ); - ui.colored_label( - appearance.highlight_color, - format!("Size: {:x}", reloc.target.size), + format!("Address: {:x}{}", target.address, addend_str), ); - if reloc.addend >= 0 && reloc.target.bytes.len() > reloc.addend as usize { + ui.colored_label(appearance.highlight_color, format!("Size: {:x}", target.size)); + if addend >= 0 && target.bytes.len() > addend as usize { if let Some(s) = obj.arch.guess_data_type(ins).and_then(|ty| { - obj.arch.display_data_type(ty, &reloc.target.bytes[reloc.addend as usize..]) + obj.arch.display_data_type(ty, &target.bytes[addend as usize..]) }) { ui.colored_label(appearance.highlight_color, s); } From a51a5ac93f47b5f5211ba486df4fb5e039567ed5 Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Tue, 3 Dec 2024 00:43:20 -0500 Subject: [PATCH 08/10] Add hack to detect strings via the addi opcode --- objdiff-core/src/arch/ppc.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc.rs index 7d90db9..ec9bb13 100644 --- a/objdiff-core/src/arch/ppc.rs +++ b/objdiff-core/src/arch/ppc.rs @@ -204,7 +204,21 @@ impl ObjArch for ObjArchPpc { return Some(DataType::String); } - guess_data_type_from_load_store_inst_op(Opcode::from(instruction.op as u8)) + let op = Opcode::from(instruction.op as u8); + if let Some(ty) = guess_data_type_from_load_store_inst_op(op) { + Some(ty) + } else if op == Opcode::Addi { + // Assume that any addi instruction that references a local symbol is loading a string. + // This hack is not ideal and results in tons of false positives where it will show + // garbage strings (e.g. misinterpreting arrays, float literals, etc). + // But there isn't much other choice as not all strings are in the @stringBase pool. + // And even those that are would be missed by the target.name.starts_with("@stringBase") + // hack above for fake pooled relocations, as they have an empty string placeholder for + // the target symbol name. + Some(DataType::String) + } else { + None + } } fn display_data_type(&self, ty: DataType, bytes: &[u8]) -> Option { @@ -422,7 +436,7 @@ fn get_offset_and_addr_gpr_for_possible_pool_reference( } } else { // If it's not a load/store instruction, there's two more possibilities we need to handle. - // 1. It could be a reference to @stringBase. + // 1. It could be loading a pointer to a string. // 2. It could be moving the relocation address plus an offset into a different register to // load from later. // If either of these match, we also want to return the destination register that the From 042fa9ef9e0b9a356b7171d4579b904a4a372674 Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Tue, 3 Dec 2024 18:59:42 -0500 Subject: [PATCH 09/10] Move hack to resolve placeholder symbol into process_code_symbol --- objdiff-core/src/arch/ppc.rs | 6 ++-- objdiff-core/src/diff/code.rs | 35 ++++++++++++++++++-- objdiff-gui/src/views/function_diff.rs | 46 +++++++------------------- 3 files changed, 46 insertions(+), 41 deletions(-) diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc.rs index ec9bb13..7e49267 100644 --- a/objdiff-core/src/arch/ppc.rs +++ b/objdiff-core/src/arch/ppc.rs @@ -211,10 +211,8 @@ impl ObjArch for ObjArchPpc { // Assume that any addi instruction that references a local symbol is loading a string. // This hack is not ideal and results in tons of false positives where it will show // garbage strings (e.g. misinterpreting arrays, float literals, etc). - // But there isn't much other choice as not all strings are in the @stringBase pool. - // And even those that are would be missed by the target.name.starts_with("@stringBase") - // hack above for fake pooled relocations, as they have an empty string placeholder for - // the target symbol name. + // But not all strings are in the @stringBase pool, so the condition above that checks + // the target symbol name would miss some. Some(DataType::String) } else { None diff --git a/objdiff-core/src/diff/code.rs b/objdiff-core/src/diff/code.rs index 3d3273a..b5c0fad 100644 --- a/objdiff-core/src/diff/code.rs +++ b/objdiff-core/src/diff/code.rs @@ -9,7 +9,7 @@ use crate::{ DiffObjConfig, ObjInsArgDiff, ObjInsBranchFrom, ObjInsBranchTo, ObjInsDiff, ObjInsDiffKind, ObjSymbolDiff, }, - obj::{ObjInfo, ObjInsArg, ObjReloc, ObjSymbolFlags, SymbolRef}, + obj::{ObjInfo, ObjInsArg, ObjReloc, ObjSection, ObjSymbol, ObjSymbolFlags, SymbolRef}, }; pub fn process_code_symbol( @@ -21,14 +21,30 @@ pub fn process_code_symbol( let section = section.ok_or_else(|| anyhow!("Code symbol section not found"))?; let code = §ion.data [symbol.section_address as usize..(symbol.section_address + symbol.size) as usize]; - obj.arch.process_code( + let mut res = obj.arch.process_code( symbol.address, code, section.orig_index, §ion.relocations, §ion.line_info, config, - ) + )?; + + for inst in res.insts.iter_mut() { + if let Some(reloc) = &mut inst.fake_pool_reloc { + if reloc.target.size == 0 && reloc.target.name.is_empty() { + // Fake target symbol we added as a placeholder. We need to find the real one. + if let Some(real_target) = + find_symbol_matching_fake_symbol_in_sections(&reloc.target, &obj.sections) + { + reloc.addend = (reloc.target.address - real_target.address) as i64; + reloc.target = real_target; + } + } + } + } + + Ok(res) } pub fn no_diff_code(out: &ProcessCodeResult, symbol_ref: SymbolRef) -> Result { @@ -369,3 +385,16 @@ fn compare_ins( } Ok(result) } + +fn find_symbol_matching_fake_symbol_in_sections( + fake_symbol: &ObjSymbol, + sections: &[ObjSection], +) -> Option { + let orig_section_index = fake_symbol.orig_section_index?; + let section = sections.iter().find(|s| s.orig_index == orig_section_index)?; + let real_symbol = section + .symbols + .iter() + .find(|s| s.size > 0 && (s.address..s.address + s.size).contains(&fake_symbol.address))?; + Some(real_symbol.clone()) +} diff --git a/objdiff-gui/src/views/function_diff.rs b/objdiff-gui/src/views/function_diff.rs index 5f2d165..d555065 100644 --- a/objdiff-gui/src/views/function_diff.rs +++ b/objdiff-gui/src/views/function_diff.rs @@ -74,19 +74,6 @@ impl FunctionViewState { } } -fn find_symbol_matching_fake_symbol_in_sections( - fake_symbol: &ObjSymbol, - sections: &[ObjSection], -) -> Option { - let orig_section_index = fake_symbol.orig_section_index?; - let section = sections.iter().find(|s| s.orig_index == orig_section_index)?; - let real_symbol = section - .symbols - .iter() - .find(|s| s.size > 0 && (s.address..s.address + s.size).contains(&fake_symbol.address))?; - Some(real_symbol.clone()) -} - fn ins_hover_ui( ui: &mut egui::Ui, obj: &ObjInfo, @@ -132,29 +119,17 @@ fn ins_hover_ui( } if let Some(reloc) = ins.reloc.as_ref().or(ins.fake_pool_reloc.as_ref()) { - let mut target = reloc.target.clone(); - let mut addend = reloc.addend; - if target.size == 0 && target.name.is_empty() { - // Fake target symbol we added as a placeholder. We need to find the real one. - if let Some(real_target) = - find_symbol_matching_fake_symbol_in_sections(&target, &obj.sections) - { - target = real_target; - addend = (reloc.target.address - target.address) as i64; - } - } - ui.label(format!("Relocation type: {}", obj.arch.display_reloc(reloc.flags))); - let addend_str = match addend.cmp(&0i64) { - Ordering::Greater => format!("+{:x}", addend), - Ordering::Less => format!("-{:x}", -addend), + let addend_str = match reloc.addend.cmp(&0i64) { + Ordering::Greater => format!("+{:x}", reloc.addend), + Ordering::Less => format!("-{:x}", -reloc.addend), _ => "".to_string(), }; ui.colored_label( appearance.highlight_color, - format!("Name: {}{}", target.name, addend_str), + format!("Name: {}{}", reloc.target.name, addend_str), ); - if let Some(orig_section_index) = target.orig_section_index { + if let Some(orig_section_index) = reloc.target.orig_section_index { if let Some(section) = obj.sections.iter().find(|s| s.orig_index == orig_section_index) { @@ -165,12 +140,15 @@ fn ins_hover_ui( } ui.colored_label( appearance.highlight_color, - format!("Address: {:x}{}", target.address, addend_str), + format!("Address: {:x}{}", reloc.target.address, addend_str), + ); + ui.colored_label( + appearance.highlight_color, + format!("Size: {:x}", reloc.target.size), ); - ui.colored_label(appearance.highlight_color, format!("Size: {:x}", target.size)); - if addend >= 0 && target.bytes.len() > addend as usize { + if reloc.addend >= 0 && reloc.target.bytes.len() > reloc.addend as usize { if let Some(s) = obj.arch.guess_data_type(ins).and_then(|ty| { - obj.arch.display_data_type(ty, &target.bytes[addend as usize..]) + obj.arch.display_data_type(ty, &reloc.target.bytes[reloc.addend as usize..]) }) { ui.colored_label(appearance.highlight_color, s); } From dc46914428b86e9bb388c300556094b7f3afb2be Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Tue, 3 Dec 2024 19:13:58 -0500 Subject: [PATCH 10/10] Merge reloc and fake_pool_reloc fields of ObjIns --- objdiff-core/src/arch/arm.rs | 1 - objdiff-core/src/arch/arm64.rs | 2 -- objdiff-core/src/arch/mips.rs | 1 - objdiff-core/src/arch/ppc.rs | 10 ++-------- objdiff-core/src/arch/x86.rs | 2 -- objdiff-core/src/diff/code.rs | 2 +- objdiff-core/src/obj/mod.rs | 1 - objdiff-gui/src/views/function_diff.rs | 2 +- 8 files changed, 4 insertions(+), 17 deletions(-) diff --git a/objdiff-core/src/arch/arm.rs b/objdiff-core/src/arch/arm.rs index 279cecb..b36b5b7 100644 --- a/objdiff-core/src/arch/arm.rs +++ b/objdiff-core/src/arch/arm.rs @@ -216,7 +216,6 @@ impl ObjArch for ObjArchArm { mnemonic: Cow::Borrowed(parsed_ins.mnemonic), args, reloc, - fake_pool_reloc: None, branch_dest, line, formatted: parsed_ins.display(display_options).to_string(), diff --git a/objdiff-core/src/arch/arm64.rs b/objdiff-core/src/arch/arm64.rs index ae78f8a..7e396fa 100644 --- a/objdiff-core/src/arch/arm64.rs +++ b/objdiff-core/src/arch/arm64.rs @@ -59,7 +59,6 @@ impl ObjArch for ObjArchArm64 { mnemonic: Cow::Borrowed(""), args: vec![], reloc: None, - fake_pool_reloc: None, branch_dest: None, line: None, formatted: "".to_string(), @@ -122,7 +121,6 @@ impl ObjArch for ObjArchArm64 { mnemonic: Cow::Borrowed(mnemonic), args, reloc, - fake_pool_reloc: None, branch_dest, line, formatted: ins.to_string(), diff --git a/objdiff-core/src/arch/mips.rs b/objdiff-core/src/arch/mips.rs index c2c28d1..4bb3be0 100644 --- a/objdiff-core/src/arch/mips.rs +++ b/objdiff-core/src/arch/mips.rs @@ -205,7 +205,6 @@ impl ObjArch for ObjArchMips { mnemonic: Cow::Borrowed(mnemonic), args, reloc: reloc.cloned(), - fake_pool_reloc: None, branch_dest, line, formatted, diff --git a/objdiff-core/src/arch/ppc.rs b/objdiff-core/src/arch/ppc.rs index 7e49267..3768a53 100644 --- a/objdiff-core/src/arch/ppc.rs +++ b/objdiff-core/src/arch/ppc.rs @@ -150,8 +150,7 @@ impl ObjArch for ObjArchPpc { size: 4, mnemonic: Cow::Borrowed(simplified.mnemonic), args, - reloc: reloc.cloned(), - fake_pool_reloc: fake_pool_reloc_for_addr.get(&cur_addr).cloned(), + reloc: reloc.or(fake_pool_reloc_for_addr.get(&cur_addr)).cloned(), op: ins.op as u16, branch_dest, line, @@ -195,12 +194,7 @@ impl ObjArch for ObjArchPpc { } fn guess_data_type(&self, instruction: &ObjIns) -> Option { - if instruction - .reloc - .as_ref() - .or(instruction.fake_pool_reloc.as_ref()) - .is_some_and(|r| r.target.name.starts_with("@stringBase")) - { + if instruction.reloc.as_ref().is_some_and(|r| r.target.name.starts_with("@stringBase")) { return Some(DataType::String); } diff --git a/objdiff-core/src/arch/x86.rs b/objdiff-core/src/arch/x86.rs index 33c6675..82399fb 100644 --- a/objdiff-core/src/arch/x86.rs +++ b/objdiff-core/src/arch/x86.rs @@ -54,7 +54,6 @@ impl ObjArch for ObjArchX86 { mnemonic: Cow::Borrowed(""), args: vec![], reloc: None, - fake_pool_reloc: None, branch_dest: None, line: None, formatted: String::new(), @@ -80,7 +79,6 @@ impl ObjArch for ObjArchX86 { mnemonic: Cow::Borrowed(""), args: vec![], reloc: reloc.cloned(), - fake_pool_reloc: None, branch_dest: None, line, formatted: String::new(), diff --git a/objdiff-core/src/diff/code.rs b/objdiff-core/src/diff/code.rs index b5c0fad..e645a24 100644 --- a/objdiff-core/src/diff/code.rs +++ b/objdiff-core/src/diff/code.rs @@ -31,7 +31,7 @@ pub fn process_code_symbol( )?; for inst in res.insts.iter_mut() { - if let Some(reloc) = &mut inst.fake_pool_reloc { + if let Some(reloc) = &mut inst.reloc { if reloc.target.size == 0 && reloc.target.name.is_empty() { // Fake target symbol we added as a placeholder. We need to find the real one. if let Some(real_target) = diff --git a/objdiff-core/src/obj/mod.rs b/objdiff-core/src/obj/mod.rs index 800259d..1ef6d0f 100644 --- a/objdiff-core/src/obj/mod.rs +++ b/objdiff-core/src/obj/mod.rs @@ -106,7 +106,6 @@ pub struct ObjIns { pub mnemonic: Cow<'static, str>, pub args: Vec, pub reloc: Option, - pub fake_pool_reloc: Option, pub branch_dest: Option, /// Line number pub line: Option, diff --git a/objdiff-gui/src/views/function_diff.rs b/objdiff-gui/src/views/function_diff.rs index d555065..df18958 100644 --- a/objdiff-gui/src/views/function_diff.rs +++ b/objdiff-gui/src/views/function_diff.rs @@ -118,7 +118,7 @@ fn ins_hover_ui( } } - if let Some(reloc) = ins.reloc.as_ref().or(ins.fake_pool_reloc.as_ref()) { + if let Some(reloc) = &ins.reloc { ui.label(format!("Relocation type: {}", obj.arch.display_reloc(reloc.flags))); let addend_str = match reloc.addend.cmp(&0i64) { Ordering::Greater => format!("+{:x}", reloc.addend),