Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PPC: Display data values on hover for pools as well #140

Merged
merged 10 commits into from
Dec 4, 2024
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ android.keystore
*.frag
*.vert
*.metal
.vscode/launch.json
.vscode/
23 changes: 16 additions & 7 deletions objdiff-core/src/arch/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,22 @@ pub enum DataType {

impl DataType {
pub fn display_bytes<Endian: ByteOrder>(&self, bytes: &[u8]) -> Option<String> {
// TODO: Attempt to interpret large symbols as arrays of a smaller type,
// fallback to intrepreting it as bytes.
// https://github.com/encounter/objdiff/issues/124
if self.required_len().is_some_and(|l| bytes.len() != l) {
log::warn!("Failed to display a symbol value for a symbol whose size doesn't match the instruction referencing it.");
if self.required_len().is_some_and(|l| bytes.len() < l) {
log::warn!("Failed to display a symbol value for a symbol whose size is too small for instruction referencing it.");
return None;
}
let mut bytes = bytes;
if self.required_len().is_some_and(|l| bytes.len() > l) {
// If the symbol's size is larger than a single instance of this data type, we take just the
// bytes necessary for one of them in order to display the first element of the array.
bytes = &bytes[0..self.required_len().unwrap()];
// TODO: Attempt to interpret large symbols as arrays of a smaller type and show all
// elements of the array instead. https://github.com/encounter/objdiff/issues/124
// However, note that the stride of an array can not always be determined just by the
// data type guessed by the single instruction accessing it. There can also be arrays of
// structs that contain multiple elements of different types, so if other elements after
// the first one were to be displayed in this manner, they may be inaccurate.
}

match self {
DataType::Int8 => {
Expand Down Expand Up @@ -86,10 +95,10 @@ impl DataType {
}
}
DataType::Float => {
format!("Float: {}", Endian::read_f32(bytes))
format!("Float: {:?}f", Endian::read_f32(bytes))
}
DataType::Double => {
format!("Double: {}", Endian::read_f64(bytes))
format!("Double: {:?}", Endian::read_f64(bytes))
}
DataType::Bytes => {
format!("Bytes: {:#?}", bytes)
Expand Down
233 changes: 214 additions & 19 deletions objdiff-core/src/arch/ppc.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use std::{borrow::Cow, collections::BTreeMap};
use std::{
borrow::Cow,
collections::{BTreeMap, HashMap},
};

use anyhow::{bail, ensure, Result};
use byteorder::BigEndian;
Expand All @@ -7,7 +10,7 @@ use object::{
elf, File, Object, ObjectSection, ObjectSymbol, Relocation, RelocationFlags, RelocationTarget,
Symbol, SymbolKind,
};
use ppc750cl::{Argument, InsIter, Opcode, GPR};
use ppc750cl::{Argument, InsIter, Opcode, ParsedIns, GPR};

use crate::{
arch::{DataType, ObjArch, ProcessCodeResult},
Expand Down Expand Up @@ -49,6 +52,8 @@ impl ObjArch for ObjArchPpc {
let ins_count = code.len() / 4;
let mut ops = Vec::<u16>::with_capacity(ins_count);
let mut insts = Vec::<ObjIns>::with_capacity(ins_count);
let fake_pool_reloc_for_addr =
generate_fake_pool_reloc_for_addr_mapping(address, code, relocations);
for (cur_addr, mut ins) in InsIter::new(code, address as u32) {
let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);
if let Some(reloc) = reloc {
Expand Down Expand Up @@ -145,7 +150,7 @@ impl ObjArch for ObjArchPpc {
size: 4,
mnemonic: Cow::Borrowed(simplified.mnemonic),
args,
reloc: reloc.cloned(),
reloc: reloc.or(fake_pool_reloc_for_addr.get(&cur_addr)).cloned(),
op: ins.op as u16,
branch_dest,
line,
Expand Down Expand Up @@ -173,6 +178,7 @@ impl ObjArch for ObjArchPpc {
fn display_reloc(&self, flags: RelocationFlags) -> Cow<'static, str> {
match flags {
RelocationFlags::Elf { r_type } => match r_type {
elf::R_PPC_NONE => Cow::Borrowed("R_PPC_NONE"), // We use this for fake pool relocs
elf::R_PPC_ADDR16_LO => Cow::Borrowed("R_PPC_ADDR16_LO"),
elf::R_PPC_ADDR16_HI => Cow::Borrowed("R_PPC_ADDR16_HI"),
elf::R_PPC_ADDR16_HA => Cow::Borrowed("R_PPC_ADDR16_HA"),
Expand All @@ -188,26 +194,22 @@ impl ObjArch for ObjArchPpc {
}

fn guess_data_type(&self, instruction: &ObjIns) -> Option<super::DataType> {
// Always shows the first string of the table. Not ideal, but it's really hard to find
// the actual string being referenced.
if instruction.reloc.as_ref().is_some_and(|r| r.target.name.starts_with("@stringBase")) {
return Some(DataType::String);
}

match Opcode::from(instruction.op as u8) {
Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8),
Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16),
Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16),
Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32),
Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float),
Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double),

Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8),
Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16),
Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32),
Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float),
Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double),
_ => None,
let op = Opcode::from(instruction.op as u8);
if let Some(ty) = guess_data_type_from_load_store_inst_op(op) {
Some(ty)
} else if op == Opcode::Addi {
// Assume that any addi instruction that references a local symbol is loading a string.
// This hack is not ideal and results in tons of false positives where it will show
// garbage strings (e.g. misinterpreting arrays, float literals, etc).
// But not all strings are in the @stringBase pool, so the condition above that checks
// the target symbol name would miss some.
Some(DataType::String)
} else {
None
}
}

Expand Down Expand Up @@ -381,3 +383,196 @@ fn make_symbol_ref(symbol: &Symbol) -> Result<ExtabSymbolRef> {
let demangled_name = cwdemangle::demangle(&name, &cwdemangle::DemangleOptions::default());
Ok(ExtabSymbolRef { original_index: symbol.index().0, name, demangled_name })
}

fn guess_data_type_from_load_store_inst_op(inst_op: Opcode) -> Option<DataType> {
match inst_op {
Opcode::Lbz | Opcode::Lbzu | Opcode::Lbzux | Opcode::Lbzx => Some(DataType::Int8),
Opcode::Lhz | Opcode::Lhzu | Opcode::Lhzux | Opcode::Lhzx => Some(DataType::Int16),
Opcode::Lha | Opcode::Lhau | Opcode::Lhaux | Opcode::Lhax => Some(DataType::Int16),
Opcode::Lwz | Opcode::Lwzu | Opcode::Lwzux | Opcode::Lwzx => Some(DataType::Int32),
Opcode::Lfs | Opcode::Lfsu | Opcode::Lfsux | Opcode::Lfsx => Some(DataType::Float),
Opcode::Lfd | Opcode::Lfdu | Opcode::Lfdux | Opcode::Lfdx => Some(DataType::Double),

Opcode::Stb | Opcode::Stbu | Opcode::Stbux | Opcode::Stbx => Some(DataType::Int8),
Opcode::Sth | Opcode::Sthu | Opcode::Sthux | Opcode::Sthx => Some(DataType::Int16),
Opcode::Stw | Opcode::Stwu | Opcode::Stwux | Opcode::Stwx => Some(DataType::Int32),
Opcode::Stfs | Opcode::Stfsu | Opcode::Stfsux | Opcode::Stfsx => Some(DataType::Float),
Opcode::Stfd | Opcode::Stfdu | Opcode::Stfdux | Opcode::Stfdx => Some(DataType::Double),
_ => None,
}
}

// Decides whether an instruction could be accessing (or copying) an address held in a register.
// On a match, the returned tuple contains:
//   1. the constant offset applied to the address register,
//   2. the register that holds the address,
//   3. the destination register the resulting address is copied into, if the instruction copies
//      the address rather than loading/storing through it.
// Returns `None` when the instruction's opcode/argument shape is not one of the handled forms.
fn get_offset_and_addr_gpr_for_possible_pool_reference(
    opcode: Opcode,
    simplified: &ParsedIns,
) -> Option<(i16, GPR, Option<GPR>)> {
    let args = &simplified.args;

    if guess_data_type_from_load_store_inst_op(opcode).is_none() {
        // Not a load/store. Two remaining shapes are of interest:
        //   - `addi`: may be producing a pointer to a string, or moving the relocation address
        //     plus an offset into another register for a later load.
        //   - `or` with no third operand: a plain register move (`mr` / `mr.`).
        // In both cases the destination register is reported as well, so the caller can keep
        // following the address through the new register.
        return match (opcode, args[0], args[1], args[2]) {
            (
                Opcode::Addi,
                Argument::GPR(copy_dst),
                Argument::GPR(copy_src),
                Argument::Simm(simm),
            ) => Some((simm.0, copy_src, Some(copy_dst))),
            (Opcode::Or, Argument::GPR(copy_dst), Argument::GPR(copy_src), Argument::None) => {
                Some((0, copy_src, Some(copy_dst)))
            }
            _ => None,
        };
    }

    // Load/store instruction: the address operands are in argument slots 1 and 2.
    match (args[1], args[2]) {
        // Immediate-offset form, e.g. `lwz`.
        (Argument::Offset(imm_offset), Argument::GPR(base_gpr)) => {
            Some((imm_offset.0, base_gpr, None))
        }
        // Register-indexed form, e.g. `lwzx`. The offset lives in a register and was most likely
        // derived from an array index, so it is reported as 0 here, which makes the first
        // element of the array the one that gets shown. Recovering the stride from the index
        // arithmetic would be considerably more involved and is not attempted.
        (Argument::GPR(base_gpr), Argument::GPR(_index_gpr)) => Some((0, base_gpr, None)),
        _ => None,
    }
}

// Builds a stand-in relocation for an instruction that reaches data through a pooled address,
// approximating the relocation the instruction would have carried had the data not been pooled.
// This lets pooled and non-pooled accesses be displayed with minimal special-casing.
//
// The relocation type is R_PPC_NONE to signal that no real relocation exists at this instruction;
// reusing the pool relocation's own type would be misleading.
//
// Returns `None` if the computed target address does not fit in a `u64` or if the pool
// relocation's target has no original section index.
fn make_fake_pool_reloc(offset: i16, cur_addr: u32, pool_reloc: &ObjReloc) -> Option<ObjReloc> {
    // Absolute address being referenced: pool symbol address + pool addend + instruction offset.
    let delta_from_pool_symbol = pool_reloc.addend + i64::from(offset);
    let target_address = pool_reloc.target.address.checked_add_signed(delta_from_pool_symbol)?;
    let orig_section_index = pool_reloc.target.orig_section_index?;

    // The full symbol list for the section is not available here, so the real target symbol
    // cannot be looked up yet. A placeholder is emitted instead, carrying only the fields needed
    // to find the real one later (`orig_section_index` and `address`). Its empty name and zero
    // size are what distinguish it from a genuine symbol.
    let placeholder_symbol = ObjSymbol {
        name: String::new(),
        demangled_name: None,
        address: target_address,
        section_address: 0,
        size: 0,
        size_known: false,
        kind: Default::default(),
        flags: Default::default(),
        orig_section_index: Some(orig_section_index),
        virtual_address: None,
        original_index: None,
        bytes: Vec::new(),
    };

    Some(ObjReloc {
        flags: RelocationFlags::Elf { r_type: elf::R_PPC_NONE },
        address: u64::from(cur_addr),
        target: placeholder_symbol,
        // Placeholder as well: whether `target_address` is the start of the real symbol or points
        // into the middle of it is not known until the real symbol has been located.
        addend: 0,
    })
}

// Scans every instruction of a function, tracking which registers currently hold the address of
// a relocated (pooled) symbol, and finds later instructions that access data through those
// registers. Returns a map from instruction address to a "fake pool relocation" that simulates
// what that instruction's relocation would look like if the data had not been pooled.
//
// Parameters:
//   - `address`: address of the first instruction in `code`.
//   - `code`: raw instruction bytes of the function.
//   - `relocations`: real relocations of the containing section; an instruction is considered to
//     have a relocation when a relocation's address, rounded down to 4 bytes, equals its address.
//
// Limitations: instructions are visited strictly in linear order, from start to finish, and
// branches are *not* followed. Because control flow is not respected, the register state tracked
// here can have false positives or false negatives. There are currently no known examples of this
// producing inaccurate results in practice; if some are found, following branches would be the
// way to improve accuracy.
fn generate_fake_pool_reloc_for_addr_mapping(
    address: u64,
    code: &[u8],
    relocations: &[ObjReloc],
) -> HashMap<u32, ObjReloc> {
    // Register number -> relocation whose target address that register is believed to hold.
    let mut active_pool_relocs = HashMap::new();
    // Instruction address -> fake relocation synthesized for that instruction.
    let mut pool_reloc_for_addr = HashMap::new();

    for (cur_addr, ins) in InsIter::new(code, address as u32) {
        let simplified = ins.simplified();
        let reloc = relocations.iter().find(|r| (r.address as u32 & !3) == cur_addr);

        if let Some(reloc) = reloc {
            // This instruction has a real relocation, so it may be a pool load we want to keep
            // track of.
            let args = &simplified.args;
            match (ins.op, args[0], args[1], args[2]) {
                // Second half of a `lis` + `addi` or `lis` + `ori` pair: the destination register
                // now holds the relocation's target address.
                (
                    Opcode::Addi,
                    Argument::GPR(addr_dst_gpr),
                    Argument::GPR(_),
                    Argument::Simm(_),
                )
                | (
                    Opcode::Ori,
                    Argument::GPR(addr_dst_gpr),
                    Argument::GPR(_),
                    Argument::Uimm(_),
                ) => {
                    active_pool_relocs.insert(addr_dst_gpr.0, reloc.clone());
                }
                // Function call. Only `bl` instructions that carry a relocation reach this arm.
                (Opcode::B, _, _, _) if simplified.mnemonic == "bl" => {
                    // The callee may overwrite any volatile register (r0, r3-r12), so forget
                    // whatever was tracked in them. Nonvolatile registers are left untouched.
                    // The range is inclusive so that r12 is cleared as well.
                    active_pool_relocs.remove(&0);
                    for gpr in 3..=12 {
                        active_pool_relocs.remove(&gpr);
                    }
                }
                _ => {}
            }
        } else if let Some((offset, addr_src_gpr, addr_dst_gpr)) =
            get_offset_and_addr_gpr_for_possible_pool_reference(ins.op, &simplified)
        {
            // This instruction doesn't have a real relocation, so it may be a reference to one of
            // the already-loaded pools.
            if let Some(pool_reloc) = active_pool_relocs.get(&addr_src_gpr.0) {
                if let Some(fake_pool_reloc) = make_fake_pool_reloc(offset, cur_addr, pool_reloc) {
                    pool_reloc_for_addr.insert(cur_addr, fake_pool_reloc);
                }
                if let Some(addr_dst_gpr) = addr_dst_gpr {
                    // The pool address (plus `offset`) was copied into another register, so
                    // track it there too: later instructions may reach the symbol through the
                    // new register instead of the one the address was originally loaded into.
                    // For example, the start of a function might `lis` + `addi` the start of the
                    // ...data pool into r25, then the start of a loop `addi`s r25 with the offset
                    // of an array variable into r21, and the loop body `lwzx`s array elements
                    // from r21.
                    let mut new_reloc = pool_reloc.clone();
                    new_reloc.addend += offset as i64;
                    active_pool_relocs.insert(addr_dst_gpr.0, new_reloc);
                }
            }
        }
    }

    pool_reloc_for_addr
}
35 changes: 32 additions & 3 deletions objdiff-core/src/diff/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::{
DiffObjConfig, ObjInsArgDiff, ObjInsBranchFrom, ObjInsBranchTo, ObjInsDiff, ObjInsDiffKind,
ObjSymbolDiff,
},
obj::{ObjInfo, ObjInsArg, ObjReloc, ObjSymbolFlags, SymbolRef},
obj::{ObjInfo, ObjInsArg, ObjReloc, ObjSection, ObjSymbol, ObjSymbolFlags, SymbolRef},
};

pub fn process_code_symbol(
Expand All @@ -21,14 +21,30 @@ pub fn process_code_symbol(
let section = section.ok_or_else(|| anyhow!("Code symbol section not found"))?;
let code = &section.data
[symbol.section_address as usize..(symbol.section_address + symbol.size) as usize];
obj.arch.process_code(
let mut res = obj.arch.process_code(
symbol.address,
code,
section.orig_index,
&section.relocations,
&section.line_info,
config,
)
)?;

for inst in res.insts.iter_mut() {
if let Some(reloc) = &mut inst.reloc {
if reloc.target.size == 0 && reloc.target.name.is_empty() {
// Fake target symbol we added as a placeholder. We need to find the real one.
if let Some(real_target) =
find_symbol_matching_fake_symbol_in_sections(&reloc.target, &obj.sections)
{
reloc.addend = (reloc.target.address - real_target.address) as i64;
reloc.target = real_target;
}
}
}
}

Ok(res)
}

pub fn no_diff_code(out: &ProcessCodeResult, symbol_ref: SymbolRef) -> Result<ObjSymbolDiff> {
Expand Down Expand Up @@ -369,3 +385,16 @@ fn compare_ins(
}
Ok(result)
}

// Resolves a placeholder symbol to the real symbol it stands for.
// The section is chosen by matching the placeholder's `orig_section_index` against each section's
// `orig_index`; within that section, the first symbol with a non-zero size whose
// `[address, address + size)` range contains the placeholder's address is returned as a clone.
// Returns `None` if the placeholder has no section index, no section matches, or no symbol in the
// section covers the address.
fn find_symbol_matching_fake_symbol_in_sections(
    fake_symbol: &ObjSymbol,
    sections: &[ObjSection],
) -> Option<ObjSymbol> {
    let wanted_section_index = fake_symbol.orig_section_index?;
    let wanted_address = fake_symbol.address;

    sections
        .iter()
        .find(|candidate| candidate.orig_index == wanted_section_index)?
        .symbols
        .iter()
        .find(|sym| {
            // Zero-sized symbols cannot contain any address, so they are never a match.
            sym.size > 0 && (sym.address..sym.address + sym.size).contains(&wanted_address)
        })
        .cloned()
}
2 changes: 1 addition & 1 deletion objdiff-gui/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ exec = "0.3"

# native:
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
tracing-subscriber = "0.3"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }

# web:
[target.'cfg(target_arch = "wasm32")'.dependencies]
Expand Down
Loading
Loading