diff --git a/src/spimdisasm/src/analysis/instruction_analysis_result.rs b/src/spimdisasm/src/analysis/instruction_analysis_result.rs index b485d33..8011c33 100644 --- a/src/spimdisasm/src/analysis/instruction_analysis_result.rs +++ b/src/spimdisasm/src/analysis/instruction_analysis_result.rs @@ -2,7 +2,9 @@ /* SPDX-License-Identifier: MIT */ use alloc::collections::{btree_map::BTreeMap, btree_set::BTreeSet}; -use rabbitizer::{Instruction, Vram}; +use rabbitizer::{ + opcodes::Opcode, registers::Gpr, traits::Register, vram::VramOffset, Instruction, Vram, +}; use crate::{context::Context, rom_address::RomAddress, rom_vram_range::RomVramRange}; @@ -22,6 +24,14 @@ pub struct InstructionAnalysisResult { /// Key is the rom of the instruction, value is the address of the called function. func_calls: BTreeMap, + + hi_instrs: BTreeMap, + non_lo_instrs: BTreeSet, + + // TODO: merge these 3 thingies + address_per_instr: BTreeMap, + address_per_hi_instr: BTreeMap, + address_per_lo_instr: BTreeMap, } impl InstructionAnalysisResult { @@ -33,6 +43,11 @@ impl InstructionAnalysisResult { referenced_vrams_by_rom: BTreeMap::new(), branch_targets: BTreeMap::new(), func_calls: BTreeMap::new(), + hi_instrs: BTreeMap::new(), + non_lo_instrs: BTreeSet::new(), + address_per_instr: BTreeMap::new(), + address_per_hi_instr: BTreeMap::new(), + address_per_lo_instr: BTreeMap::new(), } } @@ -50,6 +65,15 @@ impl InstructionAnalysisResult { pub fn func_calls(&self) -> &BTreeMap { &self.func_calls } + + #[must_use] + pub fn address_per_hi_instr(&self) -> &BTreeMap { + &self.address_per_hi_instr + } + #[must_use] + pub fn address_per_lo_instr(&self) -> &BTreeMap { + &self.address_per_lo_instr + } } impl InstructionAnalysisResult { @@ -67,7 +91,7 @@ impl InstructionAnalysisResult { context: &Context, regs_tracker: &mut RegisterTracker, instr: &Instruction, - _prev_instr: Option<&Instruction>, + prev_instr: Option<&Instruction>, ) { if let Some(target_vram) = instr.get_branch_vram_generic() { // 
instr.opcode().is_branch() or instr.is_unconditional_branch() @@ -75,7 +99,22 @@ impl InstructionAnalysisResult { } else if let Some(target_vram) = instr.get_instr_index_as_vram() { // instr.opcode().is_jump_with_address() self.process_func_call(context, instr, target_vram); + } else if instr.is_jumptable_jump() { + self.process_jumptable_jump(regs_tracker, instr); + } else if instr.opcode().is_jump() && instr.opcode().does_link() { + // `jalr`. Implicit `!is_jump_with_address` + self.process_jump_and_link_register(regs_tracker, instr); + } else if instr.opcode().can_be_hi() { + self.process_hi(regs_tracker, instr, prev_instr); + } else if instr.opcode().is_unsigned() { + self.process_unsigned_lo(regs_tracker, instr); + } else if instr.opcode().can_be_lo() { + self.process_signed_lo(context, regs_tracker, instr, prev_instr); + } else if instr.opcode() == Opcode::core_addu { + self.process_symbol_dereference_type(regs_tracker, instr); } + + regs_tracker.overwrite_registers(instr, self.rom_from_instr(instr)); } } @@ -91,7 +130,7 @@ impl InstructionAnalysisResult { return; } - regs_tracker.process_branch(instr); + regs_tracker.process_branch(instr, self.rom_from_instr(instr)); /* if instrOffset in self.branchInstrOffsets: @@ -99,10 +138,7 @@ impl InstructionAnalysisResult { return */ - let instr_rom = self - .ranges - .rom_from_vram(instr.vram()) - .expect("This should not panic"); + let instr_rom = self.rom_from_instr(instr); self.add_referenced_vram(context, instr_rom, target_vram); self.branch_targets.insert(instr_rom, target_vram); } @@ -119,28 +155,350 @@ impl InstructionAnalysisResult { self.funcCallOutsideRangesOffsets[instrOffset] = target */ - let instr_rom = self - .ranges - .rom_from_vram(instr.vram()) - .expect("This should not panic"); + let instr_rom = self.rom_from_instr(instr); self.add_referenced_vram(context, instr_rom, target_vram); self.func_calls.insert(instr_rom, target_vram); } + + fn process_jumptable_jump( + &mut self, + _regs_tracker: &mut 
RegisterTracker, + _instr: &Instruction, + ) { + // TODO + } + + fn process_jump_and_link_register( + &mut self, + _regs_tracker: &mut RegisterTracker, + _instr: &Instruction, + ) { + // TODO + } + + fn process_hi( + &mut self, + regs_tracker: &mut RegisterTracker, + instr: &Instruction, + prev_instr: Option<&Instruction>, + ) { + let instr_rom = self.rom_from_instr(instr); + regs_tracker.process_hi(instr, instr_rom, prev_instr); + self.hi_instrs.insert( + instr_rom, + ( + instr.get_destination_gpr().unwrap(), + instr.get_processed_immediate().unwrap() as u16, + ), + ); + } + + fn process_unsigned_lo(&mut self, _regs_tracker: &mut RegisterTracker, _instr: &Instruction) { + // TODO + /* + # Constants + luiOffset = regsTracker.getLuiOffsetForConstant(instr) + if luiOffset is None: + return + luiInstr = self.luiInstrs.get(luiOffset, None) + if luiInstr is None: + return + self.processConstant(regsTracker, luiInstr, luiOffset, instr, instrOffset) + */ + } + + fn process_signed_lo( + &mut self, + context: &Context, + regs_tracker: &mut RegisterTracker, + instr: &Instruction, + _prev_instr: Option<&Instruction>, + ) { + let instr_rom = self.rom_from_instr(instr); + + // TODO + if instr.opcode().does_load() + && instr + .get_destination_gpr() + .is_some_and(|reg| reg.is_global_pointer(instr.abi())) + { + regs_tracker.process_gp_load(instr, instr_rom); + } + + /* + if instrOffset in self.nonLoInstrOffsets: + return + */ + + let pairing_info = regs_tracker.preprocess_lo_and_get_info(instr, instr_rom); + if pairing_info.is_none() { + if regs_tracker.has_lo_but_not_hi(instr) { + self.non_lo_instrs.insert(instr_rom); + } + return; + } + let pairing_info = pairing_info.unwrap(); + + if pairing_info.is_gp_got && !context.global_config().gp_config().is_some_and(|x| x.pic()) { + return; + } + + let upper_info = if pairing_info.is_gp_rel { + None + } else { + Some((pairing_info.value, pairing_info.instr_rom)) + }; + + if let Some((_upper_half, hi_rom)) = upper_info { + if let 
Some((hi_reg, _hi_imm)) = self.hi_instrs.get(&hi_rom) { + if hi_reg.is_global_pointer(instr.abi()) { + if let Some(lo_rs) = instr.field_rs() { + if instr.opcode().reads_rs() && lo_rs.is_global_pointer(instr.abi()) { + if let Some(lo_rt) = instr.field_rt() { + if instr.opcode().modifies_rt() + && lo_rt.is_global_pointer(instr.abi()) + { + if context.global_config().gp_config().is_some_and(|x| x.pic()) + { + /* + # cpload + self.unpairedCploads.append(CploadInfo(luiOffset, instrOffset)) + */ + } else { + /* + hiGpValue = luiInstr.getProcessedImmediate() << 16 + loGpValue = instr.getProcessedImmediate() + self.gpSets[instrOffset] = GpSetInfo(luiOffset, instrOffset, hiGpValue+loGpValue) + self.gpSetsOffsets.add(luiOffset) + self.gpSetsOffsets.add(instrOffset) + */ + } + // Early return to avoid counting this pairing as a normal symbol + return; + } + } + } + } + } + } + } + + let address = self.pair_hi_lo(context, &upper_info, instr, instr_rom); + if address.is_none() { + return; + } + let address = address.unwrap(); + if upper_info.is_none() { + if context.global_config().gp_config().is_some_and(|x| x.pic()) { + self.process_got_symbol(address, instr_rom); + return; + } + } + + if self.process_address(context, address, &upper_info, instr, instr_rom) { + // TODO: move out from this check + regs_tracker.process_lo(instr, address.inner(), instr_rom); + } + } + + fn process_symbol_dereference_type( + &mut self, + _regs_tracker: &mut RegisterTracker, + _instr: &Instruction, + ) { + // TODO + } +} + +impl InstructionAnalysisResult { + fn pair_hi_lo( + &mut self, + context: &Context, + upper_info: &Option<(i64, RomAddress)>, + instr: &Instruction, + _instr_rom: RomAddress, + ) -> Option { + // upper_info being None means this symbol is a $gp access + + let lower_half = if let Some(x) = instr.get_processed_immediate() { + VramOffset::new(x) + } else { + return None; + }; + /* + + if lowerOffset in self.symbolLoInstrOffset: + # This %lo has been processed already + + # Check 
the other lui has the same immediate value as this one, and reject the pair if it doesn't + if hiValue is not None: + otherLuiOffset = self.lowToHiDict.get(lowerOffset, None) + if otherLuiOffset is not None: + otherLuiInstr = self.luiInstrs.get(otherLuiOffset, None) + if otherLuiInstr is not None: + if hiValue != otherLuiInstr.getProcessedImmediate() << 16: + return None + + if not common.GlobalConfig.COMPILER.value.pairMultipleHiToSameLow: + # IDO does not pair multiples %hi to the same %lo + return self.symbolLoInstrOffset[lowerOffset] + + else: + if luiOffset is None or hiValue is None: + return None + + if self.hiToLowDict.get(luiOffset, None) == lowerOffset and self.lowToHiDict.get(lowerOffset, None) == luiOffset: + # This pair has been already paired + return self.symbolLoInstrOffset[lowerOffset] + + # luiInstrPrev = self.instructions[(luiOffset-4)//4] + # if luiInstrPrev.isBranchLikely() or luiInstrPrev.isUnconditionalBranch(): + # # This lui will be nullified afterwards, so it is likely for it to be re-used lui + # pass + # elif luiInstrPrev.isBranch(): + # # I'm not really sure if a lui on any branch slot is enough to believe this is really a symbol + # # Let's hope it does for now... 
+ # pass + # elif luiOffset + 4 == lowerOffset: + if luiOffset + 4 == lowerOffset: + # Make an exception if the lower instruction is just after the LUI + pass + else: + upperHalf = hiValue + address = upperHalf + lowerHalf + if address == self.symbolLoInstrOffset[lowerOffset]: + # Make an exception if the resulting address is the same + pass + else: + return self.symbolLoInstrOffset[lowerOffset] + */ + + if let Some((upper_half, _hi_rom)) = upper_info { + if *upper_half < 0 { + None + } else if lower_half.is_negative() + && lower_half.inner().abs() as u32 > *upper_half as u32 + { + None + } else { + Some(Vram::new(*upper_half as u32) + lower_half) + } + } else if let Some(gp_value) = context.global_config().gp_config().map(|x| x.gp_value()) { + // TODO: implement comparison for Vram and VramOffset + if lower_half.is_negative() && lower_half.inner().abs() as u32 > gp_value.inner() { + None + } else { + Some(gp_value + lower_half) + } + } else { + None + } + } + + fn process_got_symbol(&mut self, _address: Vram, _instr_rom: RomAddress) { + // TODO + } + + fn process_address( + &mut self, + context: &Context, + address: Vram, + upper_info: &Option<(i64, RomAddress)>, + instr: &Instruction, + instr_rom: RomAddress, + ) -> bool { + /* + # filter out stuff that may not be a real symbol + filterOut = False + if not self.context.totalVramRange.isInRange(address): + if common.GlobalConfig.SYMBOL_FINDER_FILTER_LOW_ADDRESSES or common.GlobalConfig.SYMBOL_FINDER_FILTER_HIGH_ADDRESSES: + filterOut |= common.GlobalConfig.SYMBOL_FINDER_FILTER_LOW_ADDRESSES and address < common.GlobalConfig.SYMBOL_FINDER_FILTER_ADDRESSES_ADDR_LOW + filterOut |= common.GlobalConfig.SYMBOL_FINDER_FILTER_HIGH_ADDRESSES and address >= common.GlobalConfig.SYMBOL_FINDER_FILTER_ADDRESSES_ADDR_HIGH + else: + filterOut |= True + + if filterOut: + contextSym = self.context.globalSegment.getSymbol(address) + if contextSym is not None: + if contextSym.isUserDeclared: + # If the user declared a symbol outside 
the total vram range then use it anyways + filterOut = False + + if address > 0 and filterOut and lowerInstr.uniqueId != rabbitizer.InstrId.cpu_addiu: + if common.GlobalConfig.SYMBOL_FINDER_FILTERED_ADDRESSES_AS_CONSTANTS: + # Let's pretend this value is a constant + constant = address + self.referencedConstants.add(constant) + + self.constantLoInstrOffset[lowerOffset] = constant + self.constantInstrOffset[lowerOffset] = constant + if luiOffset is not None: + self.constantHiInstrOffset[luiOffset] = constant + self.constantInstrOffset[luiOffset] = constant + + self.hiToLowDict[luiOffset] = lowerOffset + self.lowToHiDict[lowerOffset] = luiOffset + return None + */ + + self.add_referenced_vram(context, instr_rom, address); + + if self + .address_per_lo_instr + .insert(instr_rom, address) + .is_none() + { + self.address_per_instr.insert(instr_rom, address); + } + if let Some((_upper_half, hi_rom)) = upper_info { + if self.address_per_hi_instr.insert(*hi_rom, address).is_none() { + self.address_per_instr.insert(*hi_rom, address); + self.add_referenced_vram(context, *hi_rom, address); + } + /* + self.hiToLowDict[luiOffset] = lowerOffset + self.lowToHiDict[lowerOffset] = luiOffset + */ + } else { + /* + self.symbolGpInstrOffset[lowerOffset] = address + self.gpReferencedSymbols.add(address) + self.symbolInstrOffset[lowerOffset] = address + */ + } + + self.process_symbol_type(address, instr, instr_rom); + + true + } + + fn process_symbol_type( + &mut self, + _address: Vram, + _instr: &Instruction, + _instr_rom: RomAddress, + ) { + // TODO + } } impl InstructionAnalysisResult { + fn rom_from_instr(&self, instr: &Instruction) -> RomAddress { + self.ranges + .rom_from_vram(instr.vram()) + .expect("This should not panic") + } + fn add_referenced_vram( &mut self, context: &Context, instr_rom: RomAddress, referenced_vram: Vram, ) { - if let Some(gp_config) = context.global_config().gp_config() { - if !gp_config.pic() { - self.referenced_vrams.insert(referenced_vram); - 
self.referenced_vrams_by_rom - .insert(instr_rom, referenced_vram); - } + if !context.global_config().gp_config().is_some_and(|x| x.pic()) { + self.referenced_vrams.insert(referenced_vram); + self.referenced_vrams_by_rom + .insert(instr_rom, referenced_vram); } } } diff --git a/src/spimdisasm/src/analysis/lo_pairing_info.rs b/src/spimdisasm/src/analysis/lo_pairing_info.rs new file mode 100644 index 0000000..8dfef2f --- /dev/null +++ b/src/spimdisasm/src/analysis/lo_pairing_info.rs @@ -0,0 +1,11 @@ +/* SPDX-FileCopyrightText: © 2024 Decompollaborate */ +/* SPDX-License-Identifier: MIT */ + +use crate::rom_address::RomAddress; + +pub struct LoPairingInfo { + pub(crate) instr_rom: RomAddress, + pub(crate) value: i64, // TODO: This is fishy + pub(crate) is_gp_rel: bool, + pub(crate) is_gp_got: bool, +} diff --git a/src/spimdisasm/src/analysis/mod.rs b/src/spimdisasm/src/analysis/mod.rs index 6fd9d39..93a5b52 100644 --- a/src/spimdisasm/src/analysis/mod.rs +++ b/src/spimdisasm/src/analysis/mod.rs @@ -3,10 +3,12 @@ mod instruction_analysis_result; mod instruction_analyzer; +mod lo_pairing_info; mod register_tracker; mod tracked_register_state; pub use instruction_analysis_result::InstructionAnalysisResult; pub(crate) use instruction_analyzer::InstructionAnalyzer; +pub(crate) use lo_pairing_info::LoPairingInfo; pub(crate) use register_tracker::RegisterTracker; pub(crate) use tracked_register_state::TrackedRegisterState; diff --git a/src/spimdisasm/src/analysis/register_tracker.rs b/src/spimdisasm/src/analysis/register_tracker.rs index 2a187b9..b5662f7 100644 --- a/src/spimdisasm/src/analysis/register_tracker.rs +++ b/src/spimdisasm/src/analysis/register_tracker.rs @@ -1,9 +1,11 @@ /* SPDX-FileCopyrightText: © 2024 Decompollaborate */ /* SPDX-License-Identifier: MIT */ -use rabbitizer::{registers::Gpr, traits::Register, Instruction}; +use rabbitizer::{opcodes::Opcode, registers::Gpr, traits::Register, Instruction}; -use super::TrackedRegisterState; +use 
crate::rom_address::RomAddress; + +use super::{LoPairingInfo, TrackedRegisterState}; #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct RegisterTracker { @@ -42,7 +44,174 @@ impl RegisterTracker { } } - pub(crate) fn process_branch(&mut self, _instr: &Instruction) { + pub(crate) fn process_branch(&mut self, instr: &Instruction, instr_rom: RomAddress) { + assert!(instr.get_branch_offset_generic().is_some()); + + if let Some(reg) = instr.field_rs() { + if instr.opcode().reads_rs() { + self.registers[reg.as_index()].set_branching(instr_rom); + } + } + if let Some(reg) = instr.field_rt() { + if instr.opcode().reads_rt() { + self.registers[reg.as_index()].set_branching(instr_rom); + } + } + if let Some(reg) = instr.field_rd() { + if instr.opcode().reads_rd() { + self.registers[reg.as_index()].set_branching(instr_rom); + } + } + } + + pub(crate) fn process_hi( + &mut self, + instr: &Instruction, + instr_rom: RomAddress, + prev_instr: Option<&Instruction>, + ) { + assert!(instr.opcode().can_be_hi()); + + let reg = instr + .get_destination_gpr() + .expect("lui should have dst register"); + let state = &mut self.registers[reg.as_index()]; + + state.clear(); + state.set_hi( + instr + .get_processed_immediate() + .expect("lui should have an immediate field") as u32, + instr_rom, + prev_instr, + ); + } + + pub(crate) fn process_gp_load(&mut self, instr: &Instruction, instr_rom: RomAddress) { + assert!(instr.opcode().can_be_lo()); + + let reg = instr + .get_destination_gpr() + .expect("should have dst register"); + let state = &mut self.registers[reg.as_index()]; + + state.clear(); + state.set_gp_load( + instr + .get_processed_immediate() + .expect("should have immediate field") as u32, + instr_rom, + ); + } + + pub(crate) fn process_lo(&mut self, instr: &Instruction, value: u32, instr_rom: RomAddress) { + if let Some(dst_reg) = instr.get_destination_gpr() { + let state = &mut self.registers[dst_reg.as_index()]; + state.set_lo(value, instr_rom); + if 
instr.opcode().does_dereference() { + state.set_deref(instr_rom); + } + if Some(dst_reg) == instr.field_rs() { + state.clear_hi(); + state.clear_gp(); + } + state.clear_branch(); + } + } + + pub(crate) fn overwrite_registers(&mut self, instr: &Instruction, instr_rom: RomAddress) { + if self.move_register(instr) { + return; + } + + match instr.opcode() { + Opcode::core_mtc1 | Opcode::core_dmtc1 | Opcode::core_ctc1 => { + // IDO usually use a reg as a temp when loading a constant value + // into the float coprocessor, after that IDO never re-uses the value + // in that reg for anything else + self.clear_reg(instr.field_rt().expect("This should not panic"), instr_rom); + } + _ => { + if let Some(reg) = instr.get_destination_gpr() { + if instr.opcode().can_be_hi() { + self.registers[reg.as_index()].clear_lo(); + } else { + self.clear_reg(reg, instr_rom); + } + } + } + } + } + + pub(crate) fn preprocess_lo_and_get_info( + &mut self, + instr: &Instruction, + instr_rom: RomAddress, + ) -> Option { + if let Some(reg) = instr.field_rs() { + let state = &self.registers[reg.as_index()]; + + if let Some(hi_info) = state.hi_info() { + if !hi_info.set_on_branch_likely { + return Some(LoPairingInfo { + instr_rom: hi_info.instr_rom, + value: state.value() as i64, + is_gp_rel: false, + is_gp_got: false, + }); + } + } else if reg.is_global_pointer(instr.abi()) { + return Some(LoPairingInfo { + instr_rom: RomAddress::new(0), + value: state.value() as i64, + is_gp_rel: true, + is_gp_got: false, + }); + } else if let Some(gp_info) = state.gp_info() { + return Some(LoPairingInfo { + instr_rom: gp_info, + value: state.value() as i64, + is_gp_rel: false, + is_gp_got: true, + }); + } + + if let Some(rt) = instr.field_rt() { + if instr.opcode().does_dereference() { + if state.lo_info().is_some() && state.dereferenced().is_none() { + // Simulate a dereference + self.registers[rt.as_index()].dereference_from(*state, instr_rom); + self.registers[rt.as_index()].clear_branch(); + } + } + } + } + 
+ None + } + + pub(crate) fn has_lo_but_not_hi(&self, instr: &Instruction) -> bool { + instr.field_rs().is_some_and(|reg| { + let state = self.registers[reg.as_index()]; + state.lo_info().is_some() && state.hi_info().is_none() + }) + } +} + +impl RegisterTracker { + fn move_register(&mut self, _instr: &Instruction) -> bool { // TODO + false + } + + fn clear_reg(&mut self, reg: Gpr, instr_rom: RomAddress) { + let state = &mut self.registers[reg.as_index()]; + + state.clear_hi(); + if !state.was_set_in_current_instr(instr_rom) { + state.clear_gp(); + state.clear_lo(); + } + state.clear_branch(); } } diff --git a/src/spimdisasm/src/analysis/tracked_register_state.rs b/src/spimdisasm/src/analysis/tracked_register_state.rs index 4851d1e..d464889 100644 --- a/src/spimdisasm/src/analysis/tracked_register_state.rs +++ b/src/spimdisasm/src/analysis/tracked_register_state.rs @@ -1,15 +1,130 @@ /* SPDX-FileCopyrightText: © 2024 Decompollaborate */ /* SPDX-License-Identifier: MIT */ +use rabbitizer::Instruction; + +use crate::rom_address::RomAddress; + +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) struct HiInfo { + pub(crate) instr_rom: RomAddress, + + // If the previous instruction is a branch likely, then nullify + // the effects of this instruction for future analysis + pub(crate) set_on_branch_likely: bool, +} + #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub struct TrackedRegisterState {} +pub struct TrackedRegisterState { + // Maybe wrap in Option? + value: u32, + + // TODO: maybe wrap in an enum? 
+ hi_info: Option, + gp_info: Option, + lo_info: Option, + dereferenced: Option, + branch_info: Option, +} impl TrackedRegisterState { pub(crate) fn new() -> Self { - Self {} + Self { + value: 0, + hi_info: None, + gp_info: None, + lo_info: None, + dereferenced: None, + branch_info: None, + } + } + + pub(crate) fn value(&self) -> u32 { + self.value + } + pub(crate) fn hi_info(&self) -> Option { + self.hi_info + } + pub(crate) fn gp_info(&self) -> Option { + self.gp_info + } + pub(crate) fn lo_info(&self) -> Option { + self.lo_info + } + pub(crate) fn dereferenced(&self) -> Option { + self.dereferenced + } +} + +impl TrackedRegisterState { + pub fn clear(&mut self) { + self.value = 0; + + self.clear_hi(); + self.clear_gp(); + self.clear_lo(); + self.clear_branch(); + } + + pub fn clear_hi(&mut self) { + self.hi_info = None; + } + pub fn clear_gp(&mut self) { + self.gp_info = None; + } + pub fn clear_lo(&mut self) { + self.lo_info = None; + self.dereferenced = None; + } + pub fn clear_branch(&mut self) { + self.branch_info = None; } } impl TrackedRegisterState { - pub fn clear(&mut self) {} + pub fn set_hi(&mut self, value: u32, instr_rom: RomAddress, prev_instr: Option<&Instruction>) { + assert!(self.gp_info.is_none()); + self.value = value << 16; + + self.hi_info = Some(HiInfo { + instr_rom, + set_on_branch_likely: prev_instr + .is_some_and(|x| x.opcode().is_branch_likely() || x.is_unconditional_branch()), + }); + self.dereferenced = None; + } + + pub fn set_gp_load(&mut self, value: u32, instr_rom: RomAddress) { + assert!(self.hi_info.is_none()); + self.value = value; + + self.gp_info = Some(instr_rom); + } + + pub fn set_lo(&mut self, value: u32, instr_rom: RomAddress) { + self.value = value; + + self.lo_info = Some(instr_rom); + self.dereferenced = None; + } + + pub fn set_branching(&mut self, instr_rom: RomAddress) { + self.branch_info = Some(instr_rom); + } + + pub fn set_deref(&mut self, instr_rom: RomAddress) { + self.dereferenced = Some(instr_rom); + } + + 
pub fn dereference_from(&mut self, other: Self, instr_rom: RomAddress) { + *self = other; + self.set_deref(instr_rom); + } +} + +impl TrackedRegisterState { + pub fn was_set_in_current_instr(&self, _instr_rom: RomAddress) -> bool { + // TODO + false + } } diff --git a/src/spimdisasm/src/context/context_builder.rs b/src/spimdisasm/src/context/context_builder.rs index 4684c31..ac194cf 100644 --- a/src/spimdisasm/src/context/context_builder.rs +++ b/src/spimdisasm/src/context/context_builder.rs @@ -208,24 +208,7 @@ pub struct ContextBuilderOverlay { } impl ContextBuilderOverlay { - /* - pub fn add_overlay(&mut self, category: OverlayCategoryName, - rom_range: AddressRange, - vram_range: AddressRange,) -> SegmentModifier { - - let segment = self.overlay_segments - .entry( - category.clone() - ) - .or_insert_with( - || OverlayCategory::new(category, rom_range, vram_range) - ); - - SegmentModifier { - segment - } - } - */ + #[must_use] pub fn add_overlay_category(&mut self, category: OverlayCategoryName) -> OverlaysBuilder { OverlaysBuilder { name: category.clone(), diff --git a/src/spimdisasm/src/context/the_context.rs b/src/spimdisasm/src/context/the_context.rs index 9d1ec9e..0c223ba 100644 --- a/src/spimdisasm/src/context/the_context.rs +++ b/src/spimdisasm/src/context/the_context.rs @@ -18,8 +18,9 @@ use crate::{ #[derive(Debug, Clone, Hash, PartialEq)] pub struct OverlayCategory { - placeholder_segment: SegmentMetadata, - segments: BTreeMap, + // TODO: remove `pub`s + pub placeholder_segment: SegmentMetadata, + pub segments: BTreeMap, } impl OverlayCategory { @@ -72,12 +73,18 @@ impl Context { } impl Context { + #[must_use] pub const fn global_config(&self) -> &GlobalConfig { &self.global_config } + #[must_use] pub const fn global_segment(&self) -> &SegmentMetadata { &self.global_segment } + #[must_use] + pub const fn overlay_segments(&self) -> &BTreeMap { + &self.overlay_segments + } } #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] @@ -154,16 
+161,17 @@ impl Context { } } } + } - // If not found, then we should check every category except the one that associated to the parent segment. - for (ovl_cat, segments_per_rom) in self.overlay_segments.iter() { - if overlay_category_name == ovl_cat { - continue; - } - let segment = &segments_per_rom.placeholder_segment; - if segment.in_vram_range(vram) { - return Some(segment); - } + let overlay_category_name = info.overlay_category_name(); + // If not found, then we should check every category except the one that associated to the parent segment. + for (ovl_cat, segments_per_rom) in self.overlay_segments.iter() { + if overlay_category_name == Some(ovl_cat) { + continue; + } + let segment = &segments_per_rom.placeholder_segment; + if segment.in_vram_range(vram) { + return Some(segment); } } @@ -192,16 +200,17 @@ fn find_referenced_segment_mut_impl<'ctx>( } } }); + } - // If not found, then we should check every category except the one that associated to the parent segment. - for (ovl_cat, segments_per_rom) in slf.overlay_segments.iter_mut() { - if overlay_category_name == ovl_cat { - continue; - } - let segment = &mut segments_per_rom.placeholder_segment; - if segment.in_vram_range(vram) { - return Some(segment); - } + let overlay_category_name = info.overlay_category_name(); + // If not found, then we should check every category except the one that associated to the parent segment. 
+ for (ovl_cat, segments_per_rom) in slf.overlay_segments.iter_mut() { + if overlay_category_name == Some(ovl_cat) { + continue; + } + let segment = &mut segments_per_rom.placeholder_segment; + if segment.in_vram_range(vram) { + return Some(segment); } } diff --git a/src/spimdisasm/src/metadata/symbol_metadata.rs b/src/spimdisasm/src/metadata/symbol_metadata.rs index 208ed69..6bb9718 100644 --- a/src/spimdisasm/src/metadata/symbol_metadata.rs +++ b/src/spimdisasm/src/metadata/symbol_metadata.rs @@ -355,7 +355,7 @@ impl fmt::Debug for SymbolMetadata { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "SymbolMetadata {{ vram: 0x{}, name: {} }}", + "SymbolMetadata {{ vram: 0x{}, name: \"{}\" }}", self.vram, self.display_name() ) diff --git a/src/spimdisasm/src/relocation/reloc_referenced_sym.rs b/src/spimdisasm/src/relocation/reloc_referenced_sym.rs index 2fe4a7c..893cef5 100644 --- a/src/spimdisasm/src/relocation/reloc_referenced_sym.rs +++ b/src/spimdisasm/src/relocation/reloc_referenced_sym.rs @@ -8,5 +8,5 @@ use rabbitizer::Vram; #[non_exhaustive] pub enum RelocReferencedSym { Address(Vram), - SymName(String), + SymName(String, i32), } diff --git a/src/spimdisasm/src/relocation/relocation_info.rs b/src/spimdisasm/src/relocation/relocation_info.rs index 7207a4c..64faec1 100644 --- a/src/spimdisasm/src/relocation/relocation_info.rs +++ b/src/spimdisasm/src/relocation/relocation_info.rs @@ -17,20 +17,14 @@ use super::{RelocReferencedSym, RelocationType}; pub struct RelocationInfo { reloc_type: RelocationType, referenced_sym: RelocReferencedSym, - addend: i32, } impl RelocationInfo { #[must_use] - pub fn new( - reloc_type: RelocationType, - referenced_sym: RelocReferencedSym, - addend: i32, - ) -> Self { + pub fn new(reloc_type: RelocationType, referenced_sym: RelocReferencedSym) -> Self { Self { reloc_type, referenced_sym, - addend, } } @@ -42,10 +36,6 @@ impl RelocationInfo { pub const fn referenced_sym(&self) -> &RelocReferencedSym { 
&self.referenced_sym } - #[must_use] - pub const fn addend(&self) -> i32 { - self.addend - } pub fn display<'ctx, 'rel, 'prnt>( &'rel self, @@ -58,8 +48,8 @@ impl RelocationInfo { #[derive(Debug, Clone, Copy, Hash, PartialEq, PartialOrd)] enum RelocSymState<'name, 'meta> { - LiteralSymName(&'name str), - Sym(&'meta SymbolMetadata), + LiteralSymName(&'name str, i32), + Sym(Vram, &'meta SymbolMetadata), // Kinda useful for debugging SymbolNotFound(Vram), // Kinda useful for debugging @@ -80,7 +70,9 @@ impl<'ctx, 'rel, 'prnt> RelocationInfoDisplay<'ctx, 'rel, 'prnt> { segment_info: &'prnt ParentSegmentInfo, ) -> Option { let reloc_sym_state = match &rel.referenced_sym { - RelocReferencedSym::SymName(name) => RelocSymState::LiteralSymName(name), + RelocReferencedSym::SymName(name, addend) => { + RelocSymState::LiteralSymName(name, *addend) + } RelocReferencedSym::Address(vram) => { if let Some(referenced_segment) = context.find_referenced_segment(*vram, segment_info) @@ -88,7 +80,7 @@ impl<'ctx, 'rel, 'prnt> RelocationInfoDisplay<'ctx, 'rel, 'prnt> { if let Some(sym_metadata) = referenced_segment .find_symbol(*vram, FindSettings::new().with_allow_addend(false)) { - RelocSymState::Sym(sym_metadata) + RelocSymState::Sym(*vram, sym_metadata) } else { // TODO: make this a setting if false { @@ -142,18 +134,25 @@ impl fmt::Display for RelocationInfoDisplay<'_, '_, '_> { write!(f, "(")?; } - match &self.reloc_sym_state { - RelocSymState::LiteralSymName(name) => write!(f, "{}", name)?, - RelocSymState::Sym(sym_metadata) => write!(f, "{}", sym_metadata.display_name())?, + let addend = match &self.reloc_sym_state { + RelocSymState::LiteralSymName(name, addend) => { + write!(f, "{}", name)?; + *addend + } + RelocSymState::Sym(vram, sym_metadata) => { + write!(f, "{}", sym_metadata.display_name())?; + (*vram - sym_metadata.vram()).inner() + } RelocSymState::SymbolNotFound(vram) => { - write!(f, "/* ERROR: symbol for address 0x{} not found */", vram)? 
+ write!(f, "/* ERROR: symbol for address 0x{} not found */", vram)?; + 0 } RelocSymState::SegmentNotFound(vram) => { - write!(f, "/* ERROR: segment for address 0x{} not found */", vram)? + write!(f, "/* ERROR: segment for address 0x{} not found */", vram)?; + 0 } - } + }; - let addend = self.rel.addend; if addend != 0 { /* if GlobalConfig.COMPILER.value.bigAddendWorkaroundForMigratedFunctions and isSplittedSymbol: diff --git a/src/spimdisasm/src/relocation/relocation_type.rs b/src/spimdisasm/src/relocation/relocation_type.rs index 71de764..a7fceff 100644 --- a/src/spimdisasm/src/relocation/relocation_type.rs +++ b/src/spimdisasm/src/relocation/relocation_type.rs @@ -112,7 +112,7 @@ impl RelocationType { } #[must_use] - pub fn new_reloc_info(self, referenced_sym: RelocReferencedSym, addend: i32) -> RelocationInfo { - RelocationInfo::new(self, referenced_sym, addend) + pub fn new_reloc_info(self, referenced_sym: RelocReferencedSym) -> RelocationInfo { + RelocationInfo::new(self, referenced_sym) } } diff --git a/src/spimdisasm/src/symbols/symbol_function.rs b/src/spimdisasm/src/symbols/symbol_function.rs index 2c18cd9..7493840 100644 --- a/src/spimdisasm/src/symbols/symbol_function.rs +++ b/src/spimdisasm/src/symbols/symbol_function.rs @@ -55,6 +55,8 @@ impl SymbolFunction { *sym.autodetected_size_mut() = Some(size); sym.set_defined(); + // TODO: Consider moving reloc generation to a later step + for (instr_rom, target_vram) in instr_analysis.branch_targets() { /* if common.GlobalConfig.INPUT_FILE_TYPE == common.InputFileType.ELF: @@ -73,7 +75,7 @@ impl SymbolFunction { let instr_index = (*instr_rom - rom).inner() / 4; relocs[instr_index as usize] = Some( RelocationType::R_MIPS_PC16 - .new_reloc_info(RelocReferencedSym::Address(*target_vram), 0), + .new_reloc_info(RelocReferencedSym::Address(*target_vram)), ); /* @@ -112,11 +114,105 @@ impl SymbolFunction { } let instr_index = (*instr_rom - rom).inner() / 4; relocs[instr_index as usize] = Some( - 
RelocationType::R_MIPS_26 - .new_reloc_info(RelocReferencedSym::Address(*target_vram), 0), + RelocationType::R_MIPS_26.new_reloc_info(RelocReferencedSym::Address(*target_vram)), + ); + } + + for (instr_rom, symbol_vram) in instr_analysis.address_per_lo_instr() { + /* + if self.context.isAddressBanned(symVram): + continue + */ + /* + if common.GlobalConfig.INPUT_FILE_TYPE == common.InputFileType.ELF: + if self.getVromOffset(loOffset) in self.context.globalRelocationOverrides: + # Avoid creating wrong symbols on elf files + continue + */ + /* + symAccessDict = self.instrAnalyzer.possibleSymbolTypes.get(symVram, dict()) + symAccess = None + if len(symAccessDict) == 1: + # Infer type info if there's only one access type + symAccess = list(symAccessDict)[0] + */ + + if let Some(referenced_segment) = + context.find_referenced_segment_mut(*symbol_vram, &parent_segment_info) + { + let _sym = referenced_segment.add_symbol( + *symbol_vram, + None, + GeneratedBy::Autogenerated, + None, + true, + ); + /* + contextSym = self.addSymbol(symVram, isAutogenerated=True, allowAddendInstead=True) + if contextSym is not None: + # TODO: do this in a less ugly way + if contextSym.address != symVram: + if contextSym.address % 4 != 0 or symVram % 4 != 0: + if contextSym.getType() in {"u16", "s16", "u8", "u8"} or (symAccess is not None and symAccess.accessType in {rabbitizer.AccessType.BYTE, rabbitizer.AccessType.SHORT}): + if not (contextSym.getSize() > 4): + if contextSym.userDeclaredSize is None or symVram >= contextSym.address + contextSym.userDeclaredSize: + if symAccess is not None: + contextSym.setAccessTypeIfUnset(symAccess.accessType, symAccess.unsignedMemoryAccess) + contextSym.setFirstLoAccessIfUnset(loOffset) + contextSym = self.addSymbol(symVram, isAutogenerated=True) + */ + + /* + contextSym.referenceCounter += 1 + contextSym.referenceFunctions.add(self.contextSym) + contextSym.setFirstLoAccessIfUnset(loOffset) + if symAccess is not None: + 
contextSym.setAccessTypeIfUnset(symAccess.accessType, symAccess.unsignedMemoryAccess) + if contextSym.isAutogenerated: + # Handle mips1 doublefloats + if contextSym.accessType == rabbitizer.AccessType.FLOAT and common.GlobalConfig.ABI == common.Abi.O32: + instr = self.instructions[loOffset//4] + if instr.doesDereference() and instr.isFloat() and not instr.isDouble(): + if instr.ft.value % 2 != 0: + # lwc1/swc1 with an odd fpr means it is an mips1 doublefloats reference + if symVram % 8 != 0: + # We need to remove the the symbol pointing to the middle of this doublefloats + got = contextSym.isGot + gotLocal = contextSym.isGotLocal + gotGlobal = contextSym.isGotGlobal + self.removeSymbol(symVram) + + # Align down to 8 + symVram = (symVram >> 3) << 3 + contextSym = self.addSymbol(symVram, isAutogenerated=True) + contextSym.referenceCounter += 1 + contextSym.referenceFunctions.add(self.contextSym) + contextSym.setFirstLoAccessIfUnset(loOffset) + contextSym.isGot = got + contextSym.isGotLocal = gotLocal + contextSym.isGotGlobal = gotGlobal + contextSym.accessType = rabbitizer.AccessType.DOUBLEFLOAT + contextSym.unsignedAccessType = False + contextSym.isMips1Double = True + */ + } + + let instr_index = (*instr_rom - rom).inner() / 4; + relocs[instr_index as usize] = Some( + RelocationType::R_MIPS_LO16 + .new_reloc_info(RelocReferencedSym::Address(*symbol_vram)), + ); + } + for (instr_rom, symbol_vram) in instr_analysis.address_per_hi_instr() { + let instr_index = (*instr_rom - rom).inner() / 4; + relocs[instr_index as usize] = Some( + RelocationType::R_MIPS_HI16 + .new_reloc_info(RelocReferencedSym::Address(*symbol_vram)), ); } + // self._generateRelocsFromInstructionAnalyzer() + Ok(Self { ranges, instructions, diff --git a/src/spimdisasm/tests/game_tests.rs b/src/spimdisasm/tests/game_tests.rs index 4cc9371..9c102ba 100644 --- a/src/spimdisasm/tests/game_tests.rs +++ b/src/spimdisasm/tests/game_tests.rs @@ -328,7 +328,7 @@ fn drmario64_us_without_symbols() { } } - 
assert_eq!(context.global_segment().symbols().len(), 7992); + assert_eq!(context.global_segment().symbols().len(), 9112); /* for seg in &segments { @@ -346,7 +346,7 @@ fn drmario64_us_without_symbols() { .sum::() }) .sum(); - assert_eq!(function_count, 1402); + assert_eq!(function_count, 1404); let data_syms_count: usize = segments .iter() @@ -357,7 +357,7 @@ fn drmario64_us_without_symbols() { .sum::() }) .sum(); - assert_eq!(data_syms_count, 57); + assert_eq!(data_syms_count, 279); } #[cfg_attr(feature = "game_tests", test)] @@ -397,7 +397,7 @@ fn drmario64_us_with_symbols() { } } - assert_eq!(context.global_segment().symbols().len(), 9460); + assert_eq!(context.global_segment().symbols().len(), 9532); /* for seg in &segments { diff --git a/src/spimdisasm/tests/test_section_text.rs b/src/spimdisasm/tests/test_section_text.rs index 6b31262..be9f7a8 100644 --- a/src/spimdisasm/tests/test_section_text.rs +++ b/src/spimdisasm/tests/test_section_text.rs @@ -6,6 +6,7 @@ use spimdisasm::{ address_range::AddressRange, config::{Endian, GlobalConfig}, context::ContextBuilder, + metadata::OverlayCategoryName, parent_segment_info::ParentSegmentInfo, rom_address::RomAddress, sections::{SectionText, SectionTextSettings}, @@ -95,20 +96,42 @@ fn test_section_text_1() { ]; let rom = RomAddress::new(0x001050); let vram = Vram::new(0x80000400); - let size = Size::new(0x1000); + let size = Size::new(0x21FC00); let text_settings = SectionTextSettings::new(InstructionFlags::new()); let global_config = GlobalConfig::new(Endian::Big); let mut context = { - let mut heater = ContextBuilder::new( + let mut overlays_builder = ContextBuilder::new( global_config, AddressRange::new(rom, rom + size), AddressRange::new(vram, vram + size), ) - .process() .process(); + for i in 0x0..=0xF { + let category_name = OverlayCategoryName::new(format!("segment_0{:X}", i)); + let mut overlay_builder = overlays_builder.add_overlay_category(category_name.clone()); + + let magic_number = 0x01000000; + let 
segment_size = Size::new(magic_number); + let segment_vram = Vram::new(i * magic_number); + let vram_range = AddressRange::new(segment_vram, segment_vram + segment_size); + let arbitrary_number = 128 * 1024 * 1024; // 128MiB, no rom should be that big, right? + let segment_rom = RomAddress::new(arbitrary_number + i * magic_number); + let rom_range = AddressRange::new(segment_rom, segment_rom + segment_size); + + println!( + "Adding overlay '{:?}': {:?} {:?}", + category_name, rom_range, vram_range + ); + + overlay_builder.add_overlay(rom_range, vram_range); + overlay_builder.build().unwrap(); + } + + let mut heater = overlays_builder.process(); + heater.preanalyze_text(&text_settings, &bytes, rom, vram); heater.process().build() @@ -139,7 +162,25 @@ fn test_section_text_1() { for s in symbols { println!("{:?}", s.1); } - assert_eq!(symbols.len(), 4); + assert_eq!(symbols.len(), 7); + + println!(); + let overlays_data = context + .overlay_segments() + .get(&OverlayCategoryName::new("segment_01".into())) + .unwrap(); + println!("placeholder:"); + for sym in overlays_data.placeholder_segment.symbols() { + println!("{:?}", sym); + } + println!(); + println!("other:"); + for (segment_rom, segment_metadata) in &overlays_data.segments { + println!(" {:?}", segment_rom,); + for sym in segment_metadata.symbols() { + println!(" {:?}", sym); + } + } // None::.unwrap(); }