Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP 32bit #4

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion ida_kernelcache/class_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,7 @@ def _convert_operands_to_struct_offsets(access_addresses):
if insn:
for op in insn.Operands:
if op.type == idaapi.o_displ:
if not idc.OpStroffEx(ea, op.n, sid, delta):
if not idc.OpStroffEx(insn, op.n, sid, delta):
_log(1, 'Could not convert {:#x} to struct offset for class {} '
'delta {}', ea, classname, delta)

Expand Down
199 changes: 186 additions & 13 deletions ida_kernelcache/collect_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,15 @@

_MEMOP_WBINDEX = _MEMOP_PREINDEX | _MEMOP_POSTINDEX

# 32-bit kernelcaches keep constant data in __DATA (e.g. __DATA.__const), while
# 64-bit ones use the dedicated __DATA_CONST segment (e.g. __DATA_CONST.__const).
_CONST_SEGNAME = '__DATA' if idau.WORD_SIZE == 4 else '__DATA_CONST'

class _Regs(object):
"""A set of registers for _emulate_arm64."""
"""A set of registers for _emulate_arm64/32."""

class _Unknown:
"""A wrapper class indicating that the value is unknown."""
Expand All @@ -52,8 +59,13 @@ def clear(self, reg):
pass

def _reg(self, reg):
if type(reg) is int:
if isinstance(reg, (int, long)):
reg = _Regs._reg_names[reg]

# Automatically map Rn to Xn
if reg[0] == 'R' and reg[1:].isdigit():
reg = 'X' + reg[1:]

return reg

def __getitem__(self, reg):
Expand All @@ -64,11 +76,11 @@ def __getitem__(self, reg):

def __setitem__(self, reg, value):
if value is None or value is _Regs.Unknown:
self.clear(reg)
self.clear(self._reg(reg))
else:
self._regs[self._reg(reg)] = value & 0xffffffffffffffff

def _emulate_arm64(start, end, on_BL=None, on_RET=None):
def _emulate_arm64(start, end=None, count=None, on_BL=None, on_RET=None):
"""A very basic partial Arm64 emulator that does just enough to find OSMetaClass
information."""
# Super basic emulation.
Expand All @@ -86,8 +98,7 @@ def load(addr, dtyp):
def cleartemps():
for t in ['X{}'.format(i) for i in range(0, 19)]:
reg.clear(t)
for insn in idau.Instructions(start, end):
_log(11, 'Processing instruction {:#x}', insn.ea)
for insn in idau.Instructions(start, end=end, count=count):
mnem = insn.get_canon_mnem()
if mnem == 'ADRP' or mnem == 'ADR':
reg[insn.Op1.reg] = insn.Op2.value
Expand Down Expand Up @@ -123,6 +134,167 @@ def cleartemps():
_log(10, 'Unrecognized instruction at address {:#x}', insn.ea)
reg.clearall()

def _emulate_arm32(start, end=None, count=None, on_BL=None, on_RET=None, reg=None):
    """A very basic partial Arm32 emulator that does just enough to find OSMetaClass
    information.

    Instructions are walked linearly; branches that stay inside [start, end] are
    ignored so that all code is visited and loops cannot trap the emulator.

    Args:
        start: Address of the first instruction. The low (Thumb) bit is ignored.
        end: Optional end address; rounded up to an even address. Passed to
            idau.Instructions() together with count.
        count: Optional maximum number of instructions, passed to idau.Instructions().
        on_BL: Optional callback on_BL(target, reg) invoked for each direct BL and for
            a branch that is treated as a tail call.
        on_RET: Optional callback on_RET(reg) invoked when a return is recognized
            (BX LR, POP {..., PC}, or a tail-call branch).
        reg: Optional _Regs instance to continue from. It is used when following a
            stub so that the register state is shared with the branch target. A fresh
            _Regs is created when omitted.
    """
    # Super basic emulation.
    if reg is None:
        reg = _Regs()

    # Arm32 registers are 32 bits wide, but _Regs only masks stored values to 64 bits,
    # so arithmetic results are wrapped here.
    mask32 = 0xffffffff

    def load(addr, dtyp):
        # Only 32-bit loads from a known, non-zero address are emulated.
        if not addr:
            return None
        if dtyp != idaapi.dt_dword:
            return None
        return idau.read_word(addr, 4)

    def cleartemps():
        # Forget R0-R11 after a call.
        for t in ['R{}'.format(i) for i in range(0, 12)]:
            reg.clear(t)

    # Handle thumb stuff
    start = start & ~1
    if end is not None:
        end = (end + 1) & ~1

    # if bl is found, lr is replaced, and marked dirty
    # if pop {... lr ...} is found, lr is assumed to be restored to
    # original, "clean" state
    lr_dirty = False

    # Special registers have special handling
    _SP_REG = 13
    _LR_REG = 14
    _PC_REG = 15

    for insn in idau.Instructions(start, end=end, count=count):
        mnem = insn.get_canon_mnem()
        _log(12, 'Regs: {}', reg._regs)
        _log(11, 'Processing instruction {} at {:#x}', mnem, insn.ea)
        if mnem == 'ADR':
            reg[insn.Op1.reg] = insn.Op2.value
        elif (mnem in ('ADD', 'SUB')
                and insn.Op1.type == insn.Op2.type == idc.o_reg
                and insn.Op1.reg == insn.Op2.reg == _SP_REG):
            # ignore add/sub on SP
            pass
        elif (mnem in ('ADD', 'ORR', 'SUB') and insn.Op2.type == idc.o_reg
                and insn.Op3.type == idc.o_imm):
            # There might be more operations, but in practice
            # add/sub/orr are enough
            src = reg[insn.Op2.reg]
            if isinstance(src, _Regs._Unknown):
                # Unknown source makes the destination unknown too.
                reg.clear(insn.Op1.reg)
            else:
                imm = insn.Op3.value
                if mnem == 'ADD':
                    result = src + imm
                elif mnem == 'SUB':
                    result = src - imm
                else:
                    result = src | imm
                reg[insn.Op1.reg] = result & mask32
        elif mnem == 'ADD' and insn.Op3.type == idaapi.o_void:
            # Two-operand form: ADD Rx, <op2>. Nothing to do if Rx is already unknown.
            if not isinstance(reg[insn.Op1.reg], _Regs._Unknown):
                if insn.Op2.type == idc.o_imm:
                    # ADD Rx, <imm>
                    reg[insn.Op1.reg] = (reg[insn.Op1.reg] + insn.Op2.value) & mask32
                elif insn.Op2.type == idc.o_reg and insn.Op2.reg == _PC_REG:
                    # ADD Rx, PC -- special handling. Reading PC yields the address
                    # of the current instruction + 4.
                    # NOTE(review): +4 is the Thumb-state offset; ARM-state code
                    # reads PC as instruction address + 8 -- confirm only Thumb
                    # code reaches this path.
                    reg[insn.Op1.reg] = (reg[insn.Op1.reg] + insn.ea + 4) & mask32
                else:
                    # Any other second operand (e.g. ADD Rx, Ry) is not emulated;
                    # drop the destination instead of keeping a stale value.
                    reg.clear(insn.Op1.reg)
        elif mnem == 'NOP':
            pass
        elif mnem == 'MOV' and insn.Op2.type == idc.o_imm:
            reg[insn.Op1.reg] = insn.Op2.value
        elif mnem == 'MOV' and insn.Op2.type == idc.o_reg:
            reg[insn.Op1.reg] = reg[insn.Op2.reg]
        elif mnem == 'BX' and insn.Op1.type == idc.o_reg and insn.Op1.reg == _LR_REG:
            # bx lr is often used for ret
            if on_RET:
                on_RET(reg)
            break
        elif mnem == 'POP' and insn.Op1.type in (idc.o_idpspec1, idc.o_reg):
            if insn.Op1.type == idc.o_reg:
                # Either it's one register pop'ped
                popped = [insn.Op1.reg]
            else:
                # Or whole set of them, identified by specval bits
                popped = [i for i in range(0, 16) if insn.Op1.specval & (1 << i)]

            for i in popped:
                reg.clear(i)

            if _PC_REG in popped:
                # pop {...pc...} is another way for ret
                if on_RET:
                    on_RET(reg)
                break
            elif _LR_REG in popped:
                lr_dirty = False
        elif mnem == 'BL' and insn.Op1.type == idc.o_near:
            if on_BL:
                on_BL(insn.Op1.addr, reg)
            cleartemps()
            lr_dirty = True
        elif ((mnem == 'B' and insn.Op1.type == idc.o_near)
                or (mnem in ('CBZ', 'CBNZ') and insn.Op2.type == idc.o_near)):
            # B has its target in Op1; CBZ/CBNZ have it in Op2.
            if insn.Op2.type == idaapi.o_void:
                dest = insn.Op1.addr
            else:
                dest = insn.Op2.addr

            # end is None when the caller limits the walk by count; then no branch
            # counts as internal.
            if end is not None and start <= dest <= end:
                # silently ignoring branch since start<=dest<=end
                # So we check all code not skipping anything because of
                # conditions, and also don't get stuck in a loop
                continue

            handled = False
            if not lr_dirty:
                if insn.ea == start:
                    # special case -- when first instruction is branch to
                    # another place -- means that current function is stub
                    _log(11, 'Following {} at {:#x} (to {:#x})', mnem, insn.ea, dest)
                    _emulate_arm32(dest, idc.FindFuncEnd(dest), on_BL=on_BL,
                                   on_RET=on_RET, reg=reg)
                    handled = True
                elif on_RET:
                    # Consider as bl & ret -- usually happens as a way
                    # of optimization, when return func2() in the end of
                    # func1 is replaced by "b _func2"
                    if on_BL:
                        on_BL(dest, reg)
                    cleartemps()
                    on_RET(reg)
                    handled = True
            if not handled:
                _log(11, 'NOT Following {} at {:#x} (to {:#x}) and not considering as ret', mnem, insn.ea, dest)
            # A branch leaving the emulated range ends the walk.
            break
        elif mnem == 'LDR' and insn.Op2.type == idc.o_mem:
            # LDR Rx, =ADDR
            reg[insn.Op1.reg] = load(insn.Op2.addr, insn.Op1.dtype)
        elif mnem == 'LDR' and insn.Op2.type == idc.o_displ and insn.Op2.value == 0:
            # LDR Rx, [Ry]
            reg[insn.Op1.reg] = load(reg[insn.Op2.reg], insn.Op1.dtype)
        elif mnem == 'PUSH' or mnem == 'STR':
            # They don't affect registers directly
            pass
        else:
            # silently clear on V instructions -- they're used pretty
            # often but aren't needed for OSMetaClass stuff
            if mnem not in ('VMOV', 'VST1', 'VLD1'):
                _log(6, 'Unrecognized instruction {} at address {:#x}', mnem, insn.ea)
            reg.clearall()

# Architecture-neutral entry point: use the Arm32 emulator when the word size is
# 4 bytes and the Arm64 emulator otherwise (word size 8).
_emulate_arm = _emulate_arm32 if idau.WORD_SIZE == 4 else _emulate_arm64


class _OneToOneMapFactory(object):
"""A factory to extract the largest one-to-one submap."""

Expand Down Expand Up @@ -162,7 +334,7 @@ def build(self, bad_a=None, bad_b=None):

def _process_mod_init_func_for_metaclasses(func, found_metaclass):
"""Process a function from the __mod_init_func section for OSMetaClass information."""
_log(4, 'Processing function {}', idc.GetFunctionName(func))
_log(4, 'Processing function {:#x} ({})', func, idc.GetFunctionName(func))
def on_BL(addr, reg):
X0, X1, X3 = reg['X0'], reg['X1'], reg['X3']
if not (X0 and X1 and X3):
Expand All @@ -172,7 +344,7 @@ def on_BL(addr, reg):
if not idc.SegName(X1).endswith("__TEXT.__cstring") or not idc.SegName(X0):
return
found_metaclass(X0, idc.GetString(X1), X3, reg['X2'] or None)
_emulate_arm64(func, idc.FindFuncEnd(func), on_BL=on_BL)
_emulate_arm(func, idc.FindFuncEnd(func), on_BL=on_BL)

def _process_mod_init_func_section_for_metaclasses(segstart, found_metaclass):
"""Process a __mod_init_func section for OSMetaClass information."""
Expand All @@ -192,7 +364,7 @@ def found_metaclass(metaclass, classname, class_size, meta_superclass):
metaclass_to_meta_superclass[metaclass] = meta_superclass
for ea in idautils.Segments():
segname = idc.SegName(ea)
if not segname.endswith('__DATA_CONST.__mod_init_func'):
if not segname.endswith(_CONST_SEGNAME + '.__mod_init_func'):
continue
_log(2, 'Processing segment {}', segname)
_process_mod_init_func_section_for_metaclasses(ea, found_metaclass)
Expand Down Expand Up @@ -225,8 +397,9 @@ def _get_vtable_metaclass(vtable_addr, metaclass_info):
def on_RET(reg):
on_RET.ret = reg['X0']
on_RET.ret = None
_emulate_arm64(getMetaClass, getMetaClass + idau.WORD_SIZE * _MAX_GETMETACLASS_INSNS,
on_RET=on_RET)

# use count to avoid alignment errors on arm32
_emulate_arm(getMetaClass, count=_MAX_GETMETACLASS_INSNS, on_RET=on_RET)
if on_RET.ret in metaclass_info:
return on_RET.ret

Expand Down Expand Up @@ -254,7 +427,7 @@ def found_vtable(metaclass, vtable, length):
metaclass_to_vtable_builder.add_link(metaclass, vtable)
for ea in idautils.Segments():
segname = idc.SegName(ea)
if not segname.endswith('__DATA_CONST.__const'):
if not segname.endswith(_CONST_SEGNAME + '.__const'):
continue
_log(2, 'Processing segment {}', segname)
_process_const_section_for_vtables(ea, metaclass_info, found_vtable)
Expand Down Expand Up @@ -295,7 +468,7 @@ def bad_vtable(vtable, metaclasses):

def _check_filetype(filetype):
"""Checks that the filetype is compatible before trying to process it."""
return 'Mach-O' in filetype and 'ARM64' in filetype
return 'Mach-O' in filetype and 'ARM' in filetype

def collect_class_info_internal():
"""Collect information about C++ classes defined in a kernelcache.
Expand Down
16 changes: 15 additions & 1 deletion ida_kernelcache/ida_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ def get_ea_name(ea, fromaddr=idc.BADADDR, true=False, user=False):
Returns:
The name of the address or "".
"""
if WORD_SIZE == 4:
s = idc.SegName(ea).lower()
if 'text' in s or 'stub' in s:
ea &= ~1

if user and not idc.hasUserName(idc.GetFlags(ea)):
return ""
if true:
Expand All @@ -148,6 +153,8 @@ def set_ea_name(ea, name, rename=False, auto=False):
Returns:
True if the address was successfully named (or renamed).
"""
if WORD_SIZE == 4:
ea &= ~1
if not rename and idc.hasUserName(idc.GetFlags(ea)):
return get_ea_name(ea) == name
flags = idc.SN_CHECK
Expand Down Expand Up @@ -431,7 +438,11 @@ def _convert_address_to_function(func):
idc.AnalyseArea(item, itemend)
else:
# Just try removing the chunk from its current function.
idc.RemoveFchunk(func, func)
# IDA can add it to another function automatically, so make sure
# it's removed from all functions by repeating the removal in a
# loop until it fails
while idc.RemoveFchunk(func, func):
pass
# Now try making a function.
if idc.MakeFunction(func) != 0:
return True
Expand Down Expand Up @@ -463,6 +474,9 @@ def is_function_start(ea):

def force_function(addr):
    """Ensure that the given address is a function type, converting it if necessary.

    Returns True when the address already starts a function; otherwise returns the
    result of _convert_address_to_function().
    """
    # Clear the lowest bit -- so it works with THUMB functions too
    # TODO: Consider setting THUMB/ARM mode too
    addr = addr & ~1
    if not is_function_start(addr):
        return _convert_address_to_function(addr)
    return True
Expand Down
9 changes: 8 additions & 1 deletion ida_kernelcache/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,14 @@

def find_kernel_base():
"""Find the kernel base."""
return idaapi.get_fileregion_ea(0)
kbase = idaapi.get_fileregion_ea(0)

if kbase == idc.BADADDR:
# sometimes kernelcache is a FAT Mach-O with one arch
# sizeof(fat_header) + 1 * sizeof(fat_arch) = 28
kbase = idaapi.get_fileregion_ea(28)

return kbase

base = find_kernel_base()
"""The kernel base address (the address of the main kernel Mach-O header)."""
Expand Down
4 changes: 2 additions & 2 deletions ida_kernelcache/offset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def initialize_data_offsets():
for seg in idautils.Segments():
name = idc.SegName(seg)
if not (name.endswith('__DATA_CONST.__const') or name.endswith('__got')
or name.endswith('__DATA.__data')):
or name.endswith('__DATA.__data') or name.endswith('__nl_symbol_ptr')):
continue
for word, ea in idau.ReadWords(seg, idc.SegEnd(seg), addresses=True):
if idau.is_mapped(word, value=False):
Expand Down Expand Up @@ -95,7 +95,7 @@ def initialize_offset_symbols():
next_offset = internal.make_name_generator(kernelcache_offset_suffix)
for ea in idautils.Segments():
segname = idc.SegName(ea)
if not segname.endswith('__got'):
if not segname.endswith('__got') and not segname.endswith('__nl_symbol_ptr'):
continue
_log(2, 'Processing segment {}', segname)
_process_offsets_section(ea, next_offset)
Expand Down
Loading