Skip to content

Commit

Permalink
Initial Upload
Browse files Browse the repository at this point in the history
  • Loading branch information
maddiestone authored Jun 18, 2017
1 parent f29cbac commit 8880a66
Show file tree
Hide file tree
Showing 7 changed files with 582 additions and 0 deletions.
121 changes: 121 additions & 0 deletions data_offset_calc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
##############################################################################################
# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.

##############################################################################################
# data_offset_calc.py
# Resolves references made through indirect offsets from a variable, register, or memory
# location whose value is known. For each match the script changes how the operand is
# displayed in the instruction (OpAlt), creates a data cross-reference (add_dref), and, for
# the optional add3 handling, adds a comment containing the resolved address (MakeComm).
# The user needs to define the following:
# offset_var_str: The string representation of the variable, register, or memory
# location to be replaced by the resolved value
# offset_var_value: The value of the variable named in offset_var_str
# reg_ex_indirect: A regular expression describing how indirect offset accesses through
# the variable appear in the disassembly
# regex_immediate: A regular expression describing how the immediate offset value is represented
# new_opnd_display: A format string describing how the calculated and resolved
# value should be displayed as the operand in the instruction
#
# Inputs: start_addr: Start address of the range to analyze
# end_addr: End address (exclusive) of the range to analyze
#
##############################################################################################
import re


################### USER DEFINED VALUES ###################
# Text of the variable/register/location used as the base of the indirect access,
# written exactly as it appears in the IDA disassembly.
offset_var_str = "fp"

# The known value of offset_var_str.
offset_var_value = 0x808000

# Regular expression for how offset_var_str is referenced indirectly in the IDA disassembly:
#   @(-0x(1-8 hex chars), fp)
# offset_var_str goes through re.escape() so that a name containing regex
# metacharacters (for example "$sp") is matched literally, and every fragment is a
# raw string so "\)" is never treated as a (invalid) string escape.
reg_ex_indirect = re.compile(
    r"@\(-?0x[0-9A-Fa-f]{1,8}, " + re.escape(offset_var_str) + r"\)")

# Regular expression for how immediate values are shown in the indirect reference.
# For this example it is 0x1044, but some architectures would show that as 1044h.
regex_immediate = re.compile(r"0x[0-9A-Fa-f]{1,8}")

# Format string for how the newly calculated address should be displayed as the operand.
new_opnd_display = '@[0x%x]'

# OPTIONAL ---- EXAMPLE FOR ADDING OTHER INSTRUCTIONS TO THE PROCESSING
# If you'd like to add other instructions to be processed for resolving indirect offset
# accesses, update the regular expression here and use it as shown in the "else" block of
# the main loop. The pattern follows offset_var_str (instead of a hard-coded "fp") and
# accepts one or more spaces after the mnemonic, because disassembly text is usually
# column-padded.
reg_ex_add3 = re.compile(
    r"add3 +\w\w, " + re.escape(offset_var_str) + r", #-?0x[0-9A-Fa-f]{1,8}")
#############################################################

def _resolve_offset(operand_text):
    """Return (offset, resolved_address) for the first immediate in operand_text.

    The first match of regex_immediate is parsed as a hexadecimal number. It is
    subtracted from offset_var_value when operand_text contains a '-', and added
    otherwise. Returns None when operand_text contains no immediate.
    """
    immediates = regex_immediate.findall(operand_text)
    if not immediates:
        return None
    offset = int(immediates[0], 16)
    if '-' in operand_text:
        return offset, offset_var_value - offset
    return offset, offset_var_value + offset


start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be analyzed.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be analyzed.")

if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr):
    print("[data_offset_calc.py] STARTING. Looking for indirect accesses across 0x%x to 0x%x" % (start_addr, end_addr))
    curr_addr = start_addr
    while curr_addr < end_addr:
        operand = GetOpnd(curr_addr, 1)  # Text of the 2nd operand of the instruction at curr_addr
        if reg_ex_indirect.match(operand):
            print(('[data_offset_calc.py] 0x%x Operand: ' % curr_addr) + operand)

            # Pull the immediate out of the operand, e.g. the 0x10 in "mov R3, @(0x10, fp)".
            resolved = _resolve_offset(operand)
            if resolved is not None:
                offset, new_opnd = resolved
                print("[data_offset_calc.py] 0x%x Offset: 0x%x" % (curr_addr, offset))
                print(("[data_offset_calc.py] 0x%x: Offset + " + offset_var_str + " = 0x%0x") % (curr_addr, new_opnd))
                OpAlt(curr_addr, 1, new_opnd_display % new_opnd)  # Changes the display of the operand
                # dr_T is used for every reference because this script does not
                # check whether the access is a read or a write.
                result = add_dref(curr_addr, new_opnd, dr_T)
                print(("[data_offset_calc.py] Creating dref from 0x%x to 0x%x: " % (curr_addr, new_opnd)) + str(result))
            else:
                print("[data_offset_calc.py] 0x%x: No immediate offset identified." % curr_addr)

        #####################################################################################
        # This block is optional but shows how to add additional regular expressions for other
        # instructions you'd like to match besides the general indirect offset accesses. For
        # M32R we are also matching the add3 instruction that takes the form
        # "add3 Reg, fp, #0xNUM".
        else:
            instruct = GetDisasm(curr_addr)
            if reg_ex_add3.match(instruct):
                print(('[data_offset_calc.py] 0x%08x Instruct: ' % curr_addr) + instruct)
                immed_opnd = GetOpnd(curr_addr, 2)  # 3rd operand, per the reg_ex_add3 pattern
                resolved = _resolve_offset(immed_opnd)
                if resolved is not None:
                    new_opnd = resolved[1]
                    print(('[data_offset_calc.py] 0x%x: Offset + ' + offset_var_str + ' = 0x%08x') % (curr_addr, new_opnd))
                    # Add a comment with the resolved value instead of overwriting the operand display.
                    MakeComm(curr_addr, '0x%08x' % new_opnd)
                    result = add_dref(curr_addr, new_opnd, dr_T)  # Data xref from the instruction to the resolved address
                    print(("[data_offset_calc.py] Creating dref from 0x%x to 0x%x: " % (curr_addr, new_opnd)) + str(result))
                else:
                    print("[data_offset_calc.py] 0x%x: No immediate offset identified." % curr_addr)
        ########################################################################################
        # Bound the search by end_addr. A fixed 0xFFFFF limit made NextHead return
        # BADADDR immediately for any range located at or above that address.
        curr_addr = NextHead(curr_addr, end_addr)
        if curr_addr == BADADDR:
            print("[data_offset_calc.py] EXITING.")
            break
else:
    print("[data_offset_calc.py] QUITTING. Invalid values entered for starting and ending addresses.")
92 changes: 92 additions & 0 deletions define_code_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
##############################################################################################
# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.

##############################################################################################
# define_code_functions.py
# Attempts to define the bytes in the user-entered address range as code and then as functions
# based on the user-defined smart_prolog and smart_epilog regular expressions for that architecture.
#
# Inputs: start_addr: Start address of the range to define as code
# end_addr: End address (exclusive) of the range to define as code
# do_make_unk: Whether to mark the whole range as unknown before defining code
#
##############################################################################################
import re

################### USER DEFINED VALUES ###################
# Enter a regular expression for how this architecture usually begins and ends functions.
# If the architecture does not dictate how to start or end a function use r".*" to allow
# for any instruction.
#
# How the patterns are used by the function-definition loop in this script:
#   - smart_prolog is tested against the disassembly text of the candidate address.
#   - smart_epilog is tested against the disassembly text of the head preceding it.
# Both are applied with .match(), so they are anchored at the start of the text only.
# Leave exactly one smart_prolog/smart_epilog pair uncommented.

# 8051 Architecture Prologue and Epilogue
smart_prolog = re.compile(r".*")
smart_epilog = re.compile(r"reti{0,1}")

# PIC18 Architecture Prologue and Epilogue
#smart_prolog = re.compile(r".*")
#smart_epilog = re.compile(r"return 0")

# Mitsubishi M32R Architecture Prologue and Epilogue
#smart_prolog = re.compile(r"push +lr")
#smart_epilog = re.compile(r"jmp +lr.*")

# Texas Instruments TMS320C28x
#smart_prolog = re.compile(r".*")
#smart_epilog = re.compile(r"lretr")

# AVR
#smart_prolog = re.compile(r"push +r")
#smart_epilog = re.compile(r"reti{0,1}")
############################################################

start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be defined.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be defined.")

# The range must be non-empty, matching the validation done by the sibling scripts.
if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr):
    # AskYN result: 1 = yes, 0 = no, -1 = cancel.
    do_make_unk = AskYN(0, "Do you want to make all of the code block UNKNOWN first?")
    if do_make_unk == 1:
        # Undefine the whole range in one call instead of one byte at a time.
        MakeUnknown(start_addr, end_addr - start_addr, idc.DOUNK_SIMPLE)
    if do_make_unk != -1:
        print("[define_code_functions.py] Running script to define code and functions on 0x%x to 0x%x" % (start_addr, end_addr))

        # FindUnexplored looks for the NEXT unexplored byte, so handle the first
        # byte of the range explicitly.
        # NOTE(review): assumes the search starts strictly after the given address - confirm.
        curr_addr = start_addr
        if isUnknown(GetFlags(curr_addr)):
            MakeCode(curr_addr)

        while curr_addr < end_addr:
            # SEARCH_DOWN is the direction flag FindUnexplored expects;
            # BIN_SEARCH_FORWARD belongs to the binary-search API and is not
            # guaranteed to have the same value.
            next_unexplored = FindUnexplored(curr_addr, idc.SEARCH_DOWN)
            if next_unexplored == BADADDR or next_unexplored >= end_addr:
                # Nothing unexplored is left inside the requested range.
                break
            MakeCode(next_unexplored)  # We don't care whether it succeeds or fails so not storing retval
            curr_addr = next_unexplored

        # Finished attempting to make all unexplored bytes into code.
        # Now, attempt to create functions of all code not currently in a function.
        print("[define_code_functions.py] Completed attempting to define bytes as code. Now trying to define functions.")
        curr_addr = start_addr
        while curr_addr != BADADDR and curr_addr < end_addr:
            is_orphan_code = (isCode(GetFlags(curr_addr))
                              and GetFunctionAttr(curr_addr, FUNCATTR_START) == BADADDR)
            if is_orphan_code:
                # A function may start here if this instruction looks like a prologue or
                # the preceding instruction looks like an epilogue.
                looks_like_start = (smart_prolog.match(GetDisasm(curr_addr))
                                    or smart_epilog.match(GetDisasm(PrevHead(curr_addr))))
                if looks_like_start and MakeFunction(curr_addr) != 0:
                    # MakeFunction succeeded: jump to the first address AFTER the new function.
                    curr_addr = GetFunctionAttr(curr_addr, FUNCATTR_END)
                    continue
            curr_addr = NextHead(curr_addr)
else:
    print("[define_code_functions.py] Quitting. Entered address values are not valid.")




63 changes: 63 additions & 0 deletions define_data_as_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
##############################################################################################
# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.

##############################################################################################
# define_data_as_types.py
# Defines a segment of addresses as the user-specified data type (byte, word, or double word).
# The byte length for each of these types is architecture dependent, but generally:
# 1 byte = Byte
# 2 bytes = Word
# 4 bytes = Double Word
# This script will undefine all bytes in the range first, which means that if you previously
# had code or strings defined in the area, they will be overwritten as data.
#
# Inputs: start_addr: Start address for segment to define as data
# end_addr: End address for segment to define as data
# data_type: Type of data to set segment to (dependent on architecture)
#
##############################################################################################

def define_as_data_by_size_for_block(start_addr, end_addr, data_size):
    """Define every item in [start_addr, end_addr) as data of data_size bytes.

    Args:
        start_addr: First address to define.
        end_addr: Address one past the end of the range (exclusive).
        data_size: Item size in bytes: 1 (MakeByte), 2 (MakeWord) or
            4 (MakeDword). Any other value shows a warning and defines nothing.

    Items are created back to back starting at start_addr. An item that would
    not fit entirely below end_addr is not created, so nothing outside the
    requested range is touched when the range length is not a multiple of
    data_size.
    """
    if start_addr >= end_addr:
        # Empty range: nothing to define and nothing to warn about.
        return

    # Resolve the item constructor once instead of on every iteration.
    if data_size == 1:
        make_item = MakeByte
    elif data_size == 2:
        make_item = MakeWord
    elif data_size == 4:
        make_item = MakeDword
    else:
        Warning("Invalid data_size. Breaking.")
        return

    curr_addr = start_addr
    while curr_addr + data_size <= end_addr:
        make_item(curr_addr)
        curr_addr += data_size

start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be defined.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be defined.")

if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr):
    data_size = AskLong(1, "Enter the size of each data item to be defined in the address block.\nExample: '1' for byte, '2' for word, '4' for dword\nNote the exact implementation will be dependent on architecture.")
    if data_size in (1, 2, 4):
        print("[define_data_as_types.py] STARTING. start_addr: 0x%X, end_addr: 0x%X, data_size: %d" % (start_addr, end_addr, data_size))
        # Undefine everything first so existing code/strings do not block the new data items.
        MakeUnknown(start_addr, (end_addr - start_addr), DOUNK_SIMPLE)
        print("[define_data_as_types.py] Undefined all data between 0x%X and 0x%0X" % (start_addr, end_addr))
        print("[define_data_as_types.py] Defining all data as size " + str(data_size))
        define_as_data_by_size_for_block(start_addr, end_addr, data_size)
        print("[define_data_as_types.py] FINISHED.")
    elif data_size is None:
        # The size dialog was dismissed without a value; there is no size to report.
        print("[define_data_as_types.py] QUITTING. No data size was entered.")
    else:
        # Supply the value for %d so the warning shows the size that was rejected.
        Warning("[define_data_as_types.py] You entered a size of %d bytes. Please enter 1 (byte), 2 (short/word), 4(long, dword)" % data_size)

else:
    print("[define_data_as_types.py] ERROR. Please enter valid address values.")
88 changes: 88 additions & 0 deletions find_mem_accesses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
##############################################################################################
# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC
# All rights reserved.
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify,
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.

##############################################################################################
# find_mem_accesses.py
#
# Identifies the memory accesses used in the code. When a memory access is identified based
# on the user contributed regular expression, this script completes three different actions
# to help with the static analysis:
# 1. A cross reference is created between the instruction and the memory address. This
# will fail if the address doesn't currently exist because the segment was not created.
# 2. The value at the memory address is retrieved and added as a comment to the
# referencing instruction.
# 3. A dictionary of all of the memory addresses accessed and the referencing instructions'
# addresses are printed and saved to a file.
# ** NOTE:If you are using a Harvard architecture, ensure you can distinguish between memory
# spaces or comment out the cross-reference and value parts of this script.
#
# Inputs: start_addr: Start address of the code range to analyze
# end_addr: End address (exclusive) of the code range to analyze
# file_name: File to write the accesses to
#
##############################################################################################
import re

################### USER DEFINED VALUES ###################
# Enter a regular expression for the memory access instructions you'd like to identify.
# Also enter the index of the operand in the instruction so that it can be retrieved via
# the GetOperandValue() function.
#
# The pattern is applied with .match() to the disassembly text of each instruction, so it
# is anchored at the start of that text. operand_index is passed unchanged to
# GetOperandValue(); with the pattern below, 1 selects the "#addr" operand.
#
# 8051 (mov DPTR, #addr)
regex_mem_instruct = re.compile(r"mov +DPTR, #")
operand_index = 1
############################################################


start_addr = AskAddr(MinEA(), "Please enter the starting address for the code to be analyzed.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the code to be analyzed.")

default_fn = "memory_use_locations.txt"
filename = AskFile(1, default_fn, "Please choose the location to save the memory accesses file.")

# Maps each accessed memory address to the list of instruction addresses that reference it.
accesses_dict = {}

if ((start_addr is not None and end_addr is not None) and (start_addr != BADADDR and end_addr != BADADDR) and start_addr < end_addr):
    curr_addr = start_addr
    # NextHead returns BADADDR when there is no further head; stop explicitly in that case.
    while curr_addr != BADADDR and curr_addr < end_addr:
        if regex_mem_instruct.match(GetDisasm(curr_addr)):
            mem_addr = GetOperandValue(curr_addr, operand_index)
            print("[find_mem_accesses.py] Instruction Address: 0x%x Operand Address: 0x%0x" % (curr_addr, mem_addr))
            # Create Cross-Reference to Address
            result = add_dref(curr_addr, mem_addr, dr_T)
            if not result:
                print("[find_mem_accesses.py] Could NOT create data cross-references.")
            else:
                # Try to get the value at the memory address and record it at the reference.
                # Defaulting to WORD (2 bytes); change or add other intelligence here.
                value = Word(mem_addr)
                MakeComm(curr_addr, "@[0x%x] = 0x%x" % (mem_addr, value))
            # Every matching access is recorded, whether or not the xref could be created.
            accesses_dict.setdefault(mem_addr, []).append(curr_addr)
        curr_addr = NextHead(curr_addr)

    if filename:
        print("[find_mem_accesses.py] Finished searching range. Writing to file.")
        with open(filename, "w") as out_file:
            for key in sorted(accesses_dict):
                out_file.write("0x%0x: \n" % key)
                for ref in accesses_dict[key]:
                    out_file.write("\t0x%0x \n" % ref)
    else:
        # The file dialog was cancelled: keep the xrefs/comments created above,
        # but do not try to open a file with no name.
        print("[find_mem_accesses.py] Finished searching range. No output file selected; skipping file write.")
else:
    print("[find_mem_accesses.py] ERROR. Please enter valid addresses.")


Loading

0 comments on commit 8880a66

Please sign in to comment.