-
Notifications
You must be signed in to change notification settings - Fork 142
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f29cbac
commit 8880a66
Showing
7 changed files
with
582 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
############################################################################################## | ||
# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC | ||
# All rights reserved. | ||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this | ||
# software and associated documentation files (the "Software"), to deal in the Software | ||
# without restriction, including without limitation the rights to use, copy, modify, | ||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to | ||
# permit persons to whom the Software is furnished to do so. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, | ||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR | ||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE | ||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE | ||
# OR OTHER DEALINGS IN THE SOFTWARE. | ||
|
||
############################################################################################## | ||
# data_offset_calc.py | ||
# Resolves the references to indirect offsets of a variable, register, or memory location | ||
# whose value is known. Changes the display of the operand in the instruction (OpAlt function), | ||
# creates a data cross reference (add_dref), and creates a comment of the resolved address | ||
# (MakeComm). User needs to define the following: | ||
# offset_var_str: The string representation of the variable, register, or memory | ||
# location to be replaced by the resolved value | ||
# offset_var_value: The value of the variable defined in offset_var_str | ||
# reg_ex_indirect: A regular expression of how indirect offset accesses to the variable are displayed | ||
# regex_immediate: A regular expression of how the immediate offset value is represented | ||
# new_opnd_display: A string representation of how the calculated and resolved | ||
# value should be displayed as the operand in the instruction | ||
# | ||
# Inputs: start_addr: Start address of the range to be analyzed | ||
# end_addr: End address of the range to be analyzed | ||
# | ||
############################################################################################## | ||
import re | ||
|
||
|
||
################### USER DEFINED VALUES ###################
# String of the variable/register/location used as the indirect variable
offset_var_str = "fp"

# The defined offset_var_str's value
offset_var_value = 0x808000

# Regular expression for how offset_var_str is referenced indirectly in the IDA disassembly:
#   @(-0x(1-8 hex chars), fp)
# offset_var_str is passed through re.escape() so a name containing regex metacharacters
# (for example "$sp") is matched literally, and every piece of the pattern is a raw string
# so that "\)" is never interpreted as a string escape sequence.
reg_ex_indirect = re.compile(
    r"@\(-?0x[0-9A-Fa-f]{1,8}, " + re.escape(offset_var_str) + r"\)"
)

# Regular expression for how immediate values are shown in the indirect reference
# For this example, it's 0x1044, but some architectures would show that as 1044h
regex_immediate = re.compile(r"0x[0-9A-Fa-f]{1,8}")

# String expression for how the newly calculated operand should be displayed within the instruction
new_opnd_display = '@[0x%x]'

# OPTIONAL ---- EXAMPLE FOR ADDING OTHER INSTRUCTIONS TO THE PROCESSING
# If you'd like to add other instructions to be processed for resolving indirect offset accesses,
# update the regular expression here and use it as shown in the "else" block below.
# The register is taken from offset_var_str so this pattern stays in step with reg_ex_indirect.
reg_ex_add3 = re.compile(
    r"add3 \w\w, " + re.escape(offset_var_str) + r", #-?0x[0-9A-Fa-f]{1,8}"
)
#############################################################
|
||
def _signed_immediate(text):
    """Return the first immediate found in ``text`` as a signed integer.

    The immediate is located with the user-defined ``regex_immediate`` and parsed as
    hexadecimal. It is treated as negative when ``text`` contains a '-' anywhere,
    which is the same sign rule this script has always applied.

    Returns None when ``text`` contains no immediate.
    """
    hit = regex_immediate.search(text)
    if hit is None:
        return None
    magnitude = int(hit.group(0), 16)
    return -magnitude if '-' in text else magnitude


start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be analyzed.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be analyzed.")

# print(...) is always called with a single pre-formatted string so the output is the
# same whether the interpreter treats print as a statement or as a function.
if ((start_addr is not None and end_addr is not None)
        and (start_addr != BADADDR and end_addr != BADADDR)
        and start_addr < end_addr):
    print("[data_offset_calc.py] STARTING. Looking for indirect accesses across 0x%x to 0x%x" % (start_addr, end_addr))
    curr_addr = start_addr
    while curr_addr != BADADDR and curr_addr < end_addr:
        operand = GetOpnd(curr_addr, 1)  # Operand = 2nd operand in the instruction at curr_addr
        if reg_ex_indirect.match(operand):
            print("[data_offset_calc.py] 0x%x Operand: %s" % (curr_addr, operand))

            # Look for the immediate that accompanies the variable in the 2nd operand.
            # For example, mov R3, @(0x10, fp)
            delta = _signed_immediate(operand)
            if delta is not None:
                print("[data_offset_calc.py] 0x%x Offset: 0x%x" % (curr_addr, abs(delta)))
                new_opnd = offset_var_value + delta

                print("[data_offset_calc.py] 0x%x: Offset + %s = 0x%0x" % (curr_addr, offset_var_str, new_opnd))
                OpAlt(curr_addr, 1, new_opnd_display % new_opnd)  # Changes display of the operand
                # dr_T is used because the script does not check whether the access is a read or a write
                result = add_dref(curr_addr, new_opnd, dr_T)
                print("[data_offset_calc.py] Creating dref from 0x%x to 0x%x: %s" % (curr_addr, new_opnd, str(result)))
            else:
                print("[data_offset_calc.py] 0x%x: No immediate offset identified." % curr_addr)

        #####################################################################################
        # This block is optional but shows how to add additional regular expressions for other
        # instructions you'd like to match besides the general indirect offset accesses. For M32R
        # we are also matching the add3 instruction that takes the form "add3 Reg, fp, 0xNUM"
        else:
            instruct = GetDisasm(curr_addr)
            if reg_ex_add3.match(instruct):
                print("[data_offset_calc.py] 0x%08x Instruct: %s" % (curr_addr, instruct))
                immed_opnd = GetOpnd(curr_addr, 2)  # 3rd operand, per the reg_ex_add3 pattern
                delta = _signed_immediate(immed_opnd)
                if delta is not None:
                    new_opnd = offset_var_value + delta
                    print("[data_offset_calc.py] 0x%x: Offset + %s = 0x%08x" % (curr_addr, offset_var_str, new_opnd))
                    # Add a comment with the resolved value instead of overwriting the operand as done above
                    MakeComm(curr_addr, '0x%08x' % new_opnd)
                    result = add_dref(curr_addr, new_opnd, dr_T)  # Data xref from instruction to calculated value
                    print("[data_offset_calc.py] Creating dref from 0x%x to 0x%x: %s" % (curr_addr, new_opnd, str(result)))
                else:
                    print("[data_offset_calc.py] 0x%x: No immediate offset identified." % curr_addr)
        ########################################################################################
        # Bound the search by the user's end address rather than a fixed 0xFFFFF so the scan
        # is not cut short on images that extend above that address. NextHead is expected to
        # return BADADDR once no defined item remains below the bound, which ends the loop.
        curr_addr = NextHead(curr_addr, end_addr)
    print("[data_offset_calc.py] EXITING.")
else:
    print("[data_offset_calc.py] QUITTING. Invalid values entered for starting and ending addresses.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
############################################################################################## | ||
# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC | ||
# All rights reserved. | ||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this | ||
# software and associated documentation files (the "Software"), to deal in the Software | ||
# without restriction, including without limitation the rights to use, copy, modify, | ||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to | ||
# permit persons to whom the Software is furnished to do so. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, | ||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR | ||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE | ||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE | ||
# OR OTHER DEALINGS IN THE SOFTWARE. | ||
|
||
############################################################################################## | ||
# define_code_functions.py | ||
# Attempts to define the bytes in the user-entered address range as code and then as functions | ||
# based on the user-defined smart_prolog and smart_epilog regular expressions for that architecture. | ||
# | ||
# Inputs: start_addr: Start address of the range to define as code and functions | ||
# end_addr: End address of the range to define as code and functions | ||
# do_make_unk: Whether to mark the whole range as unknown first (Yes/No prompt) | ||
# | ||
############################################################################################## | ||
import re | ||
|
||
################### USER DEFINED VALUES ###################
# Enter a regular expression for how this architecture usually begins and ends functions.
# If the architecture does not dictate how to start or end a function use r".*" to allow
# for any instruction

# 8051 Architecture Prologue and Epilogue
smart_prolog = re.compile(r".*")
smart_epilog = re.compile(r"reti{0,1}")

# PIC18 Architecture Prologue and Epilogue
#smart_prolog = re.compile(r".*")
#smart_epilog = re.compile(r"return 0")

# Mitsubishi M32R Architecture Prologue and Epilogue
#smart_prolog = re.compile(r"push +lr")
#smart_epilog = re.compile(r"jmp +lr.*")

# Texas Instruments TMS320C28x
#smart_prolog = re.compile(r".*")
#smart_epilog = re.compile(r"lretr")

# AVR
#smart_prolog = re.compile(r"push +r")
#smart_epilog = re.compile(r"reti{0,1}")
############################################################

start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be defined.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be defined.")

# print(...) is always called with a single pre-formatted string so the output is the
# same whether the interpreter treats print as a statement or as a function.
if ((start_addr is not None and end_addr is not None)
        and (start_addr != BADADDR and end_addr != BADADDR)
        and start_addr < end_addr):
    # AskYN result: 1 = Yes, 0 = No, -1 = Cancel. Cancel skips the whole script.
    do_make_unk = AskYN(0, "Do you want to make all of the code block UNKNOWN first?")
    if do_make_unk == 1:
        # Undefine the whole range in one call instead of one call per byte.
        MakeUnknown(start_addr, end_addr - start_addr, idc.DOUNK_SIMPLE)
    if do_make_unk != -1:
        print("[define_code_functions.py] Running script to define code and functions on 0x%x to 0x%x" % (start_addr, end_addr))
        curr_addr = start_addr
        while curr_addr < end_addr:
            # FindUnexplored is documented to take SEARCH_* flags. The BIN_SEARCH_* constants
            # belong to the binary-search API and their numeric values are not guaranteed to
            # mean "search down" here, so the direction is stated explicitly.
            next_unexplored = FindUnexplored(curr_addr, SEARCH_DOWN)
            if next_unexplored == BADADDR or next_unexplored >= end_addr:
                # Nothing unexplored is left inside the requested range.
                break
            MakeCode(next_unexplored)  # Success or failure does not matter, so retval is not stored
            # Always move forward, even if the search hands back the address it started from.
            if next_unexplored > curr_addr:
                curr_addr = next_unexplored
            else:
                curr_addr += 1

        # Finished attempting to make all unexplored bytes into code
        # Now, attempt to create functions of all code not currently in a function
        print("[define_code_functions.py] Completed attempting to define bytes as code. Now trying to define functions.")
        curr_addr = start_addr
        while curr_addr != BADADDR and curr_addr < end_addr:
            if isCode(GetFlags(curr_addr)) and GetFunctionAttr(curr_addr, FUNCATTR_START) == BADADDR:
                # Candidate function start: this instruction looks like a prologue, or the
                # previous item looks like an epilogue.
                if (smart_prolog.match(GetDisasm(curr_addr))
                        or smart_epilog.match(GetDisasm(PrevHead(curr_addr)))):
                    if MakeFunction(curr_addr) != 0:
                        # MakeFunction succeeded, so resume at the first address AFTER the new function
                        curr_addr = GetFunctionAttr(curr_addr, FUNCATTR_END)
                        continue
            curr_addr = NextHead(curr_addr)
else:
    print("[define_code_functions.py] Quitting. Entered address values are not valid.")
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
############################################################################################## | ||
# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC | ||
# All rights reserved. | ||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this | ||
# software and associated documentation files (the "Software"), to deal in the Software | ||
# without restriction, including without limitation the rights to use, copy, modify, | ||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to | ||
# permit persons to whom the Software is furnished to do so. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, | ||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR | ||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE | ||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE | ||
# OR OTHER DEALINGS IN THE SOFTWARE. | ||
|
||
############################################################################################## | ||
# define_data_as_types.py | ||
# Defines a segment of addresses as the user-specified data type (byte, word, or double word). | ||
# The byte length for each of these types is architecture dependent, but generally: | ||
# 1 byte = Byte | ||
# 2 bytes = Word | ||
# 4 bytes = Double Word | ||
# This script will undefine all bytes in the range first which means if you previously had | ||
# code or strings defined in the area, they will be overwritten as data. | ||
# | ||
# Inputs: start_addr: Start address for segment to define as data | ||
# end_addr: End address for segment to define as data | ||
# data_type: Type of data to set segment to (dependent on architecture) | ||
# | ||
############################################################################################## | ||
|
||
def define_as_data_by_size_for_block(start_addr, end_addr, data_size):
    """Define consecutive data items of ``data_size`` bytes across an address range.

    Starting at ``start_addr`` and stepping by ``data_size`` while the current address
    is below ``end_addr``, calls MakeByte, MakeWord or MakeDword for sizes 1, 2 and 4
    respectively. For any other size a single Warning is raised and nothing is defined.
    An empty range (``start_addr`` not below ``end_addr``) does nothing at all.
    """
    # Empty range: nothing is defined and no size check is made.
    if not (start_addr < end_addr):
        return

    # Pick the item constructor once instead of re-testing the size on every step.
    if data_size == 1:
        make_item = MakeByte
    elif data_size == 2:
        make_item = MakeWord
    elif data_size == 4:
        make_item = MakeDword
    else:
        Warning("Invalid data_size. Breaking.")
        return

    item_addr = start_addr
    while item_addr < end_addr:
        make_item(item_addr)
        item_addr += data_size
|
||
start_addr = AskAddr(MinEA(), "Please enter the starting address for the data to be defined.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the data to be defined.")

# print(...) is always called with a single pre-formatted string so the output is the
# same whether the interpreter treats print as a statement or as a function.
if ((start_addr is not None and end_addr is not None)
        and (start_addr != BADADDR and end_addr != BADADDR)
        and start_addr < end_addr):
    data_size = AskLong(1, "Enter the size of each data item to be defined in the address block.\nExample: '1' for byte, '2' for word, '4' for dword\nNote the exact implementation will be dependent on architecture.")
    if data_size is None:
        # No value came back from the prompt (dialog cancelled); there is no number to report.
        print("[define_data_as_types.py] CANCELLED. No data size was entered.")
    elif data_size in (1, 2, 4):
        print("[define_data_as_types.py] STARTING. start_addr: 0x%X, end_addr: 0x%X, data_size: %d" % (start_addr, end_addr, data_size))
        # Undefine the whole range first so previously defined code or strings do not block the new items
        MakeUnknown(start_addr, (end_addr - start_addr), DOUNK_SIMPLE)
        print("[define_data_as_types.py] Undefined all data between 0x%X and 0x%0X" % (start_addr, end_addr))
        print("[define_data_as_types.py] Defining all data as size " + str(data_size))
        define_as_data_by_size_for_block(start_addr, end_addr, data_size)
        print("[define_data_as_types.py] FINISHED.")
    else:
        # Fill in the rejected size; the message used to be shown with a literal "%d".
        Warning("[define_data_as_types.py] You entered a size of %d bytes. Please enter 1 (byte), 2 (short/word), 4(long, dword)" % data_size)
else:
    print("[define_data_as_types.py] ERROR. Please enter valid address values.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
############################################################################################## | ||
# Copyright 2017 The Johns Hopkins University Applied Physics Laboratory LLC | ||
# All rights reserved. | ||
# Permission is hereby granted, free of charge, to any person obtaining a copy of this | ||
# software and associated documentation files (the "Software"), to deal in the Software | ||
# without restriction, including without limitation the rights to use, copy, modify, | ||
# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to | ||
# permit persons to whom the Software is furnished to do so. | ||
# | ||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, | ||
# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR | ||
# PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE | ||
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | ||
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE | ||
# OR OTHER DEALINGS IN THE SOFTWARE. | ||
|
||
############################################################################################## | ||
# find_mem_accesses.py | ||
# | ||
# Identifies the memory accesses used in the code. When a memory access is identified based | ||
# on the user contributed regular expression, this script completes three different actions | ||
# to help with the static analysis: | ||
# 1. A cross reference is created between the instruction and the memory address. This | ||
# will fail if the address doesn't currently exist because the segment was not created. | ||
# 2. The value at the memory address is retrieved and added as a comment to the | ||
# referencing instruction. | ||
# 3. A dictionary of all of the memory addresses accessed and the referencing instructions' | ||
# addresses are printed and saved to a file. | ||
# ** NOTE:If you are using a Harvard architecture, ensure you can distinguish between memory | ||
# spaces or comment out the cross-reference and value parts of this script. | ||
# | ||
# Inputs: start_addr: Start address of the code range to be analyzed | ||
# end_addr: End address of the code range to be analyzed | ||
# file_name: File to write the accesses to | ||
# | ||
############################################################################################## | ||
import re | ||
|
||
################### USER DEFINED VALUES ###################
# Enter a regular expression for the memory access instructions you'd like to identify.
# Also enter the index of the operand in the instruction so that it can be retrieved via
# the GetOperandValue() function.
#
# 8051 (mov DPTR, #addr)
regex_mem_instruct = re.compile(r"mov +DPTR, #")
operand_index = 1
############################################################

start_addr = AskAddr(MinEA(), "Please enter the starting address for the code to be analyzed.")
end_addr = AskAddr(MaxEA(), "Please enter the ending address for the code to be analyzed.")

default_fn = "memory_use_locations.txt"
filename = AskFile(1, default_fn, "Please choose the location to save the memory accesses file.")

# Maps each accessed memory address to the list of instruction addresses that reference it
accesses_dict = {}

# print(...) is always called with a single pre-formatted string so the output is the
# same whether the interpreter treats print as a statement or as a function.
if ((start_addr is not None and end_addr is not None)
        and (start_addr != BADADDR and end_addr != BADADDR)
        and start_addr < end_addr):
    curr_addr = start_addr
    while curr_addr != BADADDR and curr_addr < end_addr:
        if regex_mem_instruct.match(GetDisasm(curr_addr)):
            mem_addr = GetOperandValue(curr_addr, operand_index)
            print("[find_mem_accesses.py] Instruction Address: 0x%x Operand Address: 0x%0x" % (curr_addr, mem_addr))
            # Create Cross-Reference to Address
            result = add_dref(curr_addr, mem_addr, dr_T)
            if not result:
                print("[find_mem_accesses.py] Could NOT create data cross-references.")
            else:
                # Try to Get Value at Memory Address and Record at Reference
                # Defaulting to WORD (2 bytes) can change or add other intelligence here
                value = Word(mem_addr)
                MakeComm(curr_addr, "@[0x%x] = 0x%x" % (mem_addr, value))
            # The access is recorded whether or not the cross-reference could be created
            accesses_dict.setdefault(mem_addr, []).append(curr_addr)
        curr_addr = NextHead(curr_addr)

    if filename:
        print("[find_mem_accesses.py] Finished searching range. Writing to file.")
        with open(filename, "w") as out_file:
            for key in sorted(accesses_dict):
                out_file.write("0x%0x: \n" % key)
                for ref in accesses_dict[key]:
                    out_file.write("\t0x%0x \n" % ref)
    else:
        # No output file was chosen; the xrefs and comments above are kept but nothing
        # is written, instead of failing inside open().
        print("[find_mem_accesses.py] Finished searching range. No output file selected; skipping file output.")
else:
    print("[find_mem_accesses.py] ERROR. Please enter valid addresses.")
|
||
|
Oops, something went wrong.