Skip to content

Commit

Permalink
lint file.py & helpers.py
Browse files Browse the repository at this point in the history
  • Loading branch information
colton-gabertan committed Jun 30, 2023
1 parent 1228631 commit 95a6651
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 34 deletions.
56 changes: 25 additions & 31 deletions capa/features/extractors/ghidra/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
import struct
from typing import Tuple, Iterator

from ghidra.program.model.symbol import SourceType
from ghidra.program.model.symbol import SymbolType
from ghidra.program.model.symbol import SourceType, SymbolType

import capa.features.extractors.common
import capa.features.extractors.helpers
Expand Down Expand Up @@ -38,14 +37,14 @@ def check_segment_for_pe() -> Iterator[Tuple[int, int]]:
)
for i in range(256)
]

todo = []
for mzx, pex, i in mz_xor:
# find all segment offsets containing XOR'd "MZ" bytes
for off in capa.features.extractors.ghidra.helpers.find_byte_sequence(mzx):
todo.append((off, mzx, pex, i))

seg_max = currentProgram.getMaxAddress()
seg_max = currentProgram.getMaxAddress()
while len(todo):
off, mzx, pex, i = todo.pop()

Expand All @@ -55,11 +54,11 @@ def check_segment_for_pe() -> Iterator[Tuple[int, int]]:
if seg_max.getOffset() < (e_lfanew.getOffset() + 4):
continue

e_lfanew_bytes = b''
e_lfanew_bytes = b""
try:
e_lfanew_sbytes = getBytes(e_lfanew, 4)
e_lfanew_sbytes = getBytes(e_lfanew, 4)
for b in e_lfanew_sbytes:
b = (b & 0xFF).to_bytes(1, 'little')
b = (b & 0xFF).to_bytes(1, "little")
e_lfanew_bytes = e_lfanew_bytes + b
except RuntimeError: # no bytes will be returned, so we can bail out here
return
Expand All @@ -74,11 +73,11 @@ def check_segment_for_pe() -> Iterator[Tuple[int, int]]:
if seg_max.getOffset() < (peoff.getOffset() + 2):
continue

pe_bytes = b''
pe_bytes = b""
try:
pe_off_bytes = getBytes(peoff, 2)
for b in pe_off_bytes:
b = (b & 0xFF).to_bytes(1, 'little')
b = (b & 0xFF).to_bytes(1, "little")
pe_bytes = pe_bytes + b
except RuntimeError:
return
Expand All @@ -88,19 +87,17 @@ def check_segment_for_pe() -> Iterator[Tuple[int, int]]:


def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]:
    """Yield an "embedded pe" characteristic for every hit from check_segment_for_pe().

    Each item produced by check_segment_for_pe() is a pair; only its first
    element is used, wrapped in a FileOffsetAddress. The second element is
    ignored here.
    """
    for pe_offset, _unused in check_segment_for_pe():
        feature = Characteristic("embedded pe")
        location = FileOffsetAddress(pe_offset)
        yield feature, location


def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]:
    """Yield an Export feature for each external entry point in the symbol table.

    For every address returned by the symbol table's external entry point
    iterator, the name of the primary symbol at that address becomes the
    Export feature, and the address offset becomes an AbsoluteVirtualAddress.
    """
    symbol_table = currentProgram.getSymbolTable()
    for entry_addr in symbol_table.getExternalEntryPointIterator():
        primary_symbol = symbol_table.getPrimarySymbol(entry_addr)
        feature = Export(primary_symbol.getName())
        location = AbsoluteVirtualAddress(entry_addr.getOffset())
        yield feature, location


def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
Expand All @@ -116,13 +113,12 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]:
"""

for f in currentProgram.getFunctionManager().getExternalFunctions():

for r in f.getSymbol().getReferences():
if (r.getReferenceType().isData()):
addr = r.getFromAddress().getOffset() # gets pointer to fake external addr
if r.getReferenceType().isData():
addr = r.getFromAddress().getOffset() # gets pointer to fake external addr

fstr = f.toString().split('::') # format: MODULE.dll::import / MODULE::Ordinal_*
if 'Ordinal_' in fstr[1]:
fstr = f.toString().split("::") # format: MODULE.dll::import / MODULE::Ordinal_*
if "Ordinal_" in fstr[1]:
fstr[1] = f"#{fstr[1].split('_')[1]}"

for name in capa.features.extractors.helpers.generate_symbols(fstr[0][:-4], fstr[1]):
Expand All @@ -140,13 +136,13 @@ def extract_file_strings() -> Iterator[Tuple[Feature, Address]]:
"""extract ASCII and UTF-16 LE strings"""

for block in currentProgram.getMemory().getBlocks():
p_bytes = b''
p_bytes = b""
addr = block.getStart()
while (block.isInitialized() and addr.getOffset() <= block.getEnd().getOffset()):
p_bytes = p_bytes + ((block.getByte(addr) & 0xFF).to_bytes(1, 'little'))
while block.isInitialized() and addr.getOffset() <= block.getEnd().getOffset():
p_bytes = p_bytes + ((block.getByte(addr) & 0xFF).to_bytes(1, "little"))
try:
addr = addr.add(1)
except RuntimeError: # throws AddressOverflow error in Java
except RuntimeError: # throws AddressOverflow error in Java
break

for s in capa.features.extractors.strings.extract_ascii_strings(p_bytes):
Expand All @@ -163,9 +159,9 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
extract the names of statically-linked library functions.
"""

for sym in currentProgram.getSymbolTable().getAllSymbols(True):
for sym in currentProgram.getSymbolTable().getAllSymbols(True):
# .isExternal() misses more than this config for the function symbols
if (sym.getSymbolType() == SymbolType.FUNCTION and sym.getSource() == SourceType.ANALYSIS and sym.isGlobal()):
if sym.getSymbolType() == SymbolType.FUNCTION and sym.getSource() == SourceType.ANALYSIS and sym.isGlobal():
name = sym.getName() # starts to resolve names based on Ghidra's FidDB
addr = AbsoluteVirtualAddress(sym.getAddress().getOffset())
yield FunctionName(name), addr
Expand All @@ -176,15 +172,14 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]:
# see: https://stackoverflow.com/a/2628384/87207
yield FunctionName(name[1:]), addr


def extract_file_format() -> Iterator[Tuple[Feature, Address]]:

def extract_file_format() -> Iterator[Tuple[Feature, Address]]:
ef = currentProgram.getExecutableFormat()
if 'PE' in ef:
if "PE" in ef:
yield Format(FORMAT_PE), NO_ADDRESS
elif 'ELF' in ef:
elif "ELF" in ef:
yield Format(FORMAT_ELF), NO_ADDRESS
elif 'Raw' in ef:
elif "Raw" in ef:
# no file type to return when processing a binary file, but we want to continue processing
return
else:
Expand Down Expand Up @@ -218,4 +213,3 @@ def main():

if __name__ == "__main__":
main()

3 changes: 0 additions & 3 deletions capa/features/extractors/ghidra/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,3 @@ def find_byte_sequence(seq: bytes) -> Iterator[int]:
ea = findBytes(currentProgram.getMinAddress().add(1), seqstr, 1, 1)
for e in ea:
yield e



0 comments on commit 95a6651

Please sign in to comment.