Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update create() function to enable filling entirely the database #11

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
187 changes: 144 additions & 43 deletions src/bindiff/file.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from pathlib import Path
import sqlite3
import hashlib
from datetime import datetime
from dataclasses import dataclass
from typing import Union
import ctypes

from bindiff.types import FunctionAlgorithm, BasicBlockAlgorithm
from bindiff.types import FunctionAlgorithm, BasicBlockAlgorithm, function_algorithm_str, basicblock_algorithm_str


@dataclass
Expand Down Expand Up @@ -78,8 +77,10 @@ class BindiffFile(object):
def __init__(self, file: Union[Path, str], permission: str = "ro"):
"""
:param file: path to Bindiff database
:param permission: permission to use for opening database (default: ro)
:param permission: database permissions (default: ro)
"""
assert permission in ["ro", "rw"]

self._file = file

# Open database
Expand All @@ -92,13 +93,11 @@ def __init__(self, file: Union[Path, str], permission: str = "ro"):
self.version: str = None #: version of the differ used for diffing
self.created: datetime = None #: Database creation date
self.modified: datetime = None #: Database last modification date
self._load_metadata(self.db.cursor())


# Files
self.primary_file: File = None #: Primary file
self.secondary_file: File = None #: Secondary file
self._load_file(self.db.cursor())
# fmt: on

# Function matches
self.primary_functions_match: dict[
Expand All @@ -107,7 +106,6 @@ def __init__(self, file: Union[Path, str], permission: str = "ro"):
self.secondary_functions_match: dict[
int, FunctionMatch
] = {} #: FunctionMatch indexed by addresses in secondary
self._load_function_match(self.db.cursor())

# Basicblock matches: BB-addr -> fun-addr -> match
self.primary_basicblock_match: dict[
Expand All @@ -116,13 +114,21 @@ def __init__(self, file: Union[Path, str], permission: str = "ro"):
self.secondary_basicblock_match: dict[
int, dict[int, BasicBlockMatch]
] = {} #: Basic block match from secondary
self._load_basicblock_match(self.db.cursor())


# Instruction matches
# {inst_addr : {match_func_addr : match_inst_addr}}
self.primary_instruction_match: dict[int, dict[int, int]] = {}
self.secondary_instruction_match: dict[int, dict[int, int]] = {}
self._load_instruction_match(self.db.cursor())

# If 'ro', load database content
if permission == "ro":
self._load_metadata(self.db.cursor())
self._load_file(self.db.cursor())
self._load_function_match(self.db.cursor())
self._load_basicblock_match(self.db.cursor())
self._load_instruction_match(self.db.cursor())


@property
def unmatched_primary_count(self) -> int:
Expand Down Expand Up @@ -169,7 +175,7 @@ def _load_file(self, cursor: sqlite3.Cursor) -> None:
:param cursor: sqlite3 cursor to the DB
"""
files = cursor.execute("SELECT * FROM file").fetchall()
assert len(files) >= 2
# assert len(files) >= 2

self.primary_file = File(*files[0])
self.secondary_file = File(*files[1])
Expand Down Expand Up @@ -268,7 +274,7 @@ def init_database(db: sqlite3.Connection) -> None:
CREATE TABLE metadata (version TEXT, file1 INTEGER, file2 INTEGER, description TEXT, created DATE,
modified DATE, similarity DOUBLE PRECISION, confidence DOUBLE PRECISION,
FOREIGN KEY(file1) REFERENCES file(id), FOREIGN KEY(file2) REFERENCES file(id))""")
conn.execute("""CREATE TABLE functionalgorithm (id SMALLINT PRIMARY KEY, name TEXT)""")
conn.execute("""CREATE TABLE functionalgorithm (id INTEGER PRIMARY KEY, name TEXT)""")
conn.execute("""
CREATE TABLE function (id INTEGER PRIMARY KEY, address1 BIGINT, name1 TEXT, address2 BIGINT,
name2 TEXT, similarity DOUBLE PRECISION, confidence DOUBLE PRECISION, flags INTEGER,
Expand All @@ -286,16 +292,11 @@ def init_database(db: sqlite3.Connection) -> None:
db.commit()
# fmt: on

conn.execute(
"""INSERT INTO basicblockalgorithm(name) VALUES ("basicBlock: edges prime product")"""
)
db.commit()

@staticmethod
def create(
filename: str,
primary: str,
secondary: str,
version: str,
desc: str,
similarity: float,
Expand All @@ -306,8 +307,6 @@ def create(
It only takes two binaries.

:param filename: database file path
:param primary: path to primary export file
:param secondary: path to secondary export file
:param version: version of the differ used
:param desc: description of the database
:param similarity: similarity score between to two binaries
Expand All @@ -320,22 +319,6 @@ def create(

conn = db.cursor()

# Save primary
file1 = Path(primary)
hash1 = hashlib.sha256(file1.read_bytes()).hexdigest() if file1.exists() else ""
conn.execute(
"""INSERT INTO file (filename, exefilename, hash) VALUES (:filename, :name, :hash)""",
{"filename": str(file1.with_suffix("").name), "name": file1.name, "hash": hash1},
)

# Save secondary
file2 = Path(secondary)
hash2 = hashlib.sha256(file2.read_bytes()).hexdigest() if file2.exists() else ""
conn.execute(
"""INSERT INTO file (filename, exefilename, hash) VALUES (:filename, :name, :hash)""",
{"filename": str(file2.with_suffix("").name), "name": file2.name, "hash": hash2},
)

conn.execute(
"""
INSERT INTO metadata (version, file1, file2, description, created, modified, similarity, confidence)
Expand All @@ -353,10 +336,89 @@ def create(
},
)

# Fill functionalgorithm table
for algo in FunctionAlgorithm:
algo_str = function_algorithm_str(algo)
conn.execute(
"""INSERT INTO functionalgorithm (name) VALUES (:name)""",
{"name": f"function: {algo_str}"},
)

# Fill basicblockalgorithm table
for algo in BasicBlockAlgorithm:
algo_str = basicblock_algorithm_str(algo)
conn.execute(
"""INSERT INTO basicblockalgorithm (name) VALUES (:name)""",
{"name": f"basicBlock: {algo_str}"},
)


db.commit()
db.close()
return BindiffFile(filename, permission="rw")

def add_file_matched(
    self,
    export_name: str,
    hash: str,
    executable_name: str = "",
    functions: int = 0,
    libfunctions: int = 0,
    calls: int = 0,
    basicblocks: int = 0,
    libbasicblocks: int = 0,
    edges: int = 0,
    libedges: int = 0,
    instructions: int = 0,
    libinstructions: int = 0,
):
    """
    Insert one row describing a diffed file into the ``file`` table.

    Only ``export_name`` and ``hash`` are required; every counter
    defaults to 0. The row is written through a cursor on ``self.db``
    and is not committed by this method.

    :warning: leaving the optional fields unset might not render
              correctly in Bindiff, or IDA plugins.

    :param export_name: export filename (with extension); its name with
                        the last suffix removed is stored as ``filename``
    :param hash: SHA256 hash of the executable
    :param executable_name: executable filename; when empty, the export
                            name without its extension is used instead
    :param functions: number of functions
    :param libfunctions: number of library functions
    :param calls: number of calls
    :param basicblocks: number of basic blocks
    :param libbasicblocks: number of library basic blocks
    :param edges: number of CFG edges
    :param libedges: number of library CFG edges
    :param instructions: number of instructions
    :param libinstructions: number of library instructions
    :return: None
    """
    # Export name stripped of directory and last extension.
    stem = Path(export_name).with_suffix("").name

    # Insertion order matters: it drives both the column list and the
    # named placeholders of the statement below.
    row = dict(
        filename=stem,
        exefilename=executable_name or stem,
        hash=hash,
        functions=functions,
        libfunctions=libfunctions,
        calls=calls,
        basicblocks=basicblocks,
        libbasicblocks=libbasicblocks,
        edges=edges,
        libedges=libedges,
        instructions=instructions,
        libinstructions=libinstructions,
    )

    columns = ",".join(row)
    placeholders = ",".join(":" + column for column in row)

    self.db.cursor().execute(
        f"INSERT INTO file ({columns}) VALUES ({placeholders})",
        row,
    )


def add_function_match(
self,
fun_addr1: int,
Expand All @@ -382,8 +444,21 @@ def add_function_match(
cursor = self.db.cursor()
cursor.execute(
"""
INSERT INTO function (address1, address2, name1, name2, similarity, confidence, basicblocks)
VALUES (:address1, :address2, :name1, :name2, :similarity, :confidence, :identical_bbs)
INSERT INTO function (address1,
address2,
name1,
name2,
similarity,
confidence,
flags,
algorithm,
evaluate,
commentsported,
basicblocks,
edges,
instructions)
VALUES (:address1, :address2, :name1, :name2, :similarity,
:confidence, 0, 19, 0, 0, :identical_bbs, 0, 0)
""",
{
"address1": fun_addr1,
Expand All @@ -398,13 +473,12 @@ def add_function_match(
return cursor.lastrowid

def add_basic_block_match(
self, fun_addr1: int, fun_addr2: int, bb_addr1: int, bb_addr2: int
self, funentry_id: int, bb_addr1: int, bb_addr2: int
) -> int:
"""
Add a basic block match in database.

:param fun_addr1: function address of basic block in primary
:param fun_addr2: function address of basic block in secondary
:param funentry_id: Db Id of the function match
:param bb_addr1: basic block address in primary
:param bb_addr2: basic block address in secondary
:return: id of the row inserted in database.
Expand All @@ -413,15 +487,15 @@ def add_basic_block_match(

cursor.execute(
"""
INSERT INTO basicblock (functionid, address1, address2, algorithm)
VALUES ((SELECT id FROM function WHERE address1=:function_address1 AND address2=:function_address2), :address1, :address2, :algorithm)
INSERT INTO basicblock (functionid, address1, address2, algorithm, evaluate)
VALUES (:funentry_id, :address1, :address2, :algorithm, :evaluate)
""",
{
"function_address1": fun_addr1,
"function_address2": fun_addr2,
"funentry_id": funentry_id,
"address1": bb_addr1,
"address2": bb_addr2,
"algorithm": "1",
"evaluate": "0"
},
)
return cursor.lastrowid
Expand Down Expand Up @@ -475,4 +549,31 @@ def update_file_infos(
"instructions": inst_count,
},
)


def update_samebb_function_match(
    self, funentry_id: int, same_bb_count: int) -> None:
    """
    Update the identical-basic-block count of a function match.

    Sets the ``basicblocks`` column of the ``function`` row whose ``id``
    equals ``funentry_id``. Nothing is committed by this method.

    :param funentry_id: id of the function match entry
    :param same_bb_count: number of identical basic blocks
    """
    cursor = self.db.cursor()

    cursor.execute(
        "UPDATE function SET basicblocks = :bb_count WHERE id = :entry_id",
        {
            # Bind the id with its own (integer) type: a stringified id
            # only matches the row thanks to SQLite's column-affinity
            # coercion, which should not be relied upon here.
            "entry_id": funentry_id,
            "bb_count": same_bb_count,
        },
    )


def commit(self) -> None:
    """
    Commit the pending transaction on the underlying database
    connection (``self.db``).
    """
    connection = self.db
    connection.commit()
49 changes: 49 additions & 0 deletions src/bindiff/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,32 @@ class BasicBlockAlgorithm(IntEnum):
manual = 20


def basicblock_algorithm_str(algo: BasicBlockAlgorithm) -> str:
match algo:
case BasicBlockAlgorithm.edges_prime_product: return "edges prime product"
case BasicBlockAlgorithm.hash_matching_four_inst_min: return "hash matching (4 instructions minimum)"
case BasicBlockAlgorithm.prime_matching_four_inst_min: return "prime matching (4 instructions minimum)"
case BasicBlockAlgorithm.call_reference_matching: return "call reference matching"
case BasicBlockAlgorithm.string_references_matching: return "string reference matching"
case BasicBlockAlgorithm.edges_md_index_top_down: return "edges MD index (top down)"
case BasicBlockAlgorithm.md_index_matching_top_down: return "MD index matching (top down)"
case BasicBlockAlgorithm.edges_md_index_bottom_up: return "edges MD index (bottom up)"
case BasicBlockAlgorithm.md_index_matching_bottom_up: return "MD index matching (bottom up)"
case BasicBlockAlgorithm.relaxed_md_index_matching: return "relaxed MD index matching"
case BasicBlockAlgorithm.prime_matching_no_inst_min: return "prime matching (0 instructions minimum)"
case BasicBlockAlgorithm.edges_lengauer_tarjan_dominated: return "edges Lengauer Tarjan dominated"
case BasicBlockAlgorithm.loop_entry_matching: return "loop entry matching"
case BasicBlockAlgorithm.self_loop_matching: return "self loop matching"
case BasicBlockAlgorithm.entry_point_matching: return "entry point matching"
case BasicBlockAlgorithm.exit_point_matching: return "exit point matching"
case BasicBlockAlgorithm.instruction_count_matching: return "instruction count matching"
case BasicBlockAlgorithm.jump_sequence_matching: return "jump sequence matching"
case BasicBlockAlgorithm.propagation_size_one: return "propagation (size==1)"
case BasicBlockAlgorithm.manual: return "manual"
case _:
assert False


class FunctionAlgorithm(IntEnum):
"""
Function matching algorithm enum. (id's does not seem to change in
Expand All @@ -63,3 +89,26 @@ class FunctionAlgorithm(IntEnum):
call_sequence_matching_sequence = 17
call_reference_matching = 18
manual = 19

def function_algorithm_str(algo: FunctionAlgorithm) -> str:
match algo:
case FunctionAlgorithm.name_hash_matching: return "name hash matching"
case FunctionAlgorithm.hash_matching: return "hash matching"
case FunctionAlgorithm.edges_flowgraph_md_index: return "edges flowgraph MD index"
case FunctionAlgorithm.edges_callgraph_md_index: return "edges callgraph MD index"
case FunctionAlgorithm.md_index_matching_flowgraph_top_down: return "MD index matching (flowgraph MD index, top down)"
case FunctionAlgorithm.md_index_matching_flowgraph_bottom_up: return "MD index matching (flowgraph MD index, bottom up)"
case FunctionAlgorithm.prime_signature_matching: return "signature matching"
case FunctionAlgorithm.md_index_matching_callGraph_top_down: return "MD index matching (callGraph MD index, top down)"
case FunctionAlgorithm.md_index_matching_callGraph_bottom_up: return "MD index matching (callGraph MD index, bottom up)"
case FunctionAlgorithm.relaxed_md_index_matching: return "MD index matching"
case FunctionAlgorithm.instruction_count: return "instruction count"
case FunctionAlgorithm.address_sequence: return "address sequence"
case FunctionAlgorithm.string_references: return "string references"
case FunctionAlgorithm.loop_count_matching: return "loop count matching"
case FunctionAlgorithm.call_sequence_matching_exact: return "call sequence matching(exact)"
case FunctionAlgorithm.call_sequence_matching_topology: return "call sequence matching(topology)"
case FunctionAlgorithm.call_sequence_matching_sequence: return "call sequence matching(sequence)"
case FunctionAlgorithm.call_reference_matching: return "call rerferences matching"
case FunctionAlgorithm.manual: return "manual"
case _: assert False