Skip to content

Commit

Permalink
Update LIEF to 0.14.0 and mypy & pyright (#17)
Browse files Browse the repository at this point in the history
* Added a new example for debug information
* Bumped code for newer LIEF
* changed attributes where necessary
* fixed mypy and pyright warnings
* bumped mypy and pyright versions
  • Loading branch information
fzakaria authored Feb 2, 2024
1 parent 315be4c commit 63d0ee9
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 79 deletions.
15 changes: 15 additions & 0 deletions examples/debug-information/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Builds the debug-information example: compiles debug.cc into the `exe` binary.

# Variables
# NOTE: CC/CFLAGS hold the C++ compiler and its flags here (g++), not a C compiler.
# -Wall enables the common warnings; -g asks the compiler to emit debug information.
CC = g++
CFLAGS = -Wall -g
TARGET = exe
SRC = debug.cc

# Default rule
all: $(TARGET)

# $@ expands to the target ($(TARGET)) and $< to the first prerequisite ($(SRC)).
$(TARGET): $(SRC)
	$(CC) $(CFLAGS) -o $@ $<

# `clean` is not a file, so mark it phony; it only removes the built binary.
.PHONY: clean
clean:
	rm -f $(TARGET)
19 changes: 19 additions & 0 deletions examples/debug-information/debug.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include <iostream>

// Pair of integer operands; product() below takes it by value.
// Declared as a C-style `typedef struct`, so the tag and the typedef share the name `Input`.
typedef struct Input {
    int x;
    int y;
} Input;

// Returns input.x multiplied by input.y.
// The value is stored in the named local `result` before being returned.
int product(Input input) {
    int result = input.x * input.y;
    return result;
}

// Entry point: multiplies 5 by 3 through product() and prints
// "The product is: 15" to standard output, then exits with status 0.
int main() {
    int a = 5;
    int b = 3;
    // {a, b} brace-initializes a temporary Input with x = a, y = b.
    int result = product({a, b});
    std::cout << "The product is: " << result << std::endl;
    return 0;
}
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ classifiers = [
]
dependencies = [
"capstone >= 5.0.1",
"lief >=0.13.2",
"lief >=0.14.0",
"apsw >= 3.43.1.0",
"sh >= 2.0.6",
]
Expand All @@ -39,9 +39,9 @@ dev = [
"isort >= 5.12.0",
"flake8 >= 6.1.0",
"flake8-print >= 5.0.0",
"pyright >= 1.1.325",
"pyright >= 1.1.349",
"pytest >= 7.4.0",
"mypy >= 1.0.0",
"mypy >= 1.8.0",
"coverage[toml] >= 7.3",
]

Expand Down
86 changes: 49 additions & 37 deletions sqlelf/elf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@
import capstone # type: ignore
import lief

# ELF.pyi has no matching py file since it's a c extension
# pyright: reportMissingModuleSource=false
# https://github.com/microsoft/pyright/issues/5950
import lief.ELF

from sqlelf import lief_ext
from sqlelf._vendor.elftools.common.utils import bytes2str
from sqlelf._vendor.elftools.dwarf.descriptions import describe_form_class
from sqlelf._vendor.elftools.dwarf.die import DIE as DIE_t
Expand Down Expand Up @@ -101,17 +107,21 @@ def register_generator(


def register_dynamic_entries_generator(
binaries: list[lief.Binary], connection: apsw.Connection, cache_flags: CacheFlag
binaries: list[lief_ext.Binary], connection: apsw.Connection, cache_flags: CacheFlag
) -> None:
"""Create the .dynamic section virtual table."""

def dynamic_entries_generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
for entry in binary.dynamic_entries: # type: ignore
yield {"path": binary_name, "tag": entry.tag.name, "value": entry.value}
binary_name = binary.path
for entry in binary.dynamic_entries:
yield {
"path": binary_name,
"tag": entry.tag.__name__,
"value": entry.value,
}

generator = Generator.make_generator(
["path", "tag", "value"],
Expand All @@ -128,17 +138,17 @@ def dynamic_entries_generator() -> Iterator[dict[str, Any]]:


def register_headers_generator(
binaries: list[lief.Binary], connection: apsw.Connection, cache_flags: CacheFlag
binaries: list[lief_ext.Binary], connection: apsw.Connection, cache_flags: CacheFlag
) -> None:
"""Create the ELF headers virtual table,"""

def headers_generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
yield {
"path": binary.name,
"type": binary.header.file_type.name,
"machine": binary.header.machine_type.name,
"version": binary.header.identity_version.name,
"path": binary.path,
"type": binary.header.file_type.__name__,
"machine": binary.header.machine_type.__name__,
"version": binary.header.identity_version.__name__,
"entry": binary.header.entrypoint,
"is_pie": binary.is_pie,
}
Expand All @@ -158,7 +168,7 @@ def headers_generator() -> Iterator[dict[str, Any]]:


def register_instructions_generator(
binaries: list[lief.Binary], connection: apsw.Connection, cache_flags: CacheFlag
binaries: list[lief_ext.Binary], connection: apsw.Connection, cache_flags: CacheFlag
) -> None:
"""Create the instructions virtual table.
Expand All @@ -168,7 +178,7 @@ def instructions_generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
binary_name = binary.path

for section in binary.sections:
if section.has(lief.ELF.SECTION_FLAGS.EXECINSTR):
Expand Down Expand Up @@ -208,35 +218,35 @@ def instructions_generator() -> Iterator[dict[str, Any]]:
)


def mode(binary: lief.Binary) -> int:
def mode(binary: lief_ext.Binary) -> int:
if binary.header.identity_class == lief.ELF.ELF_CLASS.CLASS64:
return cast(int, capstone.CS_MODE_64)
raise RuntimeError(f"Unknown mode for {binary.name}")
raise RuntimeError(f"Unknown mode for {binary.path}")


def arch(binary: lief.Binary) -> int:
def arch(binary: lief_ext.Binary) -> int:
if binary.header.machine_type == lief.ELF.ARCH.x86_64:
return cast(int, capstone.CS_ARCH_X86)
raise RuntimeError(f"Unknown machine type for {binary.name}")
raise RuntimeError(f"Unknown machine type for {binary.path}")


def register_sections_generator(
binaries: list[lief.Binary], connection: apsw.Connection, cache_flags: CacheFlag
binaries: list[lief_ext.Binary], connection: apsw.Connection, cache_flags: CacheFlag
) -> None:
"""Create the ELF sections virtual table."""

def sections_generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
binary_name = binary.path
for section in binary.sections:
yield {
"path": binary_name,
"name": section.name,
"offset": section.offset,
"size": section.size,
"type": section.type.name,
"type": section.type.__name__,
"content": bytes(section.content),
}

Expand All @@ -262,7 +272,7 @@ def coerce_section_name(name: str | None) -> str | None:


def register_strings_generator(
binaries: list[lief.Binary], connection: apsw.Connection, cache_flags: CacheFlag
binaries: list[lief_ext.Binary], connection: apsw.Connection, cache_flags: CacheFlag
) -> None:
"""Create the ELF strings virtual table.
Expand All @@ -278,7 +288,7 @@ def strings_generator() -> Iterator[dict[str, Any]]:
]
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
binary_name = binary.path
for strtab in strtabs:
# The first byte is always the null byte in the STRTAB
# Python also treats the final null in the string by creating
Expand Down Expand Up @@ -330,21 +340,23 @@ def split_with_index(str: str, delimiter: str) -> list[tuple[int, str]]:


def register_symbols_generator(
binaries: list[lief.Binary], connection: apsw.Connection, cache_flags: CacheFlag
binaries: list[lief_ext.Binary], connection: apsw.Connection, cache_flags: CacheFlag
) -> None:
"""Create the ELF symbols virtual table."""

def symbols_generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
binary_name = binary.path
for symbol in symbols(binary):
# The section index can be special numbers like 65521 or 65522
# that refer to special sections so they can't be indexed
section_name: str | None = next(
(
section.name
# technically name can be bytes, for now avoid this possibility
# https://github.com/lief-project/LIEF/issues/965#issuecomment-1718702335
cast(str, section.name)
for shndx, section in enumerate(binary.sections)
if shndx == symbol.shndx
),
Expand Down Expand Up @@ -376,7 +388,7 @@ def symbols_generator() -> Iterator[dict[str, Any]]:
and symbol.symbol_version.symbol_version_auxiliary
else None
),
"type": symbol.type.name,
"type": symbol.type.__name__,
"value": symbol.value,
}

Expand Down Expand Up @@ -412,7 +424,7 @@ def symbols_generator() -> Iterator[dict[str, Any]]:


def register_version_requirements(
binaries: list[lief.Binary], connection: apsw.Connection, cache_flags: CacheFlag
binaries: list[lief_ext.Binary], connection: apsw.Connection, cache_flags: CacheFlag
) -> None:
"""Create the ELF version requirements virtual table.
Expand All @@ -423,8 +435,8 @@ def version_requirements_generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
symbol_version_req = binary.symbols_version_requirement # type: ignore
binary_name = binary.path
symbol_version_req = binary.symbols_version_requirement
for version_requirement in symbol_version_req:
file = version_requirement.name
for aux_requirement in version_requirement.get_auxiliary_symbols():
Expand All @@ -449,7 +461,7 @@ def version_requirements_generator() -> Iterator[dict[str, Any]]:


def register_version_definitions(
binaries: list[lief.Binary], connection: apsw.Connection, cache_flags: CacheFlag
binaries: list[lief_ext.Binary], connection: apsw.Connection, cache_flags: CacheFlag
) -> None:
"""Create the ELF version requirements virtual table.
Expand All @@ -460,8 +472,8 @@ def version_definitions_generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
symbol_version_def = binary.symbols_version_definition # type: ignore
binary_name = binary.path
symbol_version_def = binary.symbols_version_definition
for version_definition in symbol_version_def:
flags = version_definition.flags
for aux_definition in version_definition.auxiliary_symbols:
Expand All @@ -486,7 +498,7 @@ def version_definitions_generator() -> Iterator[dict[str, Any]]:


def register_dwarf_dies(
binaries: list[lief.Binary], connection: apsw.Connection, cache_flags: CacheFlag
binaries: list[lief_ext.Binary], connection: apsw.Connection, cache_flags: CacheFlag
) -> None:
"""Create the DWARF DIE (Debugging Information Entry) virtual table."""

Expand Down Expand Up @@ -524,7 +536,7 @@ def dwarf_dies_generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
binary_name = binary.path
# A bit annoying but we must re-open the file
# since we are using a different library here
with open(binary_name, "rb") as f:
Expand Down Expand Up @@ -568,15 +580,15 @@ def dwarf_dies_generator() -> Iterator[dict[str, Any]]:


def register_dwarf_dies_graph(
binaries: list[lief.Binary], connection: apsw.Connection, cache_flags: CacheFlag
binaries: list[lief_ext.Binary], connection: apsw.Connection, cache_flags: CacheFlag
) -> None:
"""Create the DWARF DIE (Debugging Information Entry) graph virtual table."""

def dwarf_dies_graph_generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
binary_name = binary.path
# A bit annoying but we must re-open the file
# since we are using a different library here
with open(binary_name, "rb") as f:
Expand Down Expand Up @@ -612,7 +624,7 @@ def dwarf_dies_graph_generator() -> Iterator[dict[str, Any]]:
)


def symbols(binary: lief.Binary) -> Sequence[lief.ELF.Symbol]:
def symbols(binary: lief_ext.Binary) -> Sequence[lief.ELF.Symbol]:
"""Use heuristic to either get static symbols or dynamic symbol table
Always return the dynamic symbol table first and then the static symbols
Expand All @@ -623,7 +635,7 @@ def symbols(binary: lief.Binary) -> Sequence[lief.ELF.Symbol]:
will not include version information.
"""
static_symbols: Sequence[lief.ELF.Symbol] = binary.static_symbols # type: ignore
dynamic_symbols = list(binary.dynamic_symbols) # type: ignore
dynamic_symbols = list(binary.dynamic_symbols)
dynamic_symbol_names = set(map(lambda s: s.name, dynamic_symbols))
all_symbols = dynamic_symbols + [
s for s in static_symbols if s.name not in dynamic_symbol_names
Expand All @@ -633,7 +645,7 @@ def symbols(binary: lief.Binary) -> Sequence[lief.ELF.Symbol]:

def register_virtual_tables(
connection: apsw.Connection,
binaries: list[lief.Binary],
binaries: list[lief_ext.Binary],
cache_flags: CacheFlag = CacheFlag.INSTRUCTIONS | CacheFlag.SYMBOLS,
) -> None:
"""Register the virtual table modules.
Expand Down
38 changes: 38 additions & 0 deletions sqlelf/lief_ext.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# pyright: strict
from typing import TYPE_CHECKING, Any, Optional

# ELF.pyi has no matching py file since it's a c extension
# pyright: reportMissingModuleSource=false
# https://github.com/microsoft/pyright/issues/5950
import lief.ELF

# Let's make sure type checking works for this proxy class
# https://stackoverflow.com/questions/71365594/how-to-make-a-proxy-object-with-typing-as-underlying-object-in-python
if TYPE_CHECKING:
base = lief.ELF.Binary
else:
base = object


class Binary(base):
    """Proxy for a parsed ``lief.ELF.Binary`` that also records its file path.

    As of https://github.com/lief-project/LIEF/issues/839 the ``name``
    attribute in ``lief.Binary`` was removed. Rather than passing around
    a ``(path, binary)`` tuple, this proxy stores the path in ``path`` and
    forwards every other attribute lookup to the wrapped binary.

    For type checkers the class derives from ``lief.ELF.Binary`` so the
    forwarded attributes are typed; at runtime it derives from ``object``
    and forwarding is done by ``__getattr__``.
    """

    def __init__(self, path: str):
        """Parse the file at ``path`` with ``lief.ELF.parse`` and wrap the result.

        Args:
            path: Location of the ELF file; kept verbatim in ``self.path``.
        """
        self.path = path
        # The result is annotated Optional: no check is made here that
        # parsing produced a binary.
        self.__binary: Optional[lief.ELF.Binary] = lief.ELF.parse(  # pyright: ignore
            path
        )

    if not TYPE_CHECKING:

        def __getattr__(self, attr: str) -> Any:
            """Forward lookups that fail on the proxy to the wrapped binary.

            Raises:
                AttributeError: if the wrapped binary lacks ``attr``, or if
                    the wrapped binary itself has not been set.
            """
            # ``self.__binary`` is name-mangled to ``_Binary__binary``. If it
            # is missing (instance created without __init__, e.g. by
            # copy/pickle, or parse raised), reading it below would re-enter
            # __getattr__ until RecursionError. Fail with a normal
            # AttributeError instead so hasattr()/getattr(default) work.
            if attr == "_Binary__binary":
                raise AttributeError(attr)
            return getattr(self.__binary, attr)

    @staticmethod
    def is_elf(path: str) -> bool:
        """Return whether ``lief.is_elf`` reports the file at ``path`` as ELF."""
        return lief.is_elf(path)
Loading

0 comments on commit 63d0ee9

Please sign in to comment.