diff --git a/flake.nix b/flake.nix index 57fb5a7..39c91ac 100644 --- a/flake.nix +++ b/flake.nix @@ -17,7 +17,7 @@ let pkgs = import nixpkgs { inherit system; - overlays = [ poetry2nix.overlay (import ./overlay.nix) ]; + overlays = [ poetry2nix.overlay (import ./nix/overlay.nix) ]; }; in { diff --git a/derivation.nix b/nix/derivation.nix similarity index 86% rename from derivation.nix rename to nix/derivation.nix index e025b3f..13e2406 100644 --- a/derivation.nix +++ b/nix/derivation.nix @@ -1,5 +1,5 @@ { poetry2nix, poetryOverrides }: poetry2nix.mkPoetryApplication { - projectDir = ./.; + projectDir = ../.; overrides = poetry2nix.overrides.withDefaults poetryOverrides; } diff --git a/nix/lief.nix b/nix/lief.nix new file mode 100644 index 0000000..e0e6908 --- /dev/null +++ b/nix/lief.nix @@ -0,0 +1,44 @@ +# This is an unreleased version of Lief that fixes a bug when generating GNU notes +# https://github.com/lief-project/LIEF/commit/3414ded8cdcbd9705f7871c66c212b15cd74ea69 +# Nixpkgs derivation was updated to change how lief was built since it no longer has setup.py +# in the root of the directory. +# For now, we copy the derivation until it's merged into the nixpkgs we are tracking. +# https://github.com/NixOS/nixpkgs/pull/251414 +{ fetchFromGitHub, python, stdenv, cmake, ninja }: +let + pyEnv = python.withPackages (ps: [ ps.setuptools ps.tomli ps.pip ps.setuptools ]); +in +stdenv.mkDerivation rec { + pname = "lief"; + version = "0.14.0-3414ded"; + src = fetchFromGitHub { + owner = "lief-project"; + repo = "LIEF"; + rev = "3414ded8cdcbd9705f7871c66c212b15cd74ea69"; + sha256 = "sha256-GJTj4w8HhAiC2bQAjEIqPw9feaOHL4fmAfLACioW0Q0="; + }; + outputs = [ "out" "py" ]; + + nativeBuildInputs = [ + cmake + ninja + ]; + + # Not a propagatedBuildInput because only the $py output needs it; $out is + # just the library itself (e.g. C/C++ headers). 
+ buildInputs = [ + python + ]; + + postBuild = '' + pushd /build/source/api/python + ${pyEnv.interpreter} setup.py build --parallel=$NIX_BUILD_CORES + popd + ''; + + postInstall = '' + pushd /build/source/api/python + ${pyEnv.interpreter} setup.py install --skip-build --root=/ --prefix=$py + popd + ''; +} diff --git a/overlay.nix b/nix/overlay.nix similarity index 86% rename from overlay.nix rename to nix/overlay.nix index 3a103be..bab1511 100644 --- a/overlay.nix +++ b/nix/overlay.nix @@ -3,7 +3,7 @@ self: super: { sqlelf = self.callPackage ./derivation.nix { }; sqlelf-env = self.poetry2nix.mkPoetryEnv { - projectDir = ./.; + projectDir = ../.; overrides = self.poetry2nix.overrides.withDefaults self.poetryOverrides; editablePackageSources = { sqlelf = ./sqlelf; }; }; @@ -16,10 +16,16 @@ self: super: { }; }); + + lief-3414ded = self.callPackage ./lief.nix { python = self.python3; }; + poetryOverrides = self: super: { + lief = super.toPythonModule super.pkgs.lief-3414ded.py; + sh = super.sh.overridePythonAttrs (old: { buildInputs = (old.buildInputs or [ ]) ++ [ super.poetry ]; }); + apsw = super.apsw.overridePythonAttrs (old: rec { version = "3.43.0.0"; src = super.pkgs.fetchFromGitHub { diff --git a/pyproject.toml b/pyproject.toml index 580ad23..fe8a946 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ license = "LICENSE" [tool.poetry.dependencies] python = ">=3.10,<4.0" capstone = "^5.0.1" -lief = "^0.13.2" +lief = ">=0.13.2" apsw = "^3.43.0.0" # TODO(fzakaria): Would love to specify this as an exact version # but I was getting weird failures with `nix build` @@ -40,7 +40,7 @@ profile = "black" addopts = "" [tool.pyright] -include = ["sqlelf"] +include = ["sqlelf", "tests"] exclude = ["**/__pycache__"] reportMissingImports = true diff --git a/sqlelf/cli.py b/sqlelf/cli.py index 1e70c27..438e033 100644 --- a/sqlelf/cli.py +++ b/sqlelf/cli.py @@ -7,11 +7,10 @@ import apsw import apsw.bestpractice import apsw.shell -import lief from sqlelf import ldd 
- -from .elf import dynamic, header, instruction, section, strings, symbol +from sqlelf.elf import dynamic, header, instruction, section, strings, symbol +from sqlelf.elf.binary import Binary def start(args=sys.argv[1:], stdin=sys.stdin): @@ -55,13 +54,15 @@ def start(args=sys.argv[1:], stdin=sys.stdin): ), ) # Filter the list of filenames to those that are ELF files only - filenames = list(filter(lambda f: os.path.isfile(f) and lief.is_elf(f), filenames)) + filenames = list( + filter(lambda f: os.path.isfile(f) and Binary.is_elf(f), filenames) + ) # If none of the inputs are valid files, simply return if len(filenames) == 0: return - binaries: list[lief.Binary] = [lief.parse(filename) for filename in filenames] + binaries: list[Binary] = [Binary(filename) for filename in filenames] # If the recursive option is specidied, load the shared libraries # the binary would load as well. @@ -76,7 +77,7 @@ def start(args=sys.argv[1:], stdin=sys.stdin): for library in sub_list ] ) - binaries = binaries + [lief.parse(library) for library in shared_libraries] + binaries = binaries + [Binary(library) for library in shared_libraries] # forward sqlite logs to logging module apsw.bestpractice.apply(apsw.bestpractice.recommended) diff --git a/sqlelf/elf/binary.py b/sqlelf/elf/binary.py new file mode 100644 index 0000000..d04fc5a --- /dev/null +++ b/sqlelf/elf/binary.py @@ -0,0 +1,33 @@ +# pyright: strict +from typing import TYPE_CHECKING, Any + +import lief + +# Let's make sure type checking works for this proxy class +# https://stackoverflow.com/questions/71365594/how-to-make-a-proxy-object-with-typing-as-underlying-object-in-python +if TYPE_CHECKING: + base = lief.ELF.Binary +else: + base = object + + +class Binary(base): + """Proxy the lief.Binary object to add a path attribute. + + As of https://github.com/lief-project/LIEF/issues/839 the name + attribute in lief.Binary was removed. Rather than passing around + a tuple let's create a nice proxy class. 
+ """ + + def __init__(self, path: str): + self.path = path + self.__binary = lief.parse(path) + + if not TYPE_CHECKING: + + def __getattr__(self, attr: str) -> Any: + return getattr(self.__binary, attr) + + @staticmethod + def is_elf(path: str): + return lief.is_elf(path) diff --git a/sqlelf/elf/dynamic.py b/sqlelf/elf/dynamic.py index d933b59..5c8c0b4 100644 --- a/sqlelf/elf/dynamic.py +++ b/sqlelf/elf/dynamic.py @@ -5,24 +5,29 @@ import apsw import apsw.ext -import lief + +from sqlelf.elf.binary import Binary # This is effectively the .dynamic section but it is elevated as a table here # since it is widely used and can benefit from simpler table access. -def elf_dynamic_entries(binaries: list[lief.Binary]): +def elf_dynamic_entries(binaries: list[Binary]): def generator() -> Iterator[dict[str, Any]]: for binary in binaries: # super important that these accessors are pulled out of the tight loop # as they can be costly - binary_name = binary.name - for entry in binary.dynamic_entries: # pyright: ignore - yield {"path": binary_name, "tag": entry.tag.name, "value": entry.value} + binary_path = binary.path + for entry in binary.dynamic_entries: + yield { + "path": binary_path, + "tag": entry.tag.__name__, + "value": entry.value, + } return generator -def register(connection: apsw.Connection, binaries: list[lief.Binary]): +def register(connection: apsw.Connection, binaries: list[Binary]): generator = elf_dynamic_entries(binaries) # setup columns and access by providing an example of the first entry returned generator.columns, generator.column_access = apsw.ext.get_column_names( diff --git a/sqlelf/elf/header.py b/sqlelf/elf/header.py index 2f26402..89d62ca 100644 --- a/sqlelf/elf/header.py +++ b/sqlelf/elf/header.py @@ -1,28 +1,26 @@ -# Without this Python was complaining -from __future__ import annotations - from typing import Any, Iterator import apsw import apsw.ext -import lief + +from sqlelf.elf.binary import Binary -def elf_headers(binaries: list[lief.Binary]): 
+def elf_headers(binaries: list[Binary]): def generator() -> Iterator[dict[str, Any]]: for binary in binaries: yield { - "path": binary.name, - "type": binary.header.file_type.name, - "machine": binary.header.machine_type.name, - "version": binary.header.identity_version.name, + "path": binary.path, + "type": binary.header.file_type.__name__, + "machine": binary.header.machine_type.__name__, + "version": binary.header.identity_version.__name__, "entry": binary.header.entrypoint, } return generator -def register(connection: apsw.Connection, binaries: list[lief.Binary]): +def register(connection: apsw.Connection, binaries: list[Binary]): generator = elf_headers(binaries) # setup columns and access by providing an example of the first entry returned generator.columns, generator.column_access = apsw.ext.get_column_names( diff --git a/sqlelf/elf/instruction.py b/sqlelf/elf/instruction.py index 7637e17..fa6580f 100644 --- a/sqlelf/elf/instruction.py +++ b/sqlelf/elf/instruction.py @@ -10,13 +10,15 @@ import capstone # pyright: ignore import lief +from sqlelf.elf.binary import Binary -def elf_instructions(binaries: list[lief.Binary]): + +def elf_instructions(binaries: list[Binary]): def generator() -> Iterator[dict[str, Any]]: for binary in binaries: # super important that these accessors are pulled out of the tight loop # as they can be costly - binary_name = binary.name + binary_path = binary.path for section in binary.sections: if section.has(lief.ELF.SECTION_FLAGS.EXECINSTR): @@ -34,7 +36,7 @@ def generator() -> Iterator[dict[str, Any]]: data, section.virtual_address ): yield { - "path": binary_name, + "path": binary_path, "section": section_name, "mnemonic": mnemonic, "address": address, @@ -44,19 +46,19 @@ def generator() -> Iterator[dict[str, Any]]: return generator -def mode(binary: lief.Binary) -> int: +def mode(binary: Binary) -> int: if binary.header.identity_class == lief.ELF.ELF_CLASS.CLASS64: return capstone.CS_MODE_64 - raise Exception(f"Unknown mode for 
{binary.name}") + raise Exception(f"Unknown mode for {binary.path}") -def arch(binary: lief.Binary) -> int: +def arch(binary: Binary) -> int: if binary.header.machine_type == lief.ELF.ARCH.x86_64: return capstone.CS_ARCH_X86 - raise Exception(f"Unknown machine type for {binary.name}") + raise Exception(f"Unknown machine type for {binary.path}") -def register(connection: apsw.Connection, binaries: list[lief.Binary]): +def register(connection: apsw.Connection, binaries: list[Binary]): generator = elf_instructions(binaries) # setup columns and access by providing an example of the first entry returned generator.columns, generator.column_access = apsw.ext.get_column_names( diff --git a/sqlelf/elf/section.py b/sqlelf/elf/section.py index 969440c..a40b499 100644 --- a/sqlelf/elf/section.py +++ b/sqlelf/elf/section.py @@ -5,22 +5,23 @@ import apsw import apsw.ext -import lief +from sqlelf.elf.binary import Binary -def elf_sections(binaries: list[lief.Binary]): + +def elf_sections(binaries: list[Binary]): def generator() -> Iterator[dict[str, Any]]: for binary in binaries: # super important that these accessors are pulled out of the tight loop # as they can be costly - binary_name = binary.name + binary_path = binary.path for section in binary.sections: yield { - "path": binary_name, + "path": binary_path, "name": section.name, "offset": section.offset, "size": section.size, - "type": section.type.name, + "type": section.type.__name__, "content": bytes(section.content), } @@ -33,7 +34,7 @@ def section_name(name: str | None) -> str | None: return name -def register(connection: apsw.Connection, binaries: list[lief.Binary]): +def register(connection: apsw.Connection, binaries: list[Binary]): generator = elf_sections(binaries) # setup columns and access by providing an example of the first entry returned generator.columns, generator.column_access = apsw.ext.get_column_names( diff --git a/sqlelf/elf/strings.py b/sqlelf/elf/strings.py index 456ead3..db076b0 100644 --- 
a/sqlelf/elf/strings.py +++ b/sqlelf/elf/strings.py @@ -7,8 +7,10 @@ import apsw.ext import lief +from sqlelf.elf.binary import Binary -def elf_strings(binaries: list[lief.Binary]): + +def elf_strings(binaries: list[Binary]): def generator() -> Iterator[dict[str, Any]]: for binary in binaries: strtabs = [ @@ -18,19 +20,19 @@ def generator() -> Iterator[dict[str, Any]]: ] # super important that these accessors are pulled out of the tight loop # as they can be costly - binary_name = binary.name + binary_path = binary.path for strtab in strtabs: # The first byte is always the null byte in the STRTAB # Python also treats the final null in the string by creating # an empty item so we chop it off. # https://stackoverflow.com/a/18970869 for string in str(strtab.content[1:-1], "utf-8").split("\x00"): - yield {"path": binary_name, "section": strtab.name, "value": string} + yield {"path": binary_path, "section": strtab.name, "value": string} return generator -def register(connection: apsw.Connection, binaries: list[lief.Binary]): +def register(connection: apsw.Connection, binaries: list[Binary]): generator = elf_strings(binaries) # setup columns and access by providing an example of the first entry returned generator.columns, generator.column_access = apsw.ext.get_column_names( diff --git a/sqlelf/elf/symbol.py b/sqlelf/elf/symbol.py index 66b3a27..343b605 100644 --- a/sqlelf/elf/symbol.py +++ b/sqlelf/elf/symbol.py @@ -5,17 +5,17 @@ import apsw import apsw.ext -import lief -from ..elf.section import section_name as elf_section_name +from sqlelf.elf.binary import Binary +from sqlelf.elf.section import section_name as elf_section_name -def elf_symbols(binaries: list[lief.Binary]): +def elf_symbols(binaries: list[Binary]): def generator() -> Iterator[dict[str, Any]]: for binary in binaries: # super important that these accessors are pulled out of the tight loop # as they can be costly - binary_name = binary.name + binary_path = binary.path for symbol in binary.symbols: # The 
section index can be special numbers like 65521 or 65522 # that refer to special sections so they can't be indexed @@ -26,9 +26,10 @@ def generator() -> Iterator[dict[str, Any]]: if shndx == symbol.shndx ), None, - ) + ) # pyright: ignore (https://github.com/lief-project/LIEF/issues/965) + yield { - "path": binary_name, + "path": binary_path, "name": symbol.name, "demangled_name": symbol.demangled_name, # A bit of detailed explanation here to explain these values. @@ -47,14 +48,15 @@ def generator() -> Iterator[dict[str, Any]]: # TODO(fzakaria): Better understand why is it auxiliary? # this returns versions like GLIBC_2.2.5 "version": symbol.symbol_version.symbol_version_auxiliary.name - if symbol.symbol_version.symbol_version_auxiliary + if symbol.symbol_version + and symbol.symbol_version.symbol_version_auxiliary else None, } return generator -def register(connection: apsw.Connection, binaries: list[lief.Binary]): +def register(connection: apsw.Connection, binaries: list[Binary]): generator = elf_symbols(binaries) # setup columns and access by providing an example of the first entry returned generator.columns, generator.column_access = apsw.ext.get_column_names( diff --git a/sqlelf/ldd.py b/sqlelf/ldd.py index 0db6832..3b606d3 100644 --- a/sqlelf/ldd.py +++ b/sqlelf/ldd.py @@ -2,14 +2,15 @@ from collections import OrderedDict from typing import Dict -import lief import sh # pyright: ignore +from sqlelf.elf.binary import Binary -def libraries(binary: lief.Binary) -> Dict[str, str]: + +def libraries(binary: Binary) -> Dict[str, str]: """Use the interpreter in a binary to determine the path of each linked library""" interpreter = sh.Command(binary.interpreter) # pyright: ignore - resolution = interpreter("--list", binary.name) + resolution = interpreter("--list", binary.path) result = OrderedDict() # TODO: Figure out why `--list` and `ldd` produce different outcomes # specifically for the interpreter. 
diff --git a/tests/test_ldd.py b/tests/test_ldd.py index aa31e2f..19c2637 100644 --- a/tests/test_ldd.py +++ b/tests/test_ldd.py @@ -1,16 +1,16 @@ from sqlelf import ldd -import lief +from sqlelf.elf.binary import Binary from unittest.mock import patch def test_simple_binary_real(): - binary = lief.parse("/bin/ls") + binary = Binary("/bin/ls") result = ldd.libraries(binary) assert len(result) > 0 @patch("sh.Command") def test_simple_binary_mocked(Command): - binary = lief.parse("/bin/ls") + binary = Binary("/bin/ls") Command(binary.interpreter).return_value = """ linux-vdso.so.1 (0x00007ffc5d8ff000) /lib/x86_64-linux-gnu/libnss_cache.so.2 (0x00007f6995d92000)