Skip to content

Commit

Permalink
Create temporary tables to improve performance (#5)
Browse files Browse the repository at this point in the history
Create temporary tables for symbols and instructions to improve
performance.

There is an upfront cost to ingest the data into the temporary tables,
but it is small (about 2s), and every query is much faster
afterwards.

Also moved pyright to a poetry dependency.
  • Loading branch information
fzakaria authored Sep 1, 2023
1 parent 8097560 commit c0f9ebf
Show file tree
Hide file tree
Showing 7 changed files with 72 additions and 6 deletions.
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,7 @@ fmt: ## Format code using black & isort.
lint: ## Run pep8, black, mypy linters.
flake8 sqlelf/
black --check sqlelf/
pyright
# TODO(fzakaria): without pythonpath it picks up the wrong python
# and then does not find the venv for the imports
pyright --pythonpath $(shell which python)
nixpkgs-fmt --check .
2 changes: 1 addition & 1 deletion flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,6 @@
};

devShell = pkgs.sqlelf-env.env.overrideAttrs
(oldAttrs: { buildInputs = with pkgs; [ poetry pyright nixpkgs-fmt ]; });
(oldAttrs: { buildInputs = with pkgs; [ poetry nixpkgs-fmt ]; });
});
}
36 changes: 35 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ setuptools = "*"
black = "^23.7.0"
isort = "^5.12.0"
flake8 = "^6.1.0"
pyright = "^1.1.325"

[build-system]
requires = ["poetry-core"]
Expand Down
8 changes: 7 additions & 1 deletion sqlelf/elf/instruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,10 @@ def register(connection: apsw.Connection, binaries: list[lief.Binary]):
generator.columns, generator.column_access = apsw.ext.get_column_names(
next(generator())
)
apsw.ext.make_virtual_module(connection, "elf_instructions", generator)
apsw.ext.make_virtual_module(connection, "raw_elf_instructions", generator)
connection.execute(
"""
CREATE TEMP TABLE elf_instructions
AS SELECT * FROM raw_elf_instructions;
"""
)
6 changes: 6 additions & 0 deletions sqlelf/elf/section.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ def generator() -> Iterator[dict[str, Any]]:
return generator


def section_name(name: str | None) -> str | None:
if name == "":
return "undefined"
return name


def register(connection: apsw.Connection, binaries: list[lief.Binary]):
generator = elf_sections(binaries)
# setup columns and access by providing an example of the first entry returned
Expand Down
21 changes: 19 additions & 2 deletions sqlelf/elf/symbol.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import apsw
import apsw.ext
import lief
from ..elf.section import section_name as elf_section_name


def elf_symbols(binaries: list[lief.Binary]):
Expand All @@ -15,6 +16,16 @@ def generator() -> Iterator[dict[str, Any]]:
# as they can be costly
binary_name = binary.name
for symbol in binary.symbols:
# The section index can be a reserved value such as 65521 (0xfff1, SHN_ABS)
# or 65522 (0xfff2, SHN_COMMON); these do not refer to a real entry in the
# section table, so they can't be used to index binary.sections
section_name: str | None = next(
(
section.name
for shndx, section in enumerate(binary.sections)
if shndx == symbol.shndx
),
None,
)
yield {
"path": binary_name,
"name": symbol.name,
Expand All @@ -30,7 +41,7 @@ def generator() -> Iterator[dict[str, Any]]:
# https://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html
"imported": symbol.imported,
"exported": symbol.exported,
"section": binary.sections[symbol.shndx].name,
"section": elf_section_name(section_name),
"size": symbol.size,
}

Expand All @@ -43,4 +54,10 @@ def register(connection: apsw.Connection, binaries: list[lief.Binary]):
generator.columns, generator.column_access = apsw.ext.get_column_names(
next(generator())
)
apsw.ext.make_virtual_module(connection, "elf_symbols", generator)
apsw.ext.make_virtual_module(connection, "raw_elf_symbols", generator)
connection.execute(
"""
CREATE TEMP TABLE elf_symbols
AS SELECT * FROM raw_elf_symbols;
"""
)

0 comments on commit c0f9ebf

Please sign in to comment.