From c0f9ebfd17763093f75195eddb8285c35737e8f7 Mon Sep 17 00:00:00 2001 From: Farid Zakaria Date: Fri, 1 Sep 2023 10:43:41 -0700 Subject: [PATCH] Create temporary tables to improve performance (#5) Create temporary tables for symbols and instructions to improve performance. There is an upfront cost to ingesting the data into a temporary table but it's relatively nominal (2s) and every query is WAY faster afterwards. Also moved pyright to a poetry dependency. --- Makefile | 4 +++- flake.nix | 2 +- poetry.lock | 36 +++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + sqlelf/elf/instruction.py | 8 +++++++- sqlelf/elf/section.py | 6 ++++++ sqlelf/elf/symbol.py | 21 +++++++++++++++++++-- 7 files changed, 72 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 28860fc..701c143 100644 --- a/Makefile +++ b/Makefile @@ -23,5 +23,7 @@ fmt: ## Format code using black & isort. lint: ## Run pep8, black, mypy linters. flake8 sqlelf/ black --check sqlelf/ - pyright +# TODO(fzakaria): without pythonpath it picks up the wrong python +# and then does not find the venv for the imports + pyright --pythonpath $(shell which python) nixpkgs-fmt --check . \ No newline at end of file diff --git a/flake.nix b/flake.nix index 13aec83..57fb5a7 100644 --- a/flake.nix +++ b/flake.nix @@ -27,6 +27,6 @@ }; devShell = pkgs.sqlelf-env.env.overrideAttrs - (oldAttrs: { buildInputs = with pkgs; [ poetry pyright nixpkgs-fmt ]; }); + (oldAttrs: { buildInputs = with pkgs; [ poetry nixpkgs-fmt ]; }); }); } diff --git a/poetry.lock b/poetry.lock index 5a35566..7831931 100644 --- a/poetry.lock +++ b/poetry.lock @@ -259,6 +259,21 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "nodeenv" +version = "1.8.0" +description = "Node.js virtual environment builder" +category = "dev" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" +files = [ + {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"}, + {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, +] + +[package.dependencies] +setuptools = "*" + [[package]] name = "packaging" version = "23.1" @@ -323,6 +338,25 @@ files = [ {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, ] +[[package]] +name = "pyright" +version = "1.1.325" +description = "Command line wrapper for pyright" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pyright-1.1.325-py3-none-any.whl", hash = "sha256:8f3ab88ba4843f053ab5b5c886d676161aba6f446776bfb57cc0434ed4d88672"}, + {file = "pyright-1.1.325.tar.gz", hash = "sha256:879a3f66944ffd59d3facd54872fed814830fed64daf3e8eb71b146ddd83bb67"}, +] + +[package.dependencies] +nodeenv = ">=1.6.0" + +[package.extras] +all = ["twine (>=3.4.1)"] +dev = ["twine (>=3.4.1)"] + [[package]] name = "setuptools" version = "68.1.2" @@ -355,4 +389,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = ">=3.10" -content-hash = "0871b2e72bc3a35e481b388aaccd898b4225a2f5ab3368770188a2411cfab7de" +content-hash = "838653258e74177629af577ca8ec4cebaa7b06ab0fac57c0cce39c2319db4bbd" diff --git a/pyproject.toml b/pyproject.toml index ceb8ad3..b4ab9ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ setuptools = "*" black = "^23.7.0" isort = "^5.12.0" flake8 = "^6.1.0" +pyright = "^1.1.325" [build-system] requires = ["poetry-core"] diff --git a/sqlelf/elf/instruction.py b/sqlelf/elf/instruction.py index eeb4da9..caa3183 100644 --- a/sqlelf/elf/instruction.py +++ b/sqlelf/elf/instruction.py @@ -63,4 +63,10 @@ def register(connection: apsw.Connection, binaries: list[lief.Binary]): generator.columns, generator.column_access = apsw.ext.get_column_names( next(generator()) ) - apsw.ext.make_virtual_module(connection, "elf_instructions", generator) + apsw.ext.make_virtual_module(connection, "raw_elf_instructions", generator) + connection.execute( + """ + CREATE TEMP TABLE elf_instructions + AS SELECT * FROM raw_elf_instructions; + """ + ) diff --git a/sqlelf/elf/section.py b/sqlelf/elf/section.py index 4544ecc..969440c 100644 --- a/sqlelf/elf/section.py +++ b/sqlelf/elf/section.py @@ -27,6 +27,12 @@ def generator() -> Iterator[dict[str, Any]]: return generator +def section_name(name: str | None) -> str | None: + if name == "": + return "undefined" + return name + + def register(connection: apsw.Connection, binaries: list[lief.Binary]): generator = elf_sections(binaries) # setup columns and access by providing an example of the first entry returned diff --git a/sqlelf/elf/symbol.py b/sqlelf/elf/symbol.py index 1900260..8d4c965 100644 --- a/sqlelf/elf/symbol.py +++ b/sqlelf/elf/symbol.py @@ -6,6 +6,7 @@ import apsw import apsw.ext import lief +from ..elf.section import section_name as elf_section_name def elf_symbols(binaries: list[lief.Binary]): @@ -15,6 +16,16 @@ def generator() -> Iterator[dict[str, Any]]: # as they can be costly binary_name = binary.name for symbol in binary.symbols: + # The section index can be special numbers like 65521 or 65522 + # that refer to special sections so they can't be indexed + section_name: str | None = next( + ( + section.name + for shndx, section in enumerate(binary.sections) + if shndx == symbol.shndx + ), + None, + ) yield { "path": binary_name, "name": symbol.name, @@ -30,7 +41,7 @@ def generator() -> Iterator[dict[str, Any]]: # https://www.m4b.io/elf/export/binary/analysis/2015/05/25/what-is-an-elf-export.html "imported": symbol.imported, "exported": symbol.exported, - "section": binary.sections[symbol.shndx].name, + "section": elf_section_name(section_name), "size": symbol.size, } @@ -43,4 +54,10 @@ def register(connection: apsw.Connection, binaries: list[lief.Binary]): generator.columns, generator.column_access = apsw.ext.get_column_names( next(generator()) ) - apsw.ext.make_virtual_module(connection, "elf_symbols", generator) + apsw.ext.make_virtual_module(connection, "raw_elf_symbols", generator) + connection.execute( + """ + CREATE TEMP TABLE elf_symbols + AS SELECT * FROM raw_elf_symbols; + """ + )