Skip to content

Commit

Permalink
Minor fixes to benchmarks and added new find_shadowed_symbols script
Browse files: browse the repository at this point in the history
  • Loading branch information
fzakaria committed Nov 3, 2023
1 parent 430097c commit 95e4e03
Show file tree
Hide file tree
Showing 4 changed files with 1,080 additions and 4 deletions.
4 changes: 3 additions & 1 deletion benchmarks/bin_symbol_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ def is_elf_file(filepath):
return magic_number == b"\x7fELF"


for root, dirs, files in itertools.chain(os.walk("/bin"), os.walk("/usr/bin")):
for root, dirs, files in itertools.chain(
os.walk("/bin"), os.walk("/usr/bin"), os.walk("/lib/x86_64-linux-gnu")
):
for file in files:
full_path_file = os.path.join(root, file)
if not os.path.isfile(full_path_file) or not os.access(full_path_file, os.X_OK):
Expand Down
66 changes: 66 additions & 0 deletions benchmarks/find_shadowed_symbols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#! /usr/bin/env python3
import os
import pprint
import signal
import sys

from sqlelf import elf, sql


class TimeOutException(Exception):
    """Raised from the SIGALRM handler when a scan runs past its deadline."""


def alarm_handler(signum, frame):
    """Signal handler: announce the alarm, then abort via TimeOutException.

    Args:
        signum: Number of the delivered signal (unused).
        frame: Stack frame that was interrupted (unused).

    Raises:
        TimeOutException: Always, so the interrupted code unwinds.
    """
    print("ALARM signal received")  # noqa: T201
    raise TimeOutException


# Seconds a single file may take before SIGALRM aborts its analysis.
SCAN_TIMEOUT_SECONDS = 10

# Finds symbols that are defined more than once across the loaded objects.
# - .bss symbols are skipped: they are declared without a value and tend to
#   show up as duplicates (not yet fully understood).
# - `is_pie = 0` is meant to leave out the main binary so that only
#   interposition between shared objects is reported.
SHADOWED_SYMBOLS_QUERY = """
    SELECT name, elf_symbols.version, count(*) as symbol_count,
           GROUP_CONCAT(elf_headers.path, ':') as libraries
    FROM elf_symbols, elf_headers
    WHERE elf_symbols.path = elf_headers.path AND
          elf_headers.is_pie = 0 AND
          exported = TRUE AND section != '.bss'
    GROUP BY name, elf_symbols.version
    HAVING count(*) >= 2
"""


def find_shadowed_symbols(full_filename):
    """Return the duplicate-symbol rows found for one file.

    Args:
        full_filename: Path of the file to load (its dependencies are loaded
            too, via ``recursive=True``).

    Returns:
        A list of result rows, one per (name, version) defined at least
        twice, excluding rows whose library list mentions libc or libm.

    Raises:
        Exception: Whatever sqlelf raises for files it cannot load or query.
    """
    engine = sql.make_sql_engine(
        [full_filename], recursive=True, cache_flags=elf.CacheFlag.SYMBOLS
    )
    results = engine.execute(SHADOWED_SYMBOLS_QUERY)
    # NOTE(review): these are substring tests on the ':'-joined path list, so
    # they also drop rows involving e.g. libcrypto or libmount -- confirm that
    # this is intended before tightening the match.
    return [
        row
        for row in results
        if "libc" not in row["libraries"] and "libm" not in row["libraries"]
    ]


def main():
    """Scan every file under the directory given as argv[1] and report hits."""
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} <directory>")
    directory = sys.argv[1]

    # The handler never changes, so install it once instead of once per file.
    signal.signal(signal.SIGALRM, alarm_handler)

    for root, _dirs, files in os.walk(directory):
        for file in files:
            full_filename = os.path.join(root, file)
            try:
                print(f"Investigating {full_filename}")  # noqa: T201

                # setup the timeout
                signal.alarm(SCAN_TIMEOUT_SECONDS)

                rows = find_shadowed_symbols(full_filename)
                if rows:
                    print(f"Found {len(rows)} duplicate symbols")  # noqa: T201
                    pprint.pprint(rows)  # noqa: T203
            except TimeOutException:
                print(f"Timed out investigating {full_filename}")  # noqa: T201
            except Exception as ex:
                # Best effort: most files are not loadable ELF objects, so
                # report the problem and carry on with the next file.
                print(ex)  # noqa: T201
            finally:
                # Always cancel a pending alarm so it cannot fire later.
                signal.alarm(0)


if __name__ == "__main__":
    main()
3 changes: 3 additions & 0 deletions benchmarks/graph_bin_symbol_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
ggplot,
labs,
save_as_pdf_pages,
scale_y_log10,
theme_minimal,
)

Expand All @@ -22,12 +23,14 @@

# Convert to DataFrame
df = pd.DataFrame(list(data_dict.items()), columns=["File", "Symbols"])
df = df.query("Symbols > 0")

# Create plot
plot = (
ggplot(df, aes(x="Symbols", y=after_stat("ncount")))
+ geom_histogram(bins=100, fill="skyblue", color="black", alpha=0.7)
+ labs(title="", x="Number of Symbols", y="Normalized Count")
+ scale_y_log10(expand=(0, 0, 0.1, 0))
+ theme_minimal()
)

Expand Down
Loading

0 comments on commit 95e4e03

Please sign in to comment.