Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pandalyzer #37

Draft
wants to merge 28 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
7b5a3ff
init pandalyzer
matthiasdiener Nov 28, 2020
29b9ba0
plot cleanups
matthiasdiener Nov 29, 2020
d3229c7
small cleanups
matthiasdiener Nov 29, 2020
c7d355a
Merge branch 'master' into pandalyzer
matthiasdiener Nov 30, 2020
5a749d4
unit
matthiasdiener Dec 1, 2020
a05a5b1
adjust file naming
matthiasdiener Dec 2, 2020
4925e7b
fix
matthiasdiener Dec 2, 2020
98df3ac
better support for in memory DB
matthiasdiener Dec 2, 2020
962cb93
Merge branch 'file-naming' into pandalyzer
matthiasdiener Dec 2, 2020
25d5008
Merge branch 'master' into pandalyzer
matthiasdiener Dec 20, 2020
880f76c
small fixes
matthiasdiener Jan 4, 2021
c81c5c8
merge main
matthiasdiener Apr 9, 2021
d442db2
Merge branch 'main' into pandalyzer
matthiasdiener Apr 9, 2021
ad169bf
Merge branch 'main' into pandalyzer
matthiasdiener Jul 15, 2021
75695c3
Merge branch 'main' into pandalyzer
matthiasdiener Sep 6, 2021
7986907
Merge branch 'main' into pandalyzer
matthiasdiener Feb 12, 2023
1798bf3
Merge branch 'main' into pandalyzer
matthiasdiener Feb 21, 2023
db91343
runprops(prop)
matthiasdiener Feb 27, 2023
76e269f
better y label
matthiasdiener Feb 27, 2023
2f1158a
lint fixes
matthiasdiener Feb 27, 2023
eaa117b
Merge branch 'main' into pandalyzer
matthiasdiener Mar 23, 2023
7fc98a9
Merge branch 'main' into pandalyzer
matthiasdiener Apr 7, 2023
4829ddb
accept kwargs for plotting
matthiasdiener Apr 7, 2023
90e4bdc
isort
matthiasdiener Apr 7, 2023
0689f29
Merge branch 'main' into pandalyzer
matthiasdiener Apr 27, 2023
035fffc
Merge branch 'main' into pandalyzer
matthiasdiener Oct 23, 2023
5a884b5
Merge branch 'main' into pandalyzer
matthiasdiener Jan 5, 2024
4b4eb98
Merge branch 'main' into pandalyzer
matthiasdiener Jan 5, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
init pandalyzer
matthiasdiener committed Nov 28, 2020

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
commit 7b5a3fff8537880f2941b1a7eefac9b6bf03d0d4
33 changes: 33 additions & 0 deletions bin/pandalyzer
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#! /usr/bin/env python


def main():
    """Command-line entry point for pandalyzer.

    Parses the command line and then does exactly one of the following with
    the database named by ``dbfile``:

    * if ``scriptfile`` is given, executes that file;
    * otherwise, if ``-c/--commands`` is given, executes that string;
    * otherwise, starts an interactive console.

    Scripts and commands run with the symbols returned by
    ``make_pandalyzer_symbols`` as their global namespace.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(description="Analyze a logpyle database.")
    parser.add_argument("-c", "--commands", action="store",
                        help="commands to execute")
    parser.add_argument("dbfile", help="database file to read")
    parser.add_argument("scriptfile", nargs="?", help="script file to read")
    args = parser.parse_args()

    # parse_args() exits for --help and usage errors, i.e. before this import
    # is reached.
    from logpyle.pandalyzer import (
        PandalyzerConsole,
        make_db,
        make_pandalyzer_symbols,
    )

    if args.scriptfile:
        db = make_db(args.dbfile, interactive=False)
        # Read through a context manager so the file handle is closed before
        # the script starts running.
        with open(args.scriptfile) as script:
            source = script.read()
        # NOTE: the script is executed with full interpreter privileges.
        exec(compile(source, args.scriptfile, "exec"),
             make_pandalyzer_symbols(db))
    elif args.commands:
        db = make_db(args.dbfile, interactive=False)
        exec(compile(args.commands, "--commands", "exec"),
             make_pandalyzer_symbols(db))
    else:
        db = make_db(args.dbfile, interactive=True)
        cons = PandalyzerConsole(db)
        # The space after "Python" keeps the version from running into the word.
        cons.interact("Pandalyzer running on Python "
                      + sys.version.split("\n")[0]
                      + "\nRun help() to see help for 'magic' commands.")


if __name__ == "__main__":
    main()
217 changes: 217 additions & 0 deletions logpyle/pandalyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
import code
import pandas as pd
from sqlalchemy import create_engine
from warnings import warn
from pytools import Table

try:
import readline
import rlcompleter # noqa: F401
HAVE_READLINE = True
except ImportError:
HAVE_READLINE = False


# Commands:
# .help show this help message
# .q SQL execute a (potentially mangled) query
# .runprops show a list of run properties
# .quantities show a list of time-dependent quantities

# Plotting:
# .plot SQL plot results of (potentially mangled) query.
# result sets can be (x,y) or (x,y,descr1,descr2,...),
# in which case a new plot will be started for each
# tuple (descr1, descr2, ...)
# .scatter SQL make scatterplot results of (potentially mangled) query.
# result sets can have between two and four columns
# for (x,y,size,color).

# SQL mangling, if requested ("MagicSQL"):
# select $quantity where pred(feature)

# Custom SQLite aggregates:
# stddev, var, norm1, norm2

# Available Python symbols:
# db: the SQLite database
# mangle_sql(query_str): mangle the SQL query string query_str
# q(query_str): get db cursor for mangled query_str
# dbplot(cursor): plot result of cursor
# dbscatter(cursor): make scatterplot result of cursor
# dbprint(cursor): print result of cursor
# split_cursor(cursor): x,y,data gather that .plot uses internally
# table_from_cursor(cursor)

def table_from_df(df, header=None, skip_index=True):
    """Convert a pandas DataFrame into a :class:`pytools.Table`.

    :arg df: the DataFrame to convert.
    :arg header: optional sequence of cells used as the first row. If *None*,
        the DataFrame's column labels are used.
    :arg skip_index: if true (the default), the index is left out of every
        row; otherwise the index value is the first cell of each data row.
    :returns: the populated table.
    """
    tbl = Table()

    if header is None:
        header = list(df.columns)
        if not skip_index:
            # Data rows start with the index value in this mode, so the
            # default header needs a matching leading cell to keep every row
            # the same width.
            index_label = df.index.name
            header.insert(0, "" if index_label is None else index_label)

    tbl.add_row(header)

    # name=None yields plain tuples; index=... decides whether the index
    # value is included as the first element.
    for row in df.itertuples(index=not skip_index, name=None):
        tbl.add_row(row)

    return tbl

def pandalyzer_help():
    """Print a summary of the commands and symbols pandalyzer provides.

    Only names that are actually placed into the script/console namespace are
    listed, so everything shown here can be called as written.
    """
    print("""
Commands:
 help()                     show this help message
 runprops()                 show a list of run properties
 quantities()               show a list of time-dependent quantities
 dump(table_name)           print the contents of a table

Plotting:
 plot(values, kind="line")  plot a list of quantities

Available Python symbols:
 db: the database
""")

def make_pandalyzer_symbols(db):
    """Build the global namespace used for pandalyzer scripts and consoles.

    :arg db: the database object whose bound methods become the commands.
    :returns: a dict mapping symbol names to the objects they refer to.
    """
    symbols = {"__name__": "__console__", "__doc__": None}

    # Commands that only print information.
    symbols["help"] = pandalyzer_help
    symbols["runprops"] = db.runprops
    symbols["quantities"] = db.quantities

    # The database itself plus its table dumping and plotting methods.
    symbols["db"] = db
    symbols["dump"] = db.dump
    symbols["plot"] = db.plot

    # Deliberately not exposed (disabled entries): q, dbplot, dbscatter,
    # dbprint, split_cursor, table_from_cursor.
    return symbols


class RunDB:
    """Cached DataFrame access to the tables reachable through an engine.

    Each table is read with :func:`pandas.read_sql_table` the first time it is
    requested and kept in ``tables`` afterwards.
    """

    def __init__(self, engine, interactive):
        """
        :arg engine: the connectable handed to :func:`pandas.read_sql_table`.
        :arg interactive: stored on the instance; not read by any method of
            this class.
        """
        self.engine = engine
        self.interactive = interactive
        # Not used by any method of this class.
        self.rank_agg_tables = set()
        # Cache of already loaded tables, keyed by table name.
        self.tables = {}

    def _get_table(self, table_name: str):
        """Return the DataFrame for *table_name*, loading it on first use.

        :returns: the DataFrame, or *None* (after issuing a warning) if the
            table could not be read.
        """
        try:
            return self.tables[table_name]
        except KeyError:
            pass

        try:
            table = pd.read_sql_table(table_name, self.engine)
        except ValueError:
            if table_name == "runs":
                warn("Run runalyzer-gather first.")
            else:
                warn(f"No such table '{table_name}'.")
            return None

        self.tables[table_name] = table
        return table

    def runprops(self) -> None:
        """Print the 'runs' table transposed into Property/Value rows."""
        runs = self._get_table("runs")
        if runs is None:
            # _get_table has already warned; there is nothing to print.
            return
        print(table_from_df(runs.transpose(),
                            header=["Property", "Value"],
                            skip_index=False))

    def quantities(self) -> None:
        """Print the 'quantities' table."""
        self.dump("quantities")

    def plot(self, values: list, kind: str = "line"):
        """Plot the ``value`` column of each table named in *values*.

        :arg values: names of the tables to plot.
        :arg kind: plot kind passed on to :meth:`pandas.DataFrame.plot`.
        :returns: whatever :meth:`pandas.DataFrame.plot` returned, or *None*
            if none of the requested tables could be read.
        """
        from matplotlib.pyplot import show

        names = []
        columns = []
        legend_entries = []
        for name in values:
            table = self._get_table(name)
            if table is None:
                # Already warned about; plot whatever else is available.
                continue
            names.append(name)
            columns.append(table["value"])
            legend_entries.append(
                f"{name} [{self.get_unit_for_quantity(name)}]")

        if not columns:
            return None

        df = pd.concat(columns, axis=1, keys=names)
        ax = df.plot(kind=kind)
        ax.legend(legend_entries)
        ax.set_xlabel("step")
        show()
        return ax

    def dump(self, table_name: str) -> None:
        """Print the table called *table_name*."""
        table = self._get_table(table_name)
        if table is None:
            # _get_table has already warned; there is nothing to print.
            return
        print(table_from_df(table))

    def get_unit_for_quantity(self, quantity: str) -> str:
        """Return the ``unit`` entry of *quantity* from the 'quantities' table.

        :returns: the unit as a string, or ``""`` if the table is unavailable
            or has no row whose ``name`` equals *quantity*.
        """
        quantities = self._get_table("quantities")
        if quantities is None:
            return ""

        units = quantities.loc[quantities["name"] == quantity, "unit"]
        if units.empty:
            return ""
        return str(units.iloc[0])



def make_db(file, interactive):
    """Create a :class:`RunDB` backed by the SQLite database in *file*.

    :arg file: path of the database file, interpolated into the engine URL.
    :arg interactive: passed through to :class:`RunDB`.
    :returns: the new :class:`RunDB` instance.
    """
    db_url = f"sqlite:///{file}"
    sqlite_engine = create_engine(db_url)
    return RunDB(sqlite_engine, interactive=interactive)


class PandalyzerConsole(code.InteractiveConsole):
    """Interactive console whose namespace holds the pandalyzer symbols."""

    def __init__(self, db):
        """
        :arg db: the database object; stored as ``self.db`` and used to build
            the console namespace via ``make_pandalyzer_symbols``.
        """
        self.db = db
        super().__init__(make_pandalyzer_symbols(db))

        # numpy and pyplot are optional conveniences: they are star-imported
        # into the console namespace only if they can be imported here.
        try:
            import numpy  # noqa: F401
            self.runsource("from numpy import *")
        except ImportError:
            pass

        try:
            import matplotlib.pyplot  # noqa: F401
            self.runsource("from matplotlib.pyplot import *")
        except (ImportError, RuntimeError):
            pass

        if HAVE_READLINE:
            import atexit
            import os

            # expanduser() also yields a home directory when the HOME
            # environment variable is unset, where os.environ["HOME"] would
            # raise KeyError.
            # NOTE(review): the history file is named after runalyzer, not
            # pandalyzer -- confirm that this is intended.
            histfile = os.path.join(os.path.expanduser("~"), ".runalyzerhist")
            if os.access(histfile, os.R_OK):
                readline.read_history_file(histfile)
            atexit.register(readline.write_history_file, histfile)
            readline.parse_and_bind("tab: complete")

        self.last_push_result = False

    def push(self, cmdline):
        """Push *cmdline* to the console and remember the outcome.

        :returns: the value returned by ``InteractiveConsole.push`` for this
            line, which is also stored in ``self.last_push_result``.
        """
        self.last_push_result = super().push(cmdline)
        return self.last_push_result

# elif cmd == "plot":
# cursor = self.db.db.execute(self.db.mangle_sql(args))
# columnnames = [column[0] for column in cursor.description]
# self.db.plot_cursor(cursor, labels=columnnames)
# elif cmd == "scatter":
# cursor = self.db.db.execute(self.db.mangle_sql(args))
# columnnames = [column[0] for column in cursor.description]
# self.db.scatter_cursor(cursor, labels=columnnames)
# else:
# print("invalid magic command")

2 changes: 2 additions & 0 deletions logpyle/runalyzer.py
Original file line number Diff line number Diff line change
@@ -76,6 +76,8 @@ def plot_cursor(self, cursor, labels=None, *args, **kwargs):

auto_style = kwargs.pop("auto_style", True)

print(cursor)

if len(cursor.description) == 2:
if auto_style:
style = PLOT_STYLES[0]
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -50,6 +50,7 @@
"bin/logtool",
"bin/runalyzer-gather",
"bin/runalyzer",
"bin/pandalyzer",
],

author="Andreas Kloeckner",