#!/usr/bin/env python
"""
Compare execution time of PostgreSQL functions
Usage:
profile-pg-func [--file <file>]... <func>... [--raw]
[--calls <calls>] [--runs <runs>] [--trim <trim>]
[--verbose] [--pghost=<host>] [--pgport=<port>] [--dbname=<db>]
[--user=<user>] [--password=<password>]
profile-pg-func --help
profile-pg-func --version
<func> A SQL function call that should be tested, e.g. 'random()'.
The func code can use call_idx column -- it will be set to 1..{calls}
If --raw is used, the <func> must be a complete query.
These magical values will be substituted in <func> before running:
* {calls} will be replaced with --calls value
* {run_idx} will be replaced with the current RUN number (1..{runs})
* {random_geopoint} will be replaced with a function call to generate
a random geo point.
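
  Example (hypothetical invocation; ST_X assumes PostGIS is installed):
    profile-pg-func --calls 10000 --runs 5 'random()' 'ST_X({random_geopoint})'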

Options:
  -f --file <file>        Import the given file(s) into PostgreSQL before profiling.
  --raw                   Treat <func> as a complete SQL statement.
  -c --calls <calls>      How many times to call each test function. [default: 100000]
  -r --runs <runs>        How many runs to do (each run executes N calls). [default: 10]
  -t --trim <trim>        How many of the highest and lowest runs to ignore. [default: 1]
  -v --verbose            Print additional debugging information.
  --help                  Show this screen.
  --version               Show version.

PostgreSQL Options:
  -h --pghost=<host>      Postgres hostname. By default uses PGHOST env or "localhost" if not set.
  -P --pgport=<port>      Postgres port. By default uses PGPORT env or "5432" if not set.
  -d --dbname=<db>        Postgres db name. By default uses PGDATABASE env or "openmaptiles" if not set.
  -U --user=<user>        Postgres user. By default uses PGUSER env or "openmaptiles" if not set.
  --password=<password>   Postgres password. By default uses PGPASSWORD env or "openmaptiles" if not set.

  These legacy environment variables should not be used, but they are still supported:
    POSTGRES_HOST, POSTGRES_PORT, POSTGRES_DB, POSTGRES_USER, POSTGRES_PASSWORD
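
  For example, connection parameters can also come from the standard environment
  variables (hypothetical values shown):
    PGHOST=localhost PGDATABASE=openmaptiles profile-pg-func 'random()'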
"""
from dataclasses import dataclass, field
from pathlib import Path
import asyncio
import asyncpg
import statistics
from datetime import datetime as dt, timedelta
# noinspection PyProtectedMember
from docopt import docopt, DocoptExit
from tabulate import tabulate
from typing import List
import openmaptiles
from openmaptiles.pgutils import parse_pg_args, PgWarnings, print_query_error
from openmaptiles.utils import round_td, shorten_str


def get_int(args, param, min_val) -> int:
    """Parse an integer CLI argument, requiring it to be at least min_val."""
    try:
        v = int(args[param])
        if v < min_val:
            raise DocoptExit(f'{param} must be an integer >= {min_val}')
        return v
    except ValueError:
        raise DocoptExit(f"{param} must be an integer, but '{args[param]}' was given")


@dataclass
class Stat:
    index: int
    function: str
    perf_delta: List[timedelta] = field(default_factory=list)
    perf_seconds: List[float] = None
    mean: timedelta = None
    min: timedelta = None
    max: timedelta = None
    stdev: float = None
    position: int = None
    slower_by: float = None

    def finalize(self, trim):
        """Get rid of the N fastest and N slowest tests, populate perf_seconds"""
        self.perf_delta.sort()
        if trim > 0:
            self.perf_delta = self.perf_delta[trim:-trim]
        self.perf_seconds = [v.total_seconds() for v in self.perf_delta]
        self.mean = timedelta(seconds=statistics.mean(self.perf_seconds))
        self.min = timedelta(seconds=min(self.perf_seconds))
        self.max = timedelta(seconds=max(self.perf_seconds))
        # stdev requires at least two samples; report 0.0 when only one run remains
        self.stdev = (statistics.stdev(self.perf_seconds)
                      if len(self.perf_seconds) > 1 else 0.0)

    def format_func(self, verbose):
        res = self.function
        if not verbose:
            res = shorten_str(res, 50)
        return f'{self.index}) {res}'


async def main(args):
    pghost, pgport, dbname, user, password = parse_pg_args(args)
    functions = args['<func>']
    files = args['--file']
    raw = args['--raw']
    runs = get_int(args, '--runs', 1)
    calls = get_int(args, '--calls', 1)
    trim = get_int(args, '--trim', 0)
    verbose = args['--verbose']
    if trim * 2 >= runs:
        raise DocoptExit('--trim must be less than half of --runs')

    conn = await asyncpg.connect(
        database=dbname, host=pghost, port=pgport, user=user, password=password,
    )
    pg_warnings = PgWarnings(conn, delay_printing=True)

    for file in files:
        print(f'Importing {file}...')
        await conn.execute(Path(file).read_text('utf-8'))

    print(f'Running {len(functions)} functions x {runs} runs x {calls:,} each...')
    stats = [Stat(idx, func) for idx, func in enumerate(functions, start=1)]
    for run in range(runs):
        for stat in stats:
            if raw:
                query = stat.function
            else:
                # Evaluate the function once per generated row, exposing the series
                # value as the call_idx column promised in the docstring
                query = (f'SELECT count(*) FROM (SELECT {stat.function} '
                         f'FROM generate_series(1, {calls}) AS call_idx) q')
            for k, v in {
                '{calls}': str(calls),
                '{run_idx}': str(run + 1),
                '{random_geopoint}': 'st_setsrid(st_makepoint(random()*40000000-20000000,'
                                     ' random()*40000000-20000000), 3857)',
            }.items():
                query = query.replace(k, v)
            if verbose:
                print(f'Running: {query}')
            start = dt.utcnow()
            try:
                await conn.execute(query)
                took = dt.utcnow() - start
            except asyncpg.PostgresError as err:
                took = dt.utcnow() - start
                print_query_error(
                    f'ERROR running {stat.function} ({took.total_seconds():.2f}s)',
                    err, pg_warnings, verbose, query)
                return
            stat.perf_delta.append(took)
            pg_warnings.print()
            if verbose:
                print(f'Done in {round_td(took)}')
        print(f'Finished run #{run + 1}')

    if trim > 0:
        print(f'Dropping {trim} fastest and {trim} slowest runs')
    for stat in stats:
        stat.finalize(trim)

    # Rank by mean run time; report how much slower each entry is vs the fastest
    stats_by_speed = list(sorted(stats, key=lambda s: s.mean))
    for idx, stat in enumerate(stats_by_speed):
        stat.position = idx + 1
        if idx > 0:
            first_mean = stats_by_speed[0].mean
            stat.slower_by = (stat.mean - first_mean) / first_mean

    print()
    mean_hdr = f'AVG of {runs - trim * 2} runs'
    results = [{
        '% slower': '' if stat.slower_by is None else f'{stat.slower_by:,.1%}',
        'Function': stat.format_func(verbose),
        mean_hdr: stat.mean,
        'MIN': stat.min,
        'MAX': stat.max,
        'STDEV': stat.stdev,
    } for stat in stats_by_speed]
    print(tabulate(results, headers='keys'))


if __name__ == '__main__':
    asyncio.run(main(docopt(__doc__, version=openmaptiles.__version__)))