Skip to content

Commit a070d86

Browse files
committed
benchmark, update build, address comments
Signed-off-by: Mohit Chhaya <[email protected]>
1 parent 58dba81 commit a070d86

File tree

3 files changed

+67
-150
lines changed

3 files changed

+67
-150
lines changed

cpp/csp/cppnodes/statsimpl.h

Lines changed: 18 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,9 @@
88
#include <numeric>
99
#include <set>
1010
#include <type_traits>
11-
12-
#ifdef __linux__
13-
#include <ext/pb_ds/assoc_container.hpp>
14-
#include <ext/pb_ds/tree_policy.hpp>
15-
#else
1611
#include <boost/multi_index_container.hpp>
1712
#include <boost/multi_index/ordered_index.hpp>
1813
#include <boost/multi_index/ranked_index.hpp>
19-
#endif
2014

2115
namespace csp::cppnodes
2216
{
@@ -1091,22 +1085,8 @@ class WeightedKurtosis
10911085
bool m_excess;
10921086
};
10931087

1094-
#ifdef __linux__
1095-
template<typename Comparator>
1096-
using ost = __gnu_pbds::tree<double, __gnu_pbds::null_type, Comparator, __gnu_pbds::rb_tree_tag,
1097-
__gnu_pbds::tree_order_statistics_node_update>;
1098-
1099-
template<typename Comparator>
1100-
void ost_erase( ost<Comparator> &t, double & v )
1101-
{
1102-
int rank = t.order_of_key( v );
1103-
auto it = t.find_by_order( rank );
1104-
t.erase( it );
1105-
}
1106-
#else
11071088
template <typename Comparator>
11081089
using ost = boost::multi_index::multi_index_container<double, boost::multi_index::indexed_by<boost::multi_index::ranked_non_unique<boost::multi_index::identity<double>, Comparator>>>;
1109-
#endif
11101090

11111091
class Quantile
11121092
{
@@ -1149,11 +1129,7 @@ class Quantile
11491129

11501130
void remove( double x )
11511131
{
1152-
#ifdef __linux__
1153-
ost_erase( m_tree, x );
1154-
#else
11551132
m_tree.erase( m_tree.find( x ) );
1156-
#endif
11571133
}
11581134

11591135
void reset()
@@ -1172,111 +1148,60 @@ class Quantile
11721148
double target = std::get<double>( m_quants[index]._data ) * ( m_tree.size() - 1 );
11731149
int ft = floor( target );
11741150
int ct = ceil( target );
1151+
auto fIt = m_tree.get<0>().nth( ft );
1152+
auto cIt = ( ft == ct ) ? fIt : std::next( fIt );
11751153

11761154
double qtl = 0.0;
1177-
#ifdef __linux__
11781155
switch ( m_interpolation )
1179-
{
1180-
case LINEAR:
1181-
if( ft == target )
1182-
{
1183-
qtl = *m_tree.find_by_order( ft );
1184-
}
1185-
else
1186-
{
1187-
double lower = *m_tree.find_by_order( ft );
1188-
double higher = *m_tree.find_by_order( ct );
1189-
qtl = ( 1 - target + ft ) * lower + ( 1 - ct + target ) * higher;
1190-
}
1191-
break;
1192-
case LOWER:
1193-
qtl = *m_tree.find_by_order( ft );
1194-
break;
1195-
case HIGHER:
1196-
qtl = *m_tree.find_by_order( ct );
1197-
break;
1198-
case MIDPOINT:
1199-
if( ft == target )
1200-
{
1201-
qtl = *m_tree.find_by_order( ft );
1202-
}
1203-
else
1204-
{
1205-
double lower = *m_tree.find_by_order( ft );
1206-
double higher = *m_tree.find_by_order( ct );
1207-
qtl = ( higher+lower ) / 2;
1208-
}
1209-
break;
1210-
case NEAREST:
1211-
if( target - ft < ct - target )
1212-
{
1213-
qtl = *m_tree.find_by_order( ft );
1214-
}
1215-
else
1216-
{
1217-
qtl = *m_tree.find_by_order( ct );
1218-
}
1219-
break;
1220-
default:
1221-
break;
1222-
}
1223-
#else
1224-
switch (m_interpolation)
12251156
{
12261157
case LINEAR:
1227-
if (ft == target)
1158+
if ( ft == target )
12281159
{
1229-
qtl = *m_tree.get<0>().nth(ft);
1160+
qtl = *fIt;
12301161
}
12311162
else
12321163
{
1233-
double lower = *m_tree.get<0>().nth(ft);
1234-
double higher = *m_tree.get<0>().nth(ct);
1235-
qtl = (1 - target + ft) * lower + (1 - ct + target) * higher;
1164+
double lower = *fIt;
1165+
double higher = *cIt;
1166+
qtl = ( 1 - target + ft ) * lower + ( 1 - ct + target ) * higher;
12361167
}
12371168
break;
12381169
case LOWER:
1239-
qtl = *m_tree.get<0>().nth(ft);
1170+
qtl = *fIt;
12401171
break;
12411172
case HIGHER:
1242-
qtl = *m_tree.get<0>().nth(ct);
1173+
qtl = *cIt;
12431174
break;
12441175
case MIDPOINT:
1245-
if (ft == target)
1176+
if ( ft == target )
12461177
{
1247-
qtl = *m_tree.get<0>().nth(ft);
1178+
qtl = *fIt;
12481179
}
12491180
else
12501181
{
1251-
double lower = *m_tree.get<0>().nth(ft);
1252-
double higher = *m_tree.get<0>().nth(ct);
1253-
qtl = (higher + lower) / 2;
1182+
double lower = *fIt;
1183+
double higher = *cIt;
1184+
qtl = ( higher + lower ) / 2;
12541185
}
12551186
break;
12561187
case NEAREST:
1257-
if (target - ft < ct - target)
1188+
if ( target - ft < ct - target )
12581189
{
1259-
qtl = *m_tree.get<0>().nth(ft);
1190+
qtl = *fIt;
12601191
}
12611192
else
12621193
{
1263-
qtl = *m_tree.get<0>().nth(ct);
1194+
qtl = *cIt;
12641195
}
12651196
break;
12661197
default:
12671198
break;
12681199
}
1269-
#endif
12701200
return qtl;
12711201
}
12721202

12731203
private:
1274-
1275-
#ifdef __linux__
1276-
ost<std::less_equal<double>> m_tree;
1277-
#else
12781204
ost<std::less<double>> m_tree;
1279-
#endif
12801205
std::vector<Dictionary::Data> m_quants;
12811206
int64_t m_interpolation;
12821207
};
@@ -1364,35 +1289,21 @@ class Rank
13641289
else
13651290
{
13661291
m_lastval = x;
1367-
#ifdef __linux__
1368-
if( m_method == MAX )
1369-
m_maxtree.insert( x );
1370-
else
1371-
m_mintree.insert( x );
1372-
#else
13731292
if( m_method == MAX )
13741293
m_maxtree.insert( x );
13751294
else
13761295
m_mintree.insert( x );
1377-
#endif
13781296
}
13791297
}
13801298

13811299
void remove( double x )
13821300
{
13831301
if( likely( !isnan( x ) ) )
13841302
{
1385-
#ifdef __linux__
1386-
if( m_method == MAX )
1387-
ost_erase( m_maxtree, x );
1388-
else
1389-
ost_erase( m_mintree, x );
1390-
#else
13911303
if ( m_method == MAX )
13921304
m_maxtree.erase ( m_maxtree.find( x ) );
13931305
else
13941306
m_mintree.erase ( m_mintree.find( x ) );
1395-
#endif
13961307
}
13971308
}
13981309

@@ -1408,42 +1319,6 @@ class Rank
14081319
{
14091320
// Verify tree is not empty and m_lastval is valid
14101321
// Last value can only ever be NaN if the "keep" nan option is used
1411-
#ifdef __linux__
1412-
if( likely( !isnan( m_lastval ) && ( ( m_method == MAX && m_maxtree.size() > 0 ) || m_mintree.size() > 0 ) ) )
1413-
{
1414-
switch( m_method )
1415-
{
1416-
case MIN:
1417-
{
1418-
if( m_mintree.size() == 1 )
1419-
return 0;
1420-
return m_mintree.order_of_key( m_lastval );
1421-
}
1422-
case MAX:
1423-
{
1424-
if( m_maxtree.size() == 1 )
1425-
return 0;
1426-
return m_maxtree.size() - 1 - m_maxtree.order_of_key( m_lastval );
1427-
}
1428-
case AVG:
1429-
{
1430-
// Need to iterate to find average rank
1431-
if( m_mintree.size() == 1 )
1432-
return 0;
1433-
1434-
int min_rank = m_mintree.order_of_key( m_lastval );
1435-
int max_rank = min_rank;
1436-
auto it = m_mintree.find_by_order( min_rank );
1437-
it++;
1438-
for( ; it != m_mintree.end() && *it == m_lastval ; it++ ) max_rank++;
1439-
return ( double )( min_rank + max_rank ) / 2;
1440-
}
1441-
1442-
default:
1443-
break;
1444-
}
1445-
}
1446-
#else
14471322
if( likely( !isnan( m_lastval ) && ( ( m_method == MAX && m_maxtree.size() > 0 ) || m_mintree.size() > 0 ) ) )
14481323
{
14491324
switch( m_method )
@@ -1469,26 +1344,19 @@ class Rank
14691344
int max_rank = min_rank;
14701345
auto it = m_mintree.get<0>().nth( min_rank );
14711346
it++;
1472-
for( ; it != m_mintree.end() && *it == m_lastval ; it++ ) max_rank++;
1347+
for( ; it != m_mintree.end() && *it == m_lastval ; it++ ) max_rank++; // While this is in theory O(n), in reality this loop is only iterated once, since there are likely no duplicate values or very few.
14731348
return ( double )( min_rank + max_rank ) / 2;
14741349
}
14751350
default:
14761351
break;
14771352
}
14781353
}
1479-
#endif
14801354
return std::numeric_limits<double>::quiet_NaN();
14811355
}
14821356

14831357
private:
1484-
1485-
#ifdef __linux__
1486-
ost<std::less_equal<double>> m_mintree;
1487-
ost<std::greater_equal<double>> m_maxtree;
1488-
#else
14891358
ost<std::less<double>> m_mintree;
14901359
ost<std::greater<double>> m_maxtree;
1491-
#endif
14921360
double m_lastval;
14931361

14941362
int64_t m_method;

csp/benchmarks/stats.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Written by @AdamGlustein
2+
import numpy as np
3+
import time
4+
from datetime import datetime, timedelta
5+
6+
import csp
7+
8+
st = datetime(2020, 1, 1)
9+
N = 10_000
10+
ARRAY_SIZE = 100
11+
TEST_TIMES = [st + timedelta(seconds=i) for i in range(N)]
12+
RANDOM_VALUES = [np.random.normal(size=(ARRAY_SIZE,)) for i in range(N)] # 100 element np array
13+
DATA = list(zip(TEST_TIMES, RANDOM_VALUES))
14+
INTERVAL = 1000
15+
NUM_SAMPLES = 100
16+
17+
18+
def g_qtl():
19+
data = csp.curve(typ=np.ndarray, data=DATA)
20+
median = csp.stats.median(data, interval=INTERVAL)
21+
csp.add_graph_output("final_median", median, tick_count=1)
22+
23+
24+
def g_rank():
25+
data = csp.curve(typ=np.ndarray, data=DATA)
26+
rank = csp.stats.rank(data, interval=INTERVAL)
27+
csp.add_graph_output("final_rank", rank, tick_count=1)
28+
29+
30+
if __name__ == "__main__":
31+
qtl_times, rank_times = list(), list()
32+
for i in range(NUM_SAMPLES):
33+
start = time.time()
34+
csp.run(g_qtl, starttime=st, endtime=timedelta(seconds=N))
35+
post_qtl = time.time()
36+
csp.run(g_rank, starttime=st, endtime=timedelta(seconds=N))
37+
post_rank = time.time()
38+
39+
qtl_times.append(post_qtl - start)
40+
rank_times.append(post_rank - post_qtl)
41+
print(i)
42+
43+
avg_med = sum(qtl_times) / NUM_SAMPLES
44+
avg_rank = sum(rank_times) / NUM_SAMPLES
45+
print(
46+
f"Average time in {NUM_SAMPLES} tests for median with {N=}, {ARRAY_SIZE=}, {INTERVAL=}: {round(avg_med, 2)} s"
47+
)
48+
print(f"Average time in {NUM_SAMPLES} tests for rank with {N=}, {ARRAY_SIZE=}, {INTERVAL=}: {round(avg_rank, 2)} s")

vcpkg.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"abseil",
66
"arrow",
77
"boost-beast",
8+
"boost-multi-index",
89
"brotli",
910
"exprtk",
1011
"gtest",

0 commit comments

Comments
 (0)