Skip to content

Commit baa1601

Browse files
authored Aug 20, 2017
Start adding an asv benchmark suite (geopandas#497)
1 parent a3ee99b commit baa1601

File tree

6 files changed

+388
-0
lines changed

6 files changed

+388
-0
lines changed
 

‎asv.conf.json

+151
Original file line numberDiff line numberDiff line change
{
    // The version of the config file format. Do not change, unless
    // you know what you are doing.
    "version": 1,

    // The name of the project being benchmarked
    "project": "geopandas",

    // The project's homepage
    "project_url": "http://geopandas.org/",

    // The URL or local path of the source code repository for the
    // project being benchmarked
    "repo": ".",

    // List of branches to benchmark. If not provided, defaults to "master"
    // (for git) or "default" (for mercurial).
    // "branches": ["master"], // for git
    // "branches": ["default"], // for mercurial

    // The DVCS being used. If not set, it will be automatically
    // determined from "repo" by looking at the protocol in the URL
    // (if remote), or by looking for special directories, such as
    // ".git" (if local).
    // "dvcs": "git",

    // The tool to use to create environments. May be "conda",
    // "virtualenv" or other value depending on the plugins in use.
    // If missing or the empty string, the tool will be automatically
    // determined by looking for tools on the PATH environment
    // variable.
    "environment_type": "conda",

    // timeout in seconds for installing any dependencies in environment
    // defaults to 10 min
    //"install_timeout": 600,

    // the base URL to show a commit for the project.
    "show_commit_url": "http://github.com/geopandas/geopandas/commit/",

    // The Pythons you'd like to test against. If not provided, defaults
    // to the current version of Python used to run `asv`.
    // "pythons": ["2.7", "3.3"],

    // The matrix of dependencies to test. Each key is the name of a
    // package (in PyPI) and the values are version numbers. An empty
    // list or empty string indicates to just test against the default
    // (latest) version. null indicates that the package is to not be
    // installed. If the package to be tested is only available from
    // PyPi, and the 'environment_type' is conda, then you can preface
    // the package name by 'pip+', and the package will be installed via
    // pip (with all the conda available packages installed first,
    // followed by the pip installed packages).
    //
    "matrix": {
        "pandas": [],
        "shapely": [],
        "cython": [],
        "fiona": [],
        "pyproj": [],
        "six": [],
        "rtree": [],
        "matplotlib": [],
        "descartes": []
    },
    // "numpy": ["1.6", "1.7"],
    // "six": ["", null], // test with and without six installed
    // "pip+emcee": [""], // emcee is only available for install with pip.
    // },

    // Combinations of libraries/python versions can be excluded/included
    // from the set to test. Each entry is a dictionary containing additional
    // key-value pairs to include/exclude.
    //
    // An exclude entry excludes entries where all values match. The
    // values are regexps that should match the whole string.
    //
    // An include entry adds an environment. Only the packages listed
    // are installed. The 'python' key is required. The exclude rules
    // do not apply to includes.
    //
    // In addition to package names, the following keys are available:
    //
    // - python
    //     Python version, as in the *pythons* variable above.
    // - environment_type
    //     Environment type, as above.
    // - sys_platform
    //     Platform, as in sys.platform. Possible values for the common
    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
    //
    // "exclude": [
    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
    //     {"environment_type": "conda", "six": null}, // don't run without six on conda
    // ],
    //
    // "include": [
    //     // additional env for python2.7
    //     {"python": "2.7", "numpy": "1.8"},
    //     // additional env if run on windows+conda
    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
    // ],

    // The directory (relative to the current directory) that benchmarks are
    // stored in. If not provided, defaults to "benchmarks"
    // "benchmark_dir": "benchmarks",

    // The directory (relative to the current directory) to cache the Python
    // environments in. If not provided, defaults to "env"
    "env_dir": ".asv/env",

    // The directory (relative to the current directory) that raw benchmark
    // results are stored in. If not provided, defaults to "results".
    "results_dir": ".asv/results",

    // The directory (relative to the current directory) that the html tree
    // should be written to. If not provided, defaults to "html".
    "html_dir": ".asv/html",

    // The number of characters to retain in the commit hashes.
    // "hash_length": 8,

    // `asv` will cache wheels of the recent builds in each
    // environment, making them faster to install next time. This is
    // number of builds to keep, per environment.
    // "wheel_cache_size": 0

    // The commits after which the regression search in `asv publish`
    // should start looking for regressions. Dictionary whose keys are
    // regexps matching to benchmark names, and values corresponding to
    // the commit (exclusive) after which to start looking for
    // regressions. The default is to start from the first commit
    // with results. If the commit is `null`, regression detection is
    // skipped for the matching benchmark.
    //
    // "regressions_first_commits": {
    //    "some_benchmark": "352cdf", // Consider regressions only after this commit
    //    "another_benchmark": null,  // Skip regression detection altogether
    // }

    // The thresholds for relative change in results, after which `asv
    // publish` starts reporting regressions. Dictionary of the same
    // form as in ``regressions_first_commits``, with values
    // indicating the thresholds. If multiple entries match, the
    // maximum is taken. If no entry matches, the default is 5%.
    //
    // "regressions_thresholds": {
    //    "some_benchmark": 0.01,   // Threshold of 1%
    //    "another_benchmark": 0.5, // Threshold of 50%
    // }
}

‎benchmarks/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

‎benchmarks/geom_methods.py

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import random
2+
3+
import numpy as np
4+
from geopandas import GeoSeries
5+
from shapely.geometry import Point, LineString, Polygon
6+
7+
8+
def with_attributes(**attrs):
    """Return a decorator that copies ``attrs`` onto the decorated function.

    The benchmark classes use this to attach asv metadata
    (``param_names``/``params``) to individual benchmark methods.
    """
    def apply_attrs(fn):
        fn.__dict__.update(attrs)
        return fn
    return apply_attrs
15+
16+
class Bench:
    """Benchmarks for GeoSeries geometry methods and properties."""

    def setup(self, *args):
        """Build the shared fixtures: a large point series, two aligned
        1000-triangle series, a 10000-triangle series, and one scalar
        triangle."""
        self.points = GeoSeries([Point(i, i) for i in range(100000)])

        def rand_triangle():
            # A polygon from three uniform-random points in the unit square.
            return Polygon([(random.random(), random.random())
                            for _ in range(3)])

        small = GeoSeries([rand_triangle() for _ in range(1000)])
        # A reshuffled (with replacement) copy, used as the "other" operand
        # in the vectorised benchmarks.
        reshuffled = small.copy().iloc[np.random.choice(1000, 1000)]
        big = GeoSeries([rand_triangle() for _ in range(10000)])

        self.triangles = small
        self.triangles2 = reshuffled
        self.triangles_big = big
        self.triangle = rand_triangle()

    @with_attributes(param_names=['op'],
                     params=[('contains', 'crosses', 'disjoint', 'intersects',
                              'overlaps', 'touches', 'within', 'geom_equals',
                              'geom_almost_equals', 'geom_equals_exact')])
    def time_binary_predicate(self, op):
        """Boolean predicate: series vs a single scalar geometry."""
        predicate = getattr(self.triangles, op)
        predicate(self.triangle)

    @with_attributes(param_names=['op'],
                     params=[('contains', 'crosses', 'disjoint', 'intersects',
                              'overlaps', 'touches', 'within', 'geom_equals',
                              'geom_almost_equals')])  # 'geom_equals_exact')])
    def time_binary_predicate_vector(self, op):
        """Boolean predicate: series vs aligned series
        (geom_equals_exact is left out of this variant)."""
        predicate = getattr(self.triangles, op)
        predicate(self.triangles2)

    @with_attributes(param_names=['op'],
                     params=[('distance')])
    def time_binary_float(self, op):
        """Float-valued binary op: series vs scalar geometry."""
        method = getattr(self.triangles, op)
        method(self.triangle)

    @with_attributes(param_names=['op'],
                     params=[('distance')])
    def time_binary_float_vector(self, op):
        """Float-valued binary op: series vs aligned series."""
        method = getattr(self.triangles, op)
        method(self.triangles2)

    @with_attributes(param_names=['op'],
                     params=[('difference', 'symmetric_difference', 'union',
                              'intersection')])
    def time_binary_geo(self, op):
        """Geometry-valued set operation: series vs scalar geometry."""
        method = getattr(self.triangles, op)
        method(self.triangle)

    @with_attributes(param_names=['op'],
                     params=[('difference', 'symmetric_difference', 'union',
                              'intersection')])
    def time_binary_geo_vector(self, op):
        """Geometry-valued set operation: series vs aligned series."""
        method = getattr(self.triangles, op)
        method(self.triangles2)

    @with_attributes(param_names=['op'],
                     params=[('is_valid', 'is_empty', 'is_simple', 'is_ring')])
    def time_unary_predicate(self, op):
        """Element-wise boolean property access."""
        getattr(self.triangles, op)

    @with_attributes(param_names=['op'],
                     params=[('area', 'length')])
    def time_unary_float(self, op):
        """Element-wise float property access, on the larger series."""
        getattr(self.triangles_big, op)

    @with_attributes(param_names=['op'],
                     params=[('boundary', 'centroid', 'convex_hull',
                              'envelope', 'exterior', 'interiors')])
    def time_unary_geo(self, op):
        """Element-wise geometry-valued property access."""
        getattr(self.triangles, op)

    def time_unary_geo_representative_point(self, *args):
        """representative_point is a method, not a property, so it gets
        its own benchmark."""
        self.triangles.representative_point()

    def time_geom_type(self, *args):
        self.triangles_big.geom_type

    def time_bounds(self, *args):
        self.triangles.bounds

    def time_unary_union(self, *args):
        self.triangles.unary_union

    def time_buffer(self, *args):
        self.points.buffer(2)
101+
102+
# TODO
103+
# project, interpolate, translate, rotate, scale, skew, explode
104+
# cx indexer

‎benchmarks/overlay.py

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from geopandas import GeoDataFrame, GeoSeries, read_file, datasets, overlay
2+
from shapely.geometry import Polygon
3+
4+
5+
class Countries:
    """Benchmark ``overlay`` between country polygons and buffered capitals."""

    # asv parameters: each benchmark runs once per overlay set-operation.
    param_names = ['op']
    params = [('intersection', 'union', 'identity', 'symmetric_difference',
               'difference')]

    def setup(self, *args):
        # Load the example datasets bundled with geopandas.
        world = read_file(datasets.get_path('naturalearth_lowres'))
        capitals = read_file(datasets.get_path('naturalearth_cities'))
        countries = world[['geometry', 'name']]
        # Reproject to EPSG:3395 and drop Antarctica.
        # NOTE(review): '+init=epsg:3395' is the legacy proj4 init string,
        # deprecated in pyproj >= 2 in favour of 'EPSG:3395' — confirm which
        # pyproj versions must be supported before changing it.
        countries = countries.to_crs('+init=epsg:3395')[
            countries.name != "Antarctica"]
        capitals = capitals.to_crs('+init=epsg:3395')
        # Replace the capital points with 500000-unit buffers so the
        # overlays are non-trivial polygon/polygon operations.
        capitals['geometry'] = capitals.buffer(500000)

        self.countries = countries
        self.capitals = capitals

    def time_overlay(self, op):
        # Result is discarded; only the runtime is measured.
        overlay(self.countries, self.capitals, how=op)
25+
26+
27+
class Small:
    """Benchmark ``overlay`` on a tiny hand-built pair of two-square frames."""

    # asv parameters: each benchmark runs once per overlay set-operation.
    param_names = ['op']
    params = [('intersection', 'union', 'identity', 'symmetric_difference',
               'difference')]

    def setup(self, *args):
        def square(x0, y0):
            # Axis-aligned 2x2 square with lower-left corner at (x0, y0).
            return Polygon([(x0, y0), (x0 + 2, y0),
                            (x0 + 2, y0 + 2), (x0, y0 + 2)])

        left = GeoSeries([square(0, 0), square(2, 2)])
        right = GeoSeries([square(1, 1), square(3, 3)])

        self.df1 = GeoDataFrame({'geometry': left, 'df1': [1, 2]})
        self.df2 = GeoDataFrame({'geometry': right, 'df2': [1, 2]})

    def time_overlay(self, op):
        overlay(self.df1, self.df2, how=op)

‎benchmarks/plotting.py

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import random
2+
3+
from geopandas import GeoDataFrame, GeoSeries
4+
from shapely.geometry import Point, LineString, Polygon, MultiPolygon
5+
import numpy as np
6+
7+
8+
class Bench:
    """Plotting benchmarks across several geometry types."""

    # asv parameters: one run per geometry type (plus a mixed series).
    param_names = ['geom_type']
    params = [('Point', 'LineString', 'Polygon', 'MultiPolygon', 'mixed')]

    def setup(self, geom_type):
        """Build a GeoSeries of the requested geometry type, plus a
        GeoDataFrame with a random ``values`` column for the column plot.
        """
        if geom_type == 'Point':
            geoms = GeoSeries([Point(i, i) for i in range(1000)])
        elif geom_type == 'LineString':
            geoms = GeoSeries([LineString([(random.random(), random.random())
                                           for _ in range(5)])
                               for _ in range(100)])
        elif geom_type == 'Polygon':
            geoms = GeoSeries([Polygon([(random.random(), random.random())
                                        for _ in range(3)])
                               for _ in range(100)])
        elif geom_type == 'MultiPolygon':
            geoms = GeoSeries(
                [MultiPolygon([Polygon([(random.random(), random.random())
                                        for _ in range(3)])
                               for _ in range(3)])
                 for _ in range(20)])
        elif geom_type == 'mixed':
            # Start from points and overwrite random positions with lines
            # and polygons to get a heterogeneous series.
            g1 = GeoSeries([Point(i, i) for i in range(100)])
            g2 = GeoSeries([LineString([(random.random(), random.random())
                                        for _ in range(5)])
                            for _ in range(100)])
            g3 = GeoSeries([Polygon([(random.random(), random.random())
                                     for _ in range(3)])
                            for _ in range(100)])

            geoms = g1
            geoms.iloc[np.random.randint(0, 100, 50)] = g2
            geoms.iloc[np.random.randint(0, 100, 33)] = g3

        # (removed leftover debug print of geoms.geom_type.value_counts();
        # it polluted the asv console output on every setup call)

        df = GeoDataFrame({'geometry': geoms,
                           'values': np.random.randn(len(geoms))})

        self.geoms = geoms
        self.df = df

    def time_plot_series(self, *args):
        """Plain geometry plot of the series."""
        self.geoms.plot()

    def time_plot_values(self, *args):
        """Choropleth-style plot coloured by the random 'values' column."""
        self.df.plot(column='values')
57+

‎benchmarks/sjoin.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import random
2+
3+
from geopandas import GeoDataFrame, GeoSeries, sjoin
4+
from shapely.geometry import Point, LineString, Polygon
5+
import numpy as np
6+
7+
8+
class Bench:
    """Spatial-join benchmarks: 1000 random triangles vs 10000 random points."""

    # asv parameters: one run per binary predicate passed to sjoin.
    param_names = ['op']
    params = [('intersects', 'contains', 'within')]

    def setup(self, *args):
        tri_geoms = GeoSeries(
            [Polygon([(random.random(), random.random()) for _ in range(3)])
             for _ in range(1000)])

        xs = np.random.random(10000)
        ys = np.random.random(10000)
        pt_geoms = GeoSeries([Point(x, y) for x, y in zip(xs, ys)])

        self.df1 = GeoDataFrame({'val1': np.random.randn(len(tri_geoms)),
                                 'geometry': tri_geoms})
        self.df2 = GeoDataFrame({'val1': np.random.randn(len(pt_geoms)),
                                 'geometry': pt_geoms})

    def time_sjoin(self, op):
        sjoin(self.df1, self.df2, op=op)

0 commit comments

Comments
 (0)
Please sign in to comment.