-
Notifications
You must be signed in to change notification settings - Fork 587
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3962 from tybug/shrinker-ir
Migrate most shrinker functions to the ir
- Loading branch information
Showing
34 changed files
with
970 additions
and
1,325 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
RELEASE_TYPE: minor | ||
|
||
This release migrates the shrinker to our new internal representation, called the IR layer (:pull:`3962`). This improves the shrinker's performance in the majority of cases. For example, on the Hypothesis test suite, shrinking is a median of 1.38x faster. | ||
|
||
It is possible this release regresses performance while shrinking certain strategies. If you encounter strategies which reliably shrink more slowly than they used to (or shrink slowly at all), please open an issue! | ||
|
||
You can read more about the IR layer at :issue:`3921`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
This directory contains code for benchmarking Hypothesis' shrinking. This was written for [pull/3962](https://github.com/HypothesisWorks/hypothesis/pull/3962) and is a manual process at the moment, though we may eventually integrate it more closely with CI for automated benchmarking. | ||
|
||
To run a benchmark: | ||
|
||
* Add the contents of `conftest.py` to the bottom of `hypothesis-python/tests/conftest.py` | ||
* In `hypothesis-python/tests/common/debug.py`, change `derandomize=True` to `derandomize=False` (if you are running more than one trial) | ||
* Run the tests: `pytest hypothesis-python/tests/` | ||
* Note that the benchmarking script does not currently support xdist, so do not use `-n 8` or similar. | ||
|
||
When pytest finishes, the output will contain a dictionary of the benchmarking results. Add that dictionary as a new entry in the appropriate list (`"old"` or `"new"`) in `data.json`. Repeat for however many trials you want; n=5 seems reasonable. | ||
|
||
Also repeat for both your baseline ("old") and your comparison ("new") code. | ||
|
||
Then run `python graph.py` to generate a graph comparing the old and new results. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# This file is part of Hypothesis, which may be found at | ||
# https://github.com/HypothesisWorks/hypothesis/ | ||
# | ||
# Copyright the Hypothesis Authors. | ||
# Individual contributors are listed in AUTHORS.rst and the git log. | ||
# | ||
# This Source Code Form is subject to the terms of the Mozilla Public License, | ||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||
# obtain one at https://mozilla.org/MPL/2.0/. | ||
|
||
import inspect | ||
import json | ||
from collections import defaultdict | ||
|
||
import pytest | ||
from _pytest.monkeypatch import MonkeyPatch | ||
|
||
# we'd like to support xdist here for parallelism, but a session-scope fixture won't | ||
# be enough: https://github.com/pytest-dev/pytest-xdist/issues/271. need a lockfile | ||
# or equivalent. | ||
shrink_calls = defaultdict(list) | ||
|
||
|
||
def pytest_collection_modifyitems(config, items):
    """Skip every collected test whose source does not appear to call ``minimal()``.

    The benchmark only records shrinks, so tests which never call ``minimal()``
    are marked as skipped to keep the run short.

    :param config: the pytest config object (unused; part of the hook signature).
    :param items: the collected test items; skip markers are added in place.
    """
    skip = pytest.mark.skip(reason="Does not call minimal()")
    for item in items:
        try:
            source = inspect.getsource(item.obj)
        except (AttributeError, OSError, TypeError):
            # Some items have no Python object or no retrievable source. We
            # cannot show that they call minimal(), so skip them instead of
            # aborting the whole collection.
            source = ""
        # is this perfect? no. but it is cheap!
        if " minimal(" not in source:
            item.add_marker(skip)
|
||
|
||
@pytest.fixture(scope="function", autouse=True)
def _benchmark_shrinks():
    """Record how many engine calls each ``Shrinker.shrink`` run makes, per test.

    While a test runs, ``Shrinker.shrink`` is replaced by a wrapper which calls
    the original and then appends ``engine.call_count - initial_calls`` to the
    module-level ``shrink_calls`` mapping, keyed by ``<file name>::<test name>``.
    The original method is restored on teardown.
    """
    import os

    from hypothesis.internal.conjecture.shrinker import Shrinker

    monkeypatch = MonkeyPatch()

    def record_shrink_calls(calls):
        name = None
        # inspect.stack() lists the innermost frame first and we deliberately do
        # not break, so the outermost ``test_*`` frame wins. context=0 avoids
        # reading source lines for every frame, which we never use.
        for frame in inspect.stack(context=0):
            if frame.function.startswith("test_"):
                # os.path.basename also strips directories on Windows, where
                # splitting on "/" would leave the full path in place.
                name = f"{os.path.basename(frame.filename)}::{frame.function}"
        # some minimal calls happen at collection-time outside of a test context
        # (maybe something we should fix/look into)
        if name is None:
            return

        shrink_calls[name].append(calls)

    old_shrink = Shrinker.shrink

    def shrink(self, *args, **kwargs):
        v = old_shrink(self, *args, **kwargs)
        record_shrink_calls(self.engine.call_count - self.initial_calls)
        return v

    monkeypatch.setattr(Shrinker, "shrink", shrink)
    try:
        yield
    finally:
        # start teardown: let MonkeyPatch restore what it replaced, even if the
        # fixture generator is closed early.
        monkeypatch.undo()
|
||
|
||
def pytest_sessionfinish(session, exitstatus):
    """Print the collected shrink-call counts as JSON once the session ends.

    Both hook arguments are unused; the printed JSON is built from the
    module-level ``shrink_calls`` mapping.
    """
    serialized = json.dumps(shrink_calls)
    print(f"\nshrinker profiling:\n{serialized}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
{ | ||
"old": [], | ||
"new": [] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
# This file is part of Hypothesis, which may be found at | ||
# https://github.com/HypothesisWorks/hypothesis/ | ||
# | ||
# Copyright the Hypothesis Authors. | ||
# Individual contributors are listed in AUTHORS.rst and the git log. | ||
# | ||
# This Source Code Form is subject to the terms of the Mozilla Public License, | ||
# v. 2.0. If a copy of the MPL was not distributed with this file, You can | ||
# obtain one at https://mozilla.org/MPL/2.0/. | ||
|
||
import json | ||
import statistics | ||
from pathlib import Path | ||
|
||
import matplotlib.pyplot as plt | ||
import seaborn as sns | ||
|
||
# Load the recorded benchmark runs from data.json, which sits next to this script.
data_path = Path(__file__).parent / "data.json"
with open(data_path) as f:
    data = json.load(f)

old_runs = data["old"]
new_runs = data["new"]
all_runs = old_runs + new_runs

# The set operations and medians below are undefined on empty input, so fail
# with a readable message instead of an opaque TypeError / StatisticsError.
if not old_runs or not new_runs:
    raise SystemExit(
        f"{data_path} must contain at least one run under both 'old' and 'new'"
    )

# every run should involve the same functions
names = set()
for run in all_runs:
    names.add(frozenset(run.keys()))

intersection = frozenset.intersection(*names)
diff = frozenset.union(*[intersection.symmetric_difference(n) for n in names])

print(f"skipping these tests which were not present in all runs: {', '.join(diff)}")
names = list(intersection)

# the similar invariant for number of minimal calls per run is not true: functions
# may make a variable number of minimal() calls.
# it would be nice to compare identically just the ones which don't vary, to get
# a very fine grained comparison instead of averaging.
# sizes = []
# for run in all_runs:
#     sizes.append(tuple(len(value) for value in run.values()))
# assert len(set(sizes)) == 1

# Drop tests which never shrank in any run: they carry no signal.
new_names = []
for name in names:
    if all(all(x == 0 for x in run[name]) for run in all_runs):
        print(f"no shrinks for {name}, skipping")
        continue
    new_names.append(name)
names = new_names

# name : average calls
old_values = {}
new_values = {}
for name in names:
    # mean across the different minimal() calls in a single test function, then
    # median across the n iterations we ran that for to reduce error
    old_vals = [statistics.mean(run[name]) for run in old_runs]
    new_vals = [statistics.mean(run[name]) for run in new_runs]
    old_values[name] = statistics.median(old_vals)
    new_values[name] = statistics.median(new_vals)

# name : (absolute difference, times difference)
diffs = {}
for name in names:
    old = old_values[name]
    new = new_values[name]
    diff = old - new
    if old == 0:
        # A zero baseline has no meaningful relative change. The median can be
        # zero even though some individual run was not.
        if new != 0:
            print(f"no baseline shrinks for {name}, cannot compute n✕ change, skipping")
            continue
        diff_times = 0.0
    else:
        diff_times = (old - new) / old
    if 0 < diff_times < 1:
        # Turn "fraction of calls saved" into "how many times fewer calls",
        # i.e. old / new - 1.
        diff_times = (1 / (1 - diff_times)) - 1
    diffs[name] = (diff, diff_times)

    print(f"{name} {int(diff)} ({int(old)} -> {int(new)}, {round(diff_times, 1)}✕)")

# statistics.mean / median below raise on empty input.
if not diffs:
    raise SystemExit("no tests with recorded shrinks to compare")

# Sort by absolute difference so the bars are plotted in ascending order.
diffs = dict(sorted(diffs.items(), key=lambda kv: kv[1][0]))
diffs_value = [v[0] for v in diffs.values()]
diffs_percentage = [v[1] for v in diffs.values()]

print(
    f"mean: {int(statistics.mean(diffs_value))}, median: {int(statistics.median(diffs_value))}"
)
|
||
|
||
# https://stackoverflow.com/a/65824524 | ||
def align_axes(ax1, ax2):
    """Give two y-axes the same bottom/top ratio by lowering one bottom limit.

    With equal bottom/top ratios, zero sits at the same height on both axes.
    Only a bottom limit is ever changed; the upper limits are left alone.

    :param ax1: first axes; must provide ``.axes.get_ylim()`` and ``.set_ylim()``.
    :param ax2: second axes, with the same requirements.
    """
    ax1_bottom, ax1_top = ax1.axes.get_ylim()
    ax2_bottom, ax2_top = ax2.axes.get_ylim()

    # The ratio is undefined when an upper limit is zero (e.g. every bar is
    # negative), and moving the bottom cannot shift a zero sitting at the top,
    # so leave the axes untouched.
    if ax1_top == 0 or ax2_top == 0:
        return

    ax1_yratio = ax1_bottom / ax1_top
    ax2_yratio = ax2_bottom / ax2_top

    if ax1_yratio < ax2_yratio:
        ax2.set_ylim(bottom=ax2_top * ax1_yratio)
    else:
        ax1.set_ylim(bottom=ax1_top * ax2_yratio)
|
||
|
||
# Left axis, blue bars: the per-test values in diffs_value.
ax1 = sns.barplot(diffs_value, color="b", alpha=0.7, label="shrink call change")
ax1.set_title("old shrinks - new shrinks (aka shrinks saved, higher is better)")
ax1.set_xticks([])

# Right axis (sharing the same x axis), red bars: the values in diffs_percentage.
ax2 = plt.twinx()
sns.barplot(diffs_percentage, color="r", alpha=0.7, label=r"n✕ change", ax=ax2)

# Both bar plots must exist before the y-limits can be compared and aligned.
align_axes(ax1, ax2)

# One legend on the first axes covers both series; its handles are recoloured
# by hand to match the bar colours.
legend = ax1.legend(labels=["shrink call change", "n✕ change"])
blue_handle = legend.legend_handles[0]
red_handle = legend.legend_handles[1]
blue_handle.set_color("b")
red_handle.set_color("r")

plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.