diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..873229456b --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,4 @@ +RELEASE_TYPE: patch + +This patch improves our shrinking of unique collections, such as :func:`~hypothesis.strategies.dictionaries`, +:func:`~hypothesis.strategies.sets`, and :func:`~hypothesis.strategies.lists` with ``unique=True``. diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/choicetree.py b/hypothesis-python/src/hypothesis/internal/conjecture/choicetree.py index c5de31ab3a..d7645e1889 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/choicetree.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/choicetree.py @@ -12,7 +12,7 @@ from random import Random from typing import Callable, Dict, Iterable, List, Optional, Sequence -from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy, pop_random +from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy def prefix_selection_order( @@ -41,7 +41,8 @@ def random_selection_order(random: Random) -> Callable[[int, int], Iterable[int] def selection_order(depth: int, n: int) -> Iterable[int]: pending = LazySequenceCopy(range(n)) while pending: - yield pop_random(random, pending) + i = random.randrange(0, len(pending)) + yield pending.pop(i) return selection_order diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/junkdrawer.py b/hypothesis-python/src/hypothesis/internal/conjecture/junkdrawer.py index 61f9f37e51..716b5d3d82 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/junkdrawer.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/junkdrawer.py @@ -31,6 +31,8 @@ overload, ) +from sortedcontainers import SortedList + from hypothesis.errors import HypothesisWarning ARRAY_CODES = ["B", "H", "I", "L", "Q", "O"] @@ -199,27 +201,36 @@ class LazySequenceCopy: in O(1) time. The full list API is not supported yet but there's no reason in principle it couldn't be.""" - __mask: Optional[Dict[int, int]] - def __init__(self, values: Sequence[int]): self.__values = values self.__len = len(values) - self.__mask = None + self.__mask: Optional[Dict[int, int]] = None + self.__popped_indices: Optional[SortedList] = None def __len__(self) -> int: - return self.__len + if self.__popped_indices is None: + return self.__len + return self.__len - len(self.__popped_indices) - def pop(self) -> int: + def pop(self, i: int = -1) -> int: if len(self) == 0: raise IndexError("Cannot pop from empty list") - result = self[-1] - self.__len -= 1 + i = self.__underlying_index(i) + + v = None if self.__mask is not None: - self.__mask.pop(self.__len, None) - return result + v = self.__mask.pop(i, None) + if v is None: + v = self.__values[i] + + if self.__popped_indices is None: + self.__popped_indices = SortedList() + self.__popped_indices.add(i) + return v def __getitem__(self, i: int) -> int: - i = self.__check_index(i) + i = self.__underlying_index(i) + default = self.__values[i] if self.__mask is None: return default @@ -227,18 +238,34 @@ def __getitem__(self, i: int) -> int: return self.__mask.get(i, default) def __setitem__(self, i: int, v: int) -> None: - i = self.__check_index(i) + i = self.__underlying_index(i) if self.__mask is None: self.__mask = {} self.__mask[i] = v - def __check_index(self, i: int) -> int: + def __underlying_index(self, i: int) -> int: n = len(self) if i < -n or i >= n: raise IndexError(f"Index {i} out of range [0, {n})") if i < 0: i += n assert 0 <= i < n + + if self.__popped_indices is not None: + # given an index i in the popped representation of the list, compute + # its corresponding index in the underlying list. given + # l = [1, 4, 2, 10, 188] + # l.pop(3) + # l.pop(1) + # assert l == [1, 2, 188] + # + # we want l[i] == self.__values[f(i)], where f is this function. + assert len(self.__popped_indices) <= len(self.__values) + + for idx in self.__popped_indices: + if idx > i: + break + i += 1 return i @@ -345,14 +372,6 @@ def find_integer(f: Callable[[int], bool]) -> int: return lo -def pop_random(random: Random, seq: LazySequenceCopy) -> int: - """Remove and return a random element of seq. This runs in O(1) but leaves - the sequence in an arbitrary order.""" - i = random.randrange(0, len(seq)) - swap(seq, i, len(seq) - 1) - return seq.pop() - - class NotFound(Exception): pass diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/pareto.py b/hypothesis-python/src/hypothesis/internal/conjecture/pareto.py index b43e6df5bc..a0451d7f86 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/pareto.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/pareto.py @@ -179,8 +179,7 @@ def add(self, data): failures = 0 while i + 1 < len(front) and failures < 10: j = self.__random.randrange(i + 1, len(front)) - swap(front, j, len(front) - 1) - candidate = front.pop() + candidate = front.pop(j) dom = dominance(data, candidate) assert dom != DominanceRelation.RIGHT_DOMINATES if dom == DominanceRelation.LEFT_DOMINATES: diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/collections.py b/hypothesis-python/src/hypothesis/strategies/_internal/collections.py index 90ffca6ca7..5d43390630 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/collections.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/collections.py @@ -287,12 +287,8 @@ def do_draw(self, data): remaining = LazySequenceCopy(self.element_strategy.elements) while remaining and should_draw.more(): - i = len(remaining) - 1 - j = data.draw_integer(0, i) - if j != i: - remaining[i], remaining[j] = remaining[j], remaining[i] - value = self.element_strategy._transform(remaining.pop()) - + j = data.draw_integer(0, len(remaining) - 1) + value = self.element_strategy._transform(remaining.pop(j)) if value is not filter_not_satisfied and all( key(value) not in seen for key, seen in zip(self.keys, seen_sets) ): diff --git a/hypothesis-python/tests/conjecture/test_junkdrawer.py b/hypothesis-python/tests/conjecture/test_junkdrawer.py index 31662144bd..37c66d60af 100644 --- a/hypothesis-python/tests/conjecture/test_junkdrawer.py +++ b/hypothesis-python/tests/conjecture/test_junkdrawer.py @@ -8,6 +8,7 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. +import copy import inspect import pytest @@ -79,22 +80,31 @@ def test_clamp(lower, value, upper): assert clamped == upper -def test_pop_without_mask(): - y = [1, 2, 3] - x = LazySequenceCopy(y) - x.pop() - assert list(x) == [1, 2] - assert y == [1, 2, 3] - - -def test_pop_with_mask(): - y = [1, 2, 3] - x = LazySequenceCopy(y) - x[-1] = 5 - t = x.pop() - assert t == 5 - assert list(x) == [1, 2] - assert y == [1, 2, 3] +# this would be more robust as a stateful test, where each rule is a list operation +# on (1) the canonical python list and (2) its LazySequenceCopy. We would assert +# that the return values and lists match after each rule, and the original list +# is unmodified. +@pytest.mark.parametrize("should_mask", [True, False]) +@given(lst=st.lists(st.integers(), min_size=1), data=st.data()) +def test_pop_sequence_copy(lst, data, should_mask): + original = copy.copy(lst) + pop_i = data.draw(st.integers(0, len(lst) - 1)) + if should_mask: + mask_i = data.draw(st.integers(0, len(lst) - 1)) + mask_value = data.draw(st.integers()) + + def pop(l): + if should_mask: + l[mask_i] = mask_value + return l.pop(pop_i) + + expected = copy.copy(lst) + l = LazySequenceCopy(lst) + + assert pop(expected) == pop(l) + assert list(l) == expected + # modifications to the LazySequenceCopy should not modify the original list + assert original == lst def test_assignment(): diff --git a/hypothesis-python/tests/nocover/test_sampled_from.py b/hypothesis-python/tests/nocover/test_sampled_from.py index b0d5b59135..f72beef0ab 100644 --- a/hypothesis-python/tests/nocover/test_sampled_from.py +++ b/hypothesis-python/tests/nocover/test_sampled_from.py @@ -124,10 +124,10 @@ def test_flags_minimize_to_first_named_flag(): def test_flags_minimizes_bit_count(): - shrunk = minimal(st.sampled_from(LargeFlag), lambda f: bit_count(f.value) > 1) - # Ideal would be (bit0 | bit1), but: - # minimal(st.sets(st.sampled_from(range(10)), min_size=3)) == {0, 8, 9} # not {0, 1, 2} - assert shrunk == LargeFlag.bit0 | LargeFlag.bit63 # documents actual behaviour + assert ( + minimal(st.sampled_from(LargeFlag), lambda f: bit_count(f.value) > 1) + == LargeFlag.bit0 | LargeFlag.bit1 + ) def test_flags_finds_all_bits_set(): diff --git a/hypothesis-python/tests/quality/test_shrink_quality.py b/hypothesis-python/tests/quality/test_shrink_quality.py index 1586289dd0..a291176f02 100644 --- a/hypothesis-python/tests/quality/test_shrink_quality.py +++ b/hypothesis-python/tests/quality/test_shrink_quality.py @@ -107,6 +107,10 @@ def test_minimize_sets_of_sets(): assert any(s != t and t.issubset(s) for t in set_of_sets) +def test_minimize_sets_sampled_from(): + assert minimal(st.sets(st.sampled_from(range(10)), min_size=3)) == {0, 1, 2} + + def test_can_simplify_flatmap_with_bounded_left_hand_size(): assert ( minimal(booleans().flatmap(lambda x: lists(just(x))), lambda x: len(x) >= 10)