Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement LazySequenceCopy.pop(i) #3987

Merged
merged 8 commits into from
May 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
RELEASE_TYPE: patch

This patch improves our shrinking of unique collections, such as :func:`~hypothesis.strategies.dictionaries`,
:func:`~hypothesis.strategies.sets`, and :func:`~hypothesis.strategies.lists` with ``unique=True``.
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from random import Random
from typing import Callable, Dict, Iterable, List, Optional, Sequence

from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy, pop_random
from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy


def prefix_selection_order(
Expand Down Expand Up @@ -41,7 +41,8 @@ def random_selection_order(random: Random) -> Callable[[int, int], Iterable[int]
def selection_order(depth: int, n: int) -> Iterable[int]:
pending = LazySequenceCopy(range(n))
while pending:
yield pop_random(random, pending)
i = random.randrange(0, len(pending))
yield pending.pop(i)

return selection_order

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
overload,
)

from sortedcontainers import SortedList

from hypothesis.errors import HypothesisWarning

ARRAY_CODES = ["B", "H", "I", "L", "Q", "O"]
Expand Down Expand Up @@ -199,46 +201,71 @@ class LazySequenceCopy:
in O(1) time. The full list API is not supported yet but there's no reason
in principle it couldn't be."""

__mask: Optional[Dict[int, int]]

def __init__(self, values: Sequence[int]):
self.__values = values
self.__len = len(values)
self.__mask = None
self.__mask: Optional[Dict[int, int]] = None
self.__popped_indices: Optional[SortedList] = None

def __len__(self) -> int:
return self.__len
if self.__popped_indices is None:
return self.__len
return self.__len - len(self.__popped_indices)

def pop(self) -> int:
def pop(self, i: int = -1) -> int:
if len(self) == 0:
raise IndexError("Cannot pop from empty list")
result = self[-1]
self.__len -= 1
i = self.__underlying_index(i)

v = None
if self.__mask is not None:
self.__mask.pop(self.__len, None)
return result
v = self.__mask.pop(i, None)
if v is None:
v = self.__values[i]

if self.__popped_indices is None:
self.__popped_indices = SortedList()
self.__popped_indices.add(i)
return v

def __getitem__(self, i: int) -> int:
i = self.__check_index(i)
i = self.__underlying_index(i)

default = self.__values[i]
if self.__mask is None:
return default
else:
return self.__mask.get(i, default)

def __setitem__(self, i: int, v: int) -> None:
i = self.__check_index(i)
i = self.__underlying_index(i)
if self.__mask is None:
self.__mask = {}
self.__mask[i] = v

def __check_index(self, i: int) -> int:
def __underlying_index(self, i: int) -> int:
n = len(self)
if i < -n or i >= n:
raise IndexError(f"Index {i} out of range [0, {n})")
if i < 0:
i += n
assert 0 <= i < n

if self.__popped_indices is not None:
# given an index i in the popped representation of the list, compute
# its corresponding index in the underlying list. given
# l = [1, 4, 2, 10, 188]
# l.pop(3)
# l.pop(1)
# assert l == [1, 2, 188]
#
# we want l[i] == self.__values[f(i)], where f is this function.
assert len(self.__popped_indices) <= len(self.__values)

for idx in self.__popped_indices:
if idx > i:
break
i += 1
return i


Expand Down Expand Up @@ -345,14 +372,6 @@ def find_integer(f: Callable[[int], bool]) -> int:
return lo


def pop_random(random: Random, seq: LazySequenceCopy) -> int:
"""Remove and return a random element of seq. This runs in O(1) but leaves
the sequence in an arbitrary order."""
i = random.randrange(0, len(seq))
swap(seq, i, len(seq) - 1)
return seq.pop()


class NotFound(Exception):
pass

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,7 @@ def add(self, data):
failures = 0
while i + 1 < len(front) and failures < 10:
j = self.__random.randrange(i + 1, len(front))
swap(front, j, len(front) - 1)
candidate = front.pop()
candidate = front.pop(j)
dom = dominance(data, candidate)
assert dom != DominanceRelation.RIGHT_DOMINATES
if dom == DominanceRelation.LEFT_DOMINATES:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,12 +287,8 @@ def do_draw(self, data):
remaining = LazySequenceCopy(self.element_strategy.elements)

while remaining and should_draw.more():
i = len(remaining) - 1
j = data.draw_integer(0, i)
if j != i:
remaining[i], remaining[j] = remaining[j], remaining[i]
value = self.element_strategy._transform(remaining.pop())

j = data.draw_integer(0, len(remaining) - 1)
value = self.element_strategy._transform(remaining.pop(j))
if value is not filter_not_satisfied and all(
key(value) not in seen for key, seen in zip(self.keys, seen_sets)
):
Expand Down
42 changes: 26 additions & 16 deletions hypothesis-python/tests/conjecture/test_junkdrawer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

import copy
import inspect

import pytest
Expand Down Expand Up @@ -79,22 +80,31 @@ def test_clamp(lower, value, upper):
assert clamped == upper


def test_pop_without_mask():
y = [1, 2, 3]
x = LazySequenceCopy(y)
x.pop()
assert list(x) == [1, 2]
assert y == [1, 2, 3]


def test_pop_with_mask():
y = [1, 2, 3]
x = LazySequenceCopy(y)
x[-1] = 5
t = x.pop()
assert t == 5
assert list(x) == [1, 2]
assert y == [1, 2, 3]
# this would be more robust as a stateful test, where each rule is a list operation
# on (1) the canonical python list and (2) its LazySequenceCopy. We would assert
# that the return values and lists match after each rule, and the original list
# is unmodified.
Comment on lines +83 to +86
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that this would be nice but is not merge-blocking important.

@pytest.mark.parametrize("should_mask", [True, False])
@given(lst=st.lists(st.integers(), min_size=1), data=st.data())
def test_pop_sequence_copy(lst, data, should_mask):
original = copy.copy(lst)
pop_i = data.draw(st.integers(0, len(lst) - 1))
if should_mask:
mask_i = data.draw(st.integers(0, len(lst) - 1))
mask_value = data.draw(st.integers())

def pop(l):
if should_mask:
l[mask_i] = mask_value
return l.pop(pop_i)

expected = copy.copy(lst)
l = LazySequenceCopy(lst)

assert pop(expected) == pop(l)
assert list(l) == expected
# modifications to the LazySequenceCopy should not modify the original list
assert original == lst


def test_assignment():
Expand Down
8 changes: 4 additions & 4 deletions hypothesis-python/tests/nocover/test_sampled_from.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,10 @@ def test_flags_minimize_to_first_named_flag():


def test_flags_minimizes_bit_count():
shrunk = minimal(st.sampled_from(LargeFlag), lambda f: bit_count(f.value) > 1)
# Ideal would be (bit0 | bit1), but:
# minimal(st.sets(st.sampled_from(range(10)), min_size=3)) == {0, 8, 9} # not {0, 1, 2}
assert shrunk == LargeFlag.bit0 | LargeFlag.bit63 # documents actual behaviour
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, I remember scratching my head over this!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks to your comment here, it was the only reason I looked into this 🙂

assert (
minimal(st.sampled_from(LargeFlag), lambda f: bit_count(f.value) > 1)
== LargeFlag.bit0 | LargeFlag.bit1
)


def test_flags_finds_all_bits_set():
Expand Down
4 changes: 4 additions & 0 deletions hypothesis-python/tests/quality/test_shrink_quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ def test_minimize_sets_of_sets():
assert any(s != t and t.issubset(s) for t in set_of_sets)


def test_minimize_sets_sampled_from():
assert minimal(st.sets(st.sampled_from(range(10)), min_size=3)) == {0, 1, 2}


def test_can_simplify_flatmap_with_bounded_left_hand_size():
assert (
minimal(booleans().flatmap(lambda x: lists(just(x))), lambda x: len(x) >= 10)
Expand Down
Loading