Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get rid of test front-loading in the duration_based_chunks algo #27

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions src/pytest_split/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def least_duration(splits: int, items: "List[nodes.Item]", durations: "Dict[str,
def duration_based_chunks(splits: int, items: "List[nodes.Item]", durations: "Dict[str, float]") -> "List[TestGroup]":
"""
Split tests into groups by runtime.

Ensures tests are split into non-overlapping groups.
The original list of test items is split into groups by finding boundary indices i_0, i_1, i_2
and creating group_1 = items[0:i_0], group_2 = items[i_0, i_1], group_3 = items[i_1, i_2], ...
Expand All @@ -80,15 +81,33 @@ def duration_based_chunks(splits: int, items: "List[nodes.Item]", durations: "Di
duration: "List[float]" = [0 for i in range(splits)]

group_idx = 0
for item in items:
if duration[group_idx] >= time_per_group:
test_count = len(items)
for index, item in enumerate(items):
item_duration = tests_and_durations.pop(item)

tests_left = test_count - index
groups_left = splits - group_idx

if not selected[group_idx]:
# Add test to current group if group has no tests
pass
elif tests_left < groups_left:
# Make sure that we assign at least one test to each group
group_idx += 1
elif duration[group_idx] + item_duration > time_per_group:
if group_idx + 1 >= splits:
# Stay with group index if it's the final group in the split
pass
else:
# Otherwise, bump index to add to the next group
group_idx += 1

selected[group_idx].append(item)
duration[group_idx] += item_duration

for i in range(splits):
if i != group_idx:
deselected[i].append(item)
duration[group_idx] += tests_and_durations.pop(item)

return [TestGroup(selected=selected[i], deselected=deselected[i], duration=duration[i]) for i in range(splits)]

Expand Down
28 changes: 26 additions & 2 deletions tests/test_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from pytest_split.algorithms import Algorithms
from src.pytest_split.algorithms import Algorithms, duration_based_chunks

item = namedtuple("item", "nodeid")

Expand All @@ -11,7 +11,7 @@ class TestAlgorithms:
@pytest.mark.parametrize("algo_name", Algorithms.names())
def test__split_test(self, algo_name):
durations = {"a": 1, "b": 1, "c": 1}
items = [item(x) for x in durations.keys()]
items = [item(x) for x in durations]
algo = Algorithms[algo_name].value
first, second, third = algo(splits=3, items=items, durations=durations)

Expand Down Expand Up @@ -83,3 +83,27 @@ def test__split_tests_calculates_avg_test_duration_only_on_present_tests(self, a
expected_first, expected_second = expected
assert first.selected == expected_first
assert second.selected == expected_second

def test_each_group_is_assigned_a_test_front_loaded(self):
item = namedtuple("item", "nodeid")

durations = {"a": 100, "b": 1, "c": 1, "d": 1, "e": 1, "f": 1, "g": 1, "h": 1}

items = [item(x) for x in ["a", "b", "c", "d", "e", "f", "g", "h"]]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be the same if the loop would be over sorted(durations.keys())?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you mean, sorry?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think he's saying you can do items = sorted(durations.keys()) instead of duplicating all of the items.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah I'm basically asking if this would be the same:

items = [item(x) for x in sorted(durations.keys())]

IMO it would be better considering readability as then it's explicit that there's one item for each thingy in durations

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just copied in the example test from here and made sure we had one example that was extremely front-loaded and one that was extremely back-loaded. I can make it less explicit if you prefer.


groups = duration_based_chunks(8, items, durations)

for i in range(7):
Comment on lines +94 to +96
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why 8 above and range(7) here in the loop below? Could we somehow get rid of the magical numbers and use e.g. variables which names would help the reader to understand what's going on?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe use for g in groups: instead?

assert groups[i].selected != []

def test_each_group_is_assigned_a_test_back_loaded(self):
item = namedtuple("item", "nodeid")

durations = {"a": 1, "b": 1, "c": 1, "d": 1, "e": 1, "f": 1, "g": 1, "h": 100}

items = [item(x) for x in ["a", "b", "c", "d", "e", "f", "g", "h"]]

groups = duration_based_chunks(8, items, durations)

for i in range(7):
assert groups[i].selected != []
Comment on lines +108 to +109
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this same as assert all(group.selected for group in groups)?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That looks like it would probably work; maybe just add a comment to indicate that we're making sure there aren't any empty groups. Or maybe explicitly check that the size of each group is nonzero.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we're making sure there aren't any empty groups ... check that the size of each group is nonzero

IMO all(group.selected for group in groups) does both and is explicit enough so no comments or further explanations would be needed. Just my opinion ofc 🙂

33 changes: 17 additions & 16 deletions tests/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,20 +61,20 @@ class TestSplitToSuites:
"least_duration",
["test_1", "test_2", "test_3", "test_4", "test_5", "test_6", "test_7", "test_8", "test_9", "test_10"],
),
(2, 1, "duration_based_chunks", ["test_1", "test_2", "test_3", "test_4", "test_5", "test_6", "test_7"]),
(2, 2, "duration_based_chunks", ["test_8", "test_9", "test_10"]),
(2, 1, "duration_based_chunks", ["test_1", "test_2", "test_3", "test_4", "test_5", "test_6"]),
(2, 2, "duration_based_chunks", ["test_7", "test_8", "test_9", "test_10"]),
(2, 1, "least_duration", ["test_1", "test_3", "test_5", "test_7", "test_9"]),
(2, 2, "least_duration", ["test_2", "test_4", "test_6", "test_8", "test_10"]),
(3, 1, "duration_based_chunks", ["test_1", "test_2", "test_3", "test_4", "test_5"]),
(3, 2, "duration_based_chunks", ["test_6", "test_7", "test_8"]),
(3, 3, "duration_based_chunks", ["test_9", "test_10"]),
(3, 2, "duration_based_chunks", ["test_6", "test_7"]),
(3, 3, "duration_based_chunks", ["test_8", "test_9", "test_10"]),
(3, 1, "least_duration", ["test_1", "test_4", "test_7", "test_10"]),
(3, 2, "least_duration", ["test_2", "test_5", "test_8"]),
(3, 3, "least_duration", ["test_3", "test_6", "test_9"]),
(4, 1, "duration_based_chunks", ["test_1", "test_2", "test_3", "test_4"]),
(4, 2, "duration_based_chunks", ["test_5", "test_6", "test_7"]),
(4, 3, "duration_based_chunks", ["test_8", "test_9"]),
(4, 4, "duration_based_chunks", ["test_10"]),
(4, 1, "duration_based_chunks", ["test_1", "test_2", "test_3"]),
(4, 2, "duration_based_chunks", ["test_4", "test_5"]),
(4, 3, "duration_based_chunks", ["test_6"]),
(4, 4, "duration_based_chunks", ["test_7", "test_8", "test_9", "test_10"]),
Comment on lines +74 to +77
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't this mean that the expected durations for each group will be:

  • 1: 3s (1+1+1)
  • 2: 2s (1+1)
  • 3: 2s (2)
  • 4: 8s (2+2+2+2)

If so, it doesn't feel right 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

1-5 are 1 second tests, while 5-10 are 2 second tests, so I believe the timings are

  • 3 seconds
  • 3 seconds
  • 2 seconds
  • 8 seconds

And yes this is weird, but it's also what you should expect if you want to stop adding tests before going over the threshold of 15/4=3,75 seconds, and keeping absolute order 🤷

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a very simple greedy algorithm; it's pretty much unavoidable that you're going to get strange far-from-optimal results like this. The least_durations algorithm is much better and IMO should be the default and this algorithm should simply be removed. But this algorithm does have the benefit of maintaining the sequence of tests, which I suppose could potentially be beneficial in some cases if there are ordering dependencies between tests.

Copy link

@alexforencich alexforencich Jul 21, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, here's something to consider potentially: exceed the threshold, but keep track of a running total overage and use that when determining if you're over the threshold. (If you never exceed the threshold, then you're basically guaranteed to end up with the last group being larger than the rest) There are probably a few other heuristics that you could apply here to try to improve things a little bit. For instance, something like this: ignore overage if the slot is empty and unconditionally assign, if it isn't empty then check to see if the resulting overage is less than half of the time required for that test, if so pack it, otherwise bump it to the next group.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The least_durations algorithm is much better and IMO should be the default and this algorithm should simply be removed

I have a use case which requires this plugin to support maintaining the order of the tests, which is why we need to keep duration_based_chunks or something similar. However, I can consider changing the default but that'd probably bite some other users of this plugin as well so need to consider that carefully 🙂

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll close this for now then 👍

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, without these changes, is duration_based_chunks still producing empty groups? Or did that get fixed in a separate PR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes probably. I noticed there are a few conflicts, so perhaps there's been another fix merged, but that's what this was meant to fix.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, well my point was that even if the splitting isn't as perfect as it could be, at least this fix doesn't result in empty groups, so it should be merged and then possibly improved, instead of being closed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure thing. I've reopened it - feel free to make any additional changes you wish (cc @jerry-git) and merge. I'm on holiday now myself so likely won't be able to apply any code changes for a while.

(4, 1, "least_duration", ["test_1", "test_5", "test_9"]),
(4, 2, "least_duration", ["test_2", "test_6", "test_10"]),
(4, 3, "least_duration", ["test_3", "test_7"]),
Expand Down Expand Up @@ -107,6 +107,7 @@ def test_it_splits(self, test_idx, splits, group, algo, expected, legacy_flag, e
"--splitting-algorithm",
algo,
)

result.assertoutcome(passed=len(expected))
assert _passed_test_names(result) == expected

Expand All @@ -128,16 +129,16 @@ def test_it_adapts_splits_based_on_new_and_deleted_tests(self, example_suite, du
json.dump(durations, f)

result = example_suite.inline_run("--splits", "3", "--group", "1", "--durations-path", durations_path)
result.assertoutcome(passed=4)
assert _passed_test_names(result) == ["test_1", "test_2", "test_3", "test_4"]
result.assertoutcome(passed=3)
assert _passed_test_names(result) == ["test_1", "test_2", "test_3"] # 3 sec

result = example_suite.inline_run("--splits", "3", "--group", "2", "--durations-path", durations_path)
result.assertoutcome(passed=3)
assert _passed_test_names(result) == ["test_5", "test_6", "test_7"]
result.assertoutcome(passed=1)
assert _passed_test_names(result) == ["test_4"] # 1 sec

result = example_suite.inline_run("--splits", "3", "--group", "3", "--durations-path", durations_path)
result.assertoutcome(passed=3)
assert _passed_test_names(result) == ["test_8", "test_9", "test_10"]
result.assertoutcome(passed=6)
assert _passed_test_names(result) == ["test_5", "test_6", "test_7", "test_8", "test_9", "test_10"] # 3 sec

def test_handles_case_of_no_durations_for_group(self, example_suite, durations_path):
with open(durations_path, "w") as f:
Expand All @@ -149,8 +150,8 @@ def test_handles_case_of_no_durations_for_group(self, example_suite, durations_p

def test_it_splits_with_other_collect_hooks(self, testdir, durations_path):
expected_tests_per_group = [
["test_1", "test_2", "test_3"],
["test_4", "test_5"],
["test_1", "test_2"],
["test_3", "test_4", "test_5"],
]

tests_to_run = "".join(f"@pytest.mark.mark_one\ndef test_{num}(): pass\n" for num in range(1, 6))
Expand Down