Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bulk create with history with duplicates #1037

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ Authors
- Tommy Beadle (`tbeadle <https://github.com/tbeadle>`_)
- Trey Hunner (`treyhunner <https://github.com/treyhunner>`_)
- Ulysses Vilela
- Weston Chan
- `vnagendra <https://github.com/vnagendra>`_
- `yakimka <https://github.com/yakimka>`_
- `Paulo Peres <https://github.com/PauloPeres>`_
Expand Down
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Unreleased

- Fixed typos in the docs
- Removed n+1 query from ``bulk_create_with_history`` utility (gh-975)
- Removed extra history objects when using ``bulk_create_with_history`` with duplicates (gh-1032)
- Started using ``exists`` query instead of ``count`` in ``populate_history`` command (gh-982)
- Added support for Django 4.1 (gh-1021)

Expand Down
34 changes: 22 additions & 12 deletions simple_history/tests/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from unittest.mock import Mock, patch

from django.contrib.auth import get_user_model
from django.core.exceptions import FieldError
from django.db import IntegrityError, transaction
from django.test import TestCase, TransactionTestCase, override_settings
from django.utils import timezone
Expand Down Expand Up @@ -124,8 +125,8 @@ def test_bulk_create_history_with_default_date(self):
all([history.history_date == date for history in Poll.history.all()])
)

def test_bulk_create_history_num_queries_is_two(self):
with self.assertNumQueries(2):
def test_bulk_create_history_num_queries_is_three(self):
with self.assertNumQueries(3):
bulk_create_with_history(self.data, Poll)

def test_bulk_create_history_on_model_without_history_raises_error(self):
Expand All @@ -138,7 +139,7 @@ def test_bulk_create_history_on_model_without_history_raises_error(self):
bulk_create_with_history(self.data, Place)

def test_num_queries_when_batch_size_is_less_than_total(self):
with self.assertNumQueries(6):
with self.assertNumQueries(7):
bulk_create_with_history(self.data, Poll, batch_size=2)

def test_bulk_create_history_with_batch_size(self):
Expand Down Expand Up @@ -211,7 +212,7 @@ def mock_bulk_create(*args, **kwargs):
with patch.object(
Poll._default_manager, "bulk_create", side_effect=mock_bulk_create
):
with self.assertNumQueries(3):
with self.assertNumQueries(4):
result = bulk_create_with_history(objects, Poll)
self.assertEqual(
[poll.question for poll in result], [poll.question for poll in objects]
Expand Down Expand Up @@ -262,7 +263,7 @@ def test_bulk_create_history_rolls_back_when_last_exists(self):
self.assertEqual(Poll.history.count(), 1)

def test_bulk_create_fails_with_wrong_model(self):
with self.assertRaises(AttributeError):
with self.assertRaises(FieldError):
bulk_create_with_history(self.data, Document)

self.assertEqual(Poll.objects.count(), 0)
Expand All @@ -271,14 +272,11 @@ def test_bulk_create_fails_with_wrong_model(self):
@patch("simple_history.utils.get_history_manager_for_model")
def test_bulk_create_no_ids_return(self, hist_manager_mock):
objects = [Place(id=1, name="Place 1")]
model = Mock(
_default_manager=Mock(
bulk_create=Mock(return_value=[Place(name="Place 1")]),
filter=Mock(return_value=Mock(order_by=Mock(return_value=objects))),
),
_meta=Mock(get_fields=Mock(return_value=[])),
Place._default_manager.bulk_create = Mock(
side_effect=Place._default_manager.bulk_create,
return_value=[Place(name="Place 1")],
)
result = bulk_create_with_history(objects, model)
result = bulk_create_with_history(objects, Place)
self.assertEqual(result, objects)
hist_manager_mock().bulk_history_create.assert_called_with(
objects,
Expand All @@ -288,6 +286,18 @@ def test_bulk_create_no_ids_return(self, hist_manager_mock):
default_date=None,
)

def test_bulk_create_duplicate_objects_with_no_ids_return(self):
Street._default_manager.create(id=1, name="Duplicate Place")
obj = Street(name="Duplicate Place")
Street._default_manager.bulk_create = Mock(
side_effect=Street._default_manager.bulk_create,
return_value=[Street(name="Duplicate Place")],
)
result = bulk_create_with_history([obj], Street)
self.assertEqual(result, [Street._default_manager.last()])
self.assertEqual(Street._default_manager.count(), 2)
self.assertEqual(Street.log.count(), 2)


class BulkCreateWithManyToManyField(TestCase):
def setUp(self):
Expand Down
62 changes: 29 additions & 33 deletions simple_history/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,58 +93,54 @@ def bulk_create_with_history(
history_manager = get_history_manager_for_model(model)
model_manager = model._default_manager

second_transaction_required = True
with transaction.atomic(savepoint=False):
# Retrieve any duplicate existing objects,
# so we can exclude them when finding the created objects
cumulative_filter = None
obj_when_list = []
for i, obj in enumerate(objs):
attributes = {
k: v
for k, v in model_to_dict(obj, exclude=exclude_fields).items()
if v is not None
}
# https://stackoverflow.com/a/49625179/1960509
# DEV: If an attribute has `then` as a key
# then they'll also run into issues with `bulk_update`
# due to shared implementation
# https://github.com/django/django/blob/4.0.4/django/db/models/query.py#L624-L638
obj_when_list.append(When(**attributes, then=i))
q = Q(**attributes)
cumulative_filter = (cumulative_filter | q) if cumulative_filter else q
existing_objs_ids = (
list(model_manager.filter(cumulative_filter).values_list("pk", flat=True))
if cumulative_filter
else []
)
objs_with_id = model_manager.bulk_create(
objs, batch_size=batch_size, ignore_conflicts=ignore_conflicts
)
if objs_with_id and objs_with_id[0].pk and not ignore_conflicts:
second_transaction_required = False
history_manager.bulk_history_create(
objs_with_id,
batch_size=batch_size,
default_user=default_user,
default_change_reason=default_change_reason,
default_date=default_date,
)
if second_transaction_required:
with transaction.atomic(savepoint=False):
# Generate a common query to avoid n+1 selections
# https://github.com/jazzband/django-simple-history/issues/974
cumulative_filter = None
obj_when_list = []
for i, obj in enumerate(objs_with_id):
attributes = dict(
filter(
lambda x: x[1] is not None,
model_to_dict(obj, exclude=exclude_fields).items(),
)
)
q = Q(**attributes)
cumulative_filter = (cumulative_filter | q) if cumulative_filter else q
# https://stackoverflow.com/a/49625179/1960509
# DEV: If an attribute has `then` as a key
# then they'll also run into issues with `bulk_update`
# due to shared implementation
# https://github.com/django/django/blob/4.0.4/django/db/models/query.py#L624-L638
obj_when_list.append(When(**attributes, then=i))
obj_list = (
list(
model_manager.filter(cumulative_filter).order_by(
Case(*obj_when_list)
)
)
if objs_with_id
else []
elif objs_with_id:
objs_with_id = list(
model_manager.filter(cumulative_filter)
.exclude(pk__in=existing_objs_ids)
.order_by(Case(*obj_when_list))
)
history_manager.bulk_history_create(
obj_list,
objs_with_id,
batch_size=batch_size,
default_user=default_user,
default_change_reason=default_change_reason,
default_date=default_date,
)
objs_with_id = obj_list
return objs_with_id


Expand Down