Skip to content

Commit

Permalink
Fix bulk create with history with duplicates
Browse files Browse the repository at this point in the history
Issue: 1032
  • Loading branch information
WestonChan committed Sep 13, 2022
1 parent 529dbe2 commit 419f270
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 45 deletions.
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ Authors
- Tommy Beadle (`tbeadle <https://github.com/tbeadle>`_)
- Trey Hunner (`treyhunner <https://github.com/treyhunner>`_)
- Ulysses Vilela
- Weston Chan
- `vnagendra <https://github.com/vnagendra>`_
- `yakimka <https://github.com/yakimka>`_
- `Paulo Peres <https://github.com/PauloPeres>`_
Expand Down
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Unreleased

- Fixed typos in the docs
- Removed n+1 query from ``bulk_create_with_history`` utility (gh-975)
- Removed extra history objects when using ``bulk_create_with_history`` with duplicates (gh-1032)
- Started using ``exists`` query instead of ``count`` in ``populate_history`` command (gh-982)
- Added support for Django 4.1 (gh-1021)

Expand Down
34 changes: 22 additions & 12 deletions simple_history/tests/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from unittest.mock import Mock, patch

from django.contrib.auth import get_user_model
from django.core.exceptions import FieldError
from django.db import IntegrityError, transaction
from django.test import TestCase, TransactionTestCase, override_settings
from django.utils import timezone
Expand Down Expand Up @@ -124,8 +125,8 @@ def test_bulk_create_history_with_default_date(self):
all([history.history_date == date for history in Poll.history.all()])
)

def test_bulk_create_history_num_queries_is_two(self):
with self.assertNumQueries(2):
def test_bulk_create_history_num_queries_is_three(self):
with self.assertNumQueries(3):
bulk_create_with_history(self.data, Poll)

def test_bulk_create_history_on_model_without_history_raises_error(self):
Expand All @@ -138,7 +139,7 @@ def test_bulk_create_history_on_model_without_history_raises_error(self):
bulk_create_with_history(self.data, Place)

def test_num_queries_when_batch_size_is_less_than_total(self):
with self.assertNumQueries(6):
with self.assertNumQueries(7):
bulk_create_with_history(self.data, Poll, batch_size=2)

def test_bulk_create_history_with_batch_size(self):
Expand Down Expand Up @@ -211,7 +212,7 @@ def mock_bulk_create(*args, **kwargs):
with patch.object(
Poll._default_manager, "bulk_create", side_effect=mock_bulk_create
):
with self.assertNumQueries(3):
with self.assertNumQueries(4):
result = bulk_create_with_history(objects, Poll)
self.assertEqual(
[poll.question for poll in result], [poll.question for poll in objects]
Expand Down Expand Up @@ -262,7 +263,7 @@ def test_bulk_create_history_rolls_back_when_last_exists(self):
self.assertEqual(Poll.history.count(), 1)

def test_bulk_create_fails_with_wrong_model(self):
with self.assertRaises(AttributeError):
with self.assertRaises(FieldError):
bulk_create_with_history(self.data, Document)

self.assertEqual(Poll.objects.count(), 0)
Expand All @@ -271,14 +272,11 @@ def test_bulk_create_fails_with_wrong_model(self):
@patch("simple_history.utils.get_history_manager_for_model")
def test_bulk_create_no_ids_return(self, hist_manager_mock):
objects = [Place(id=1, name="Place 1")]
model = Mock(
_default_manager=Mock(
bulk_create=Mock(return_value=[Place(name="Place 1")]),
filter=Mock(return_value=Mock(order_by=Mock(return_value=objects))),
),
_meta=Mock(get_fields=Mock(return_value=[])),
Place._default_manager.bulk_create = Mock(
side_effect=Place._default_manager.bulk_create,
return_value=[Place(name="Place 1")],
)
result = bulk_create_with_history(objects, model)
result = bulk_create_with_history(objects, Place)
self.assertEqual(result, objects)
hist_manager_mock().bulk_history_create.assert_called_with(
objects,
Expand All @@ -288,6 +286,18 @@ def test_bulk_create_no_ids_return(self, hist_manager_mock):
default_date=None,
)

def test_bulk_create_duplicate_objects_with_no_ids_return(self):
Street._default_manager.create(id=1, name="Duplicate Place")
obj = Street(name="Duplicate Place")
Street._default_manager.bulk_create = Mock(
side_effect=Street._default_manager.bulk_create,
return_value=[Street(name="Duplicate Place")],
)
result = bulk_create_with_history([obj], Street)
self.assertEqual(result, [Street._default_manager.last()])
self.assertEqual(Street._default_manager.count(), 2)
self.assertEqual(Street.log.count(), 2)


class BulkCreateWithManyToManyField(TestCase):
def setUp(self):
Expand Down
60 changes: 27 additions & 33 deletions simple_history/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,58 +93,52 @@ def bulk_create_with_history(
history_manager = get_history_manager_for_model(model)
model_manager = model._default_manager

second_transaction_required = True
with transaction.atomic(savepoint=False):
# Retrieve any duplicate existing objects,
# so we can exclude them when finding the created objects
cumulative_filter = None
obj_when_list = []
for i, obj in enumerate(objs):
attributes = {
k: v
for k, v in model_to_dict(obj, exclude=exclude_fields).items()
if v is not None
}
# https://stackoverflow.com/a/49625179/1960509
# DEV: If an attribute has `then` as a key
# then they'll also run into issues with `bulk_update`
# due to shared implementation
# https://github.com/django/django/blob/4.0.4/django/db/models/query.py#L624-L638
obj_when_list.append(When(**attributes, then=i))
q = Q(**attributes)
cumulative_filter = (cumulative_filter | q) if cumulative_filter else q
existing_objs_ids = list(
model_manager.filter(cumulative_filter).values_list("pk", flat=True)
)
objs_with_id = model_manager.bulk_create(
objs, batch_size=batch_size, ignore_conflicts=ignore_conflicts
)
if objs_with_id and objs_with_id[0].pk and not ignore_conflicts:
second_transaction_required = False
history_manager.bulk_history_create(
objs_with_id,
batch_size=batch_size,
default_user=default_user,
default_change_reason=default_change_reason,
default_date=default_date,
)
if second_transaction_required:
with transaction.atomic(savepoint=False):
# Generate a common query to avoid n+1 selections
# https://github.com/jazzband/django-simple-history/issues/974
cumulative_filter = None
obj_when_list = []
for i, obj in enumerate(objs_with_id):
attributes = dict(
filter(
lambda x: x[1] is not None,
model_to_dict(obj, exclude=exclude_fields).items(),
)
)
q = Q(**attributes)
cumulative_filter = (cumulative_filter | q) if cumulative_filter else q
# https://stackoverflow.com/a/49625179/1960509
# DEV: If an attribute has `then` as a key
# then they'll also run into issues with `bulk_update`
# due to shared implementation
# https://github.com/django/django/blob/4.0.4/django/db/models/query.py#L624-L638
obj_when_list.append(When(**attributes, then=i))
obj_list = (
list(
model_manager.filter(cumulative_filter).order_by(
Case(*obj_when_list)
)
)
if objs_with_id
else []
elif objs_with_id:
objs_with_id = list(
model_manager.filter(cumulative_filter)
.exclude(pk__in=existing_objs_ids)
.order_by(Case(*obj_when_list))
)
history_manager.bulk_history_create(
obj_list,
objs_with_id,
batch_size=batch_size,
default_user=default_user,
default_change_reason=default_change_reason,
default_date=default_date,
)
objs_with_id = obj_list
return objs_with_id


Expand Down

0 comments on commit 419f270

Please sign in to comment.