From 419f2702163ba8ab8df8500011a6015dfac42be2 Mon Sep 17 00:00:00 2001 From: Weston Chan Date: Tue, 13 Sep 2022 10:10:17 -0500 Subject: [PATCH] Fix bulk create with history with duplicates Issue: 1032 --- AUTHORS.rst | 1 + CHANGES.rst | 1 + simple_history/tests/tests/test_utils.py | 34 +++++++++----- simple_history/utils.py | 60 +++++++++++------------- 4 files changed, 51 insertions(+), 45 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 5d3a9d565..5cf2975c2 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -119,6 +119,7 @@ Authors - Tommy Beadle (`tbeadle `_) - Trey Hunner (`treyhunner `_) - Ulysses Vilela +- Weston Chan - `vnagendra `_ - `yakimka `_ - `Paulo Peres `_ diff --git a/CHANGES.rst b/CHANGES.rst index a26e8d748..c994d43e1 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -6,6 +6,7 @@ Unreleased - Fixed typos in the docs - Removed n+1 query from ``bulk_create_with_history`` utility (gh-975) +- Removed extra history objects when using ``bulk_create_with_history`` with duplicates (gh-1032) - Started using ``exists`` query instead of ``count`` in ``populate_history`` command (gh-982) - Added support for Django 4.1 (gh-1021) diff --git a/simple_history/tests/tests/test_utils.py b/simple_history/tests/tests/test_utils.py index 0b629a439..7c1557dcd 100644 --- a/simple_history/tests/tests/test_utils.py +++ b/simple_history/tests/tests/test_utils.py @@ -2,6 +2,7 @@ from unittest.mock import Mock, patch from django.contrib.auth import get_user_model +from django.core.exceptions import FieldError from django.db import IntegrityError, transaction from django.test import TestCase, TransactionTestCase, override_settings from django.utils import timezone @@ -124,8 +125,8 @@ def test_bulk_create_history_with_default_date(self): all([history.history_date == date for history in Poll.history.all()]) ) - def test_bulk_create_history_num_queries_is_two(self): - with self.assertNumQueries(2): + def test_bulk_create_history_num_queries_is_three(self): + with self.assertNumQueries(3): bulk_create_with_history(self.data, Poll) def test_bulk_create_history_on_model_without_history_raises_error(self): @@ -138,7 +139,7 @@ def test_bulk_create_history_on_model_without_history_raises_error(self): bulk_create_with_history(self.data, Place) def test_num_queries_when_batch_size_is_less_than_total(self): - with self.assertNumQueries(6): + with self.assertNumQueries(7): bulk_create_with_history(self.data, Poll, batch_size=2) def test_bulk_create_history_with_batch_size(self): @@ -211,7 +212,7 @@ def mock_bulk_create(*args, **kwargs): with patch.object( Poll._default_manager, "bulk_create", side_effect=mock_bulk_create ): - with self.assertNumQueries(3): + with self.assertNumQueries(4): result = bulk_create_with_history(objects, Poll) self.assertEqual( [poll.question for poll in result], [poll.question for poll in objects] @@ -262,7 +263,7 @@ def test_bulk_create_history_rolls_back_when_last_exists(self): self.assertEqual(Poll.history.count(), 1) def test_bulk_create_fails_with_wrong_model(self): - with self.assertRaises(AttributeError): + with self.assertRaises(FieldError): bulk_create_with_history(self.data, Document) self.assertEqual(Poll.objects.count(), 0) @@ -271,14 +272,11 @@ def test_bulk_create_fails_with_wrong_model(self): @patch("simple_history.utils.get_history_manager_for_model") def test_bulk_create_no_ids_return(self, hist_manager_mock): objects = [Place(id=1, name="Place 1")] - model = Mock( - _default_manager=Mock( - bulk_create=Mock(return_value=[Place(name="Place 1")]), - filter=Mock(return_value=Mock(order_by=Mock(return_value=objects))), - ), - _meta=Mock(get_fields=Mock(return_value=[])), + Place._default_manager.bulk_create = Mock( + side_effect=Place._default_manager.bulk_create, + return_value=[Place(name="Place 1")], ) - result = bulk_create_with_history(objects, model) + result = bulk_create_with_history(objects, Place) self.assertEqual(result, objects) hist_manager_mock().bulk_history_create.assert_called_with( objects, @@ -288,6 +286,18 @@ def test_bulk_create_no_ids_return(self, hist_manager_mock): default_date=None, ) + def test_bulk_create_duplicate_objects_with_no_ids_return(self): + Street._default_manager.create(id=1, name="Duplicate Place") + obj = Street(name="Duplicate Place") + Street._default_manager.bulk_create = Mock( + side_effect=Street._default_manager.bulk_create, + return_value=[Street(name="Duplicate Place")], + ) + result = bulk_create_with_history([obj], Street) + self.assertEqual(result, [Street._default_manager.last()]) + self.assertEqual(Street._default_manager.count(), 2) + self.assertEqual(Street.log.count(), 2) + class BulkCreateWithManyToManyField(TestCase): def setUp(self): diff --git a/simple_history/utils.py b/simple_history/utils.py index a3b405bc0..6dbc7af00 100644 --- a/simple_history/utils.py +++ b/simple_history/utils.py @@ -93,13 +93,32 @@ def bulk_create_with_history( history_manager = get_history_manager_for_model(model) model_manager = model._default_manager - second_transaction_required = True with transaction.atomic(savepoint=False): + # Retrieve any duplicate existing objects, + # so we can exclude them when finding the created objects + cumulative_filter = None + obj_when_list = [] + for i, obj in enumerate(objs): + attributes = { + k: v + for k, v in model_to_dict(obj, exclude=exclude_fields).items() + if v is not None + } + # https://stackoverflow.com/a/49625179/1960509 + # DEV: If an attribute has `then` as a key + # then they'll also run into issues with `bulk_update` + # due to shared implementation + # https://github.com/django/django/blob/4.0.4/django/db/models/query.py#L624-L638 + obj_when_list.append(When(**attributes, then=i)) + q = Q(**attributes) + cumulative_filter = (cumulative_filter | q) if cumulative_filter else q + existing_objs_ids = list( + model_manager.filter(cumulative_filter).values_list("pk", flat=True) + ) objs_with_id = model_manager.bulk_create( objs, batch_size=batch_size, ignore_conflicts=ignore_conflicts ) if objs_with_id and objs_with_id[0].pk and not ignore_conflicts: - second_transaction_required = False history_manager.bulk_history_create( objs_with_id, batch_size=batch_size, @@ -107,44 +126,19 @@ def bulk_create_with_history( default_change_reason=default_change_reason, default_date=default_date, ) - if second_transaction_required: - with transaction.atomic(savepoint=False): - # Generate a common query to avoid n+1 selections - # https://github.com/jazzband/django-simple-history/issues/974 - cumulative_filter = None - obj_when_list = [] - for i, obj in enumerate(objs_with_id): - attributes = dict( - filter( - lambda x: x[1] is not None, - model_to_dict(obj, exclude=exclude_fields).items(), - ) - ) - q = Q(**attributes) - cumulative_filter = (cumulative_filter | q) if cumulative_filter else q - # https://stackoverflow.com/a/49625179/1960509 - # DEV: If an attribute has `then` as a key - # then they'll also run into issues with `bulk_update` - # due to shared implementation - # https://github.com/django/django/blob/4.0.4/django/db/models/query.py#L624-L638 - obj_when_list.append(When(**attributes, then=i)) - obj_list = ( - list( - model_manager.filter(cumulative_filter).order_by( - Case(*obj_when_list) - ) - ) - if objs_with_id - else [] + elif objs_with_id: + objs_with_id = list( + model_manager.filter(cumulative_filter) + .exclude(pk__in=existing_objs_ids) + .order_by(Case(*obj_when_list)) ) history_manager.bulk_history_create( - obj_list, + objs_with_id, batch_size=batch_size, default_user=default_user, default_change_reason=default_change_reason, default_date=default_date, ) - objs_with_id = obj_list return objs_with_id