Skip to content

Breaking change: Improve save() performance by skipping index creation #2702

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion benchmarks/test_basic_doc_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
StringField,
)

mongoengine.connect(db="mongoengine_benchmark_test")
mongoengine.connect(db="mongoengine_benchmark_test", w=1)


def timeit(f, n=10000):
Expand Down
15 changes: 5 additions & 10 deletions benchmarks/test_inserts.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,11 @@ def main():
setup = """
from pymongo import MongoClient

connection = MongoClient()
connection = MongoClient(w=1)
connection.drop_database('mongoengine_benchmark_test')
"""

stmt = """
from pymongo import MongoClient

connection = MongoClient()

db = connection.mongoengine_benchmark_test
noddy = db.noddy

Expand All @@ -29,13 +25,12 @@ def main():
"""

print("-" * 100)
print("PyMongo: Creating 10000 dictionaries.")
print('PyMongo: Creating 10000 dictionaries (write_concern={"w": 1}).')
t = timeit.Timer(stmt=stmt, setup=setup)
print(f"{t.timeit(1)}s")

stmt = """
from pymongo import MongoClient, WriteConcern
connection = MongoClient()
from pymongo import WriteConcern

db = connection.mongoengine_benchmark_test
noddy = db.noddy.with_options(write_concern=WriteConcern(w=0))
Expand Down Expand Up @@ -64,7 +59,7 @@ def main():
connection.close()

from mongoengine import Document, DictField, connect
connect("mongoengine_benchmark_test")
connect("mongoengine_benchmark_test", w=1)

class Noddy(Document):
fields = DictField()
Expand All @@ -82,7 +77,7 @@ class Noddy(Document):
"""

print("-" * 100)
print("MongoEngine: Creating 10000 dictionaries.")
print('MongoEngine: Creating 10000 dictionaries (write_concern={"w": 1}).')
t = timeit.Timer(stmt=stmt, setup=setup)
print(f"{t.timeit(1)}s")

Expand Down
87 changes: 87 additions & 0 deletions benchmarks/test_save_with_indexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import timeit


def main():
    """Benchmark Document.save() throughput under different index setups.

    Times 10000 saves for five model variants: no indexes, one index,
    two indexes, and the latter two again with
    ``auto_create_index_on_save=True`` (which re-runs ensure_indexes on
    every save). Requires a MongoDB server on the default host/port;
    results are printed as best-of-3 wall-clock seconds.
    """
    # Shared setup: wipe the benchmark database, connect with w=1 so
    # every save waits for server acknowledgement, and declare the
    # five model variants under test.
    setup = """
from pymongo import MongoClient

connection = MongoClient()
connection.drop_database("mongoengine_benchmark_test")
connection.close()

from mongoengine import connect, Document, IntField, StringField
connect("mongoengine_benchmark_test", w=1)

class User0(Document):
    name = StringField()
    age = IntField()

class User1(Document):
    name = StringField()
    age = IntField()
    meta = {"indexes": [["name"]]}

class User2(Document):
    name = StringField()
    age = IntField()
    meta = {"indexes": [["name", "age"]]}

class User3(Document):
    name = StringField()
    age = IntField()
    meta = {"indexes": [["name"]], "auto_create_index_on_save": True}

class User4(Document):
    name = StringField()
    age = IntField()
    meta = {"indexes": [["name", "age"]], "auto_create_index_on_save": True}
"""

    def _run(model_name, description):
        # One benchmark: save 10000 documents of the given model class
        # and report the best of 3 runs (min reduces scheduler noise).
        stmt = f"""
for i in range(10000):
    {model_name}(name="Nunu", age=9).save()
"""
        print("-" * 80)
        print(f"Save 10000 documents with {description}.")
        t = timeit.Timer(stmt=stmt, setup=setup)
        print(f"{min(t.repeat(repeat=3, number=1))}s")

    _run("User0", "0 indexes")
    _run("User1", "1 index")
    _run("User2", "2 indexes")
    _run("User3", "1 index (auto_create_index_on_save=True)")
    _run("User4", "2 indexes (auto_create_index_on_save=True)")


if __name__ == "__main__":
    main()
5 changes: 5 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ Development
- Support MONGODB-AWS authentication mechanism (with `authmechanismproperties`) #2507
- Turning off dereferencing for the results of distinct query. #2663
- Add tests against Mongo 5.0 in pipeline
- BREAKING CHANGE: Improved the performance of :meth:`~mongoengine.Document.save()`
by removing the call to :meth:`~mongoengine.Document.ensure_indexes` unless
``meta['auto_create_index_on_save']`` is set to True.
- Added meta ``auto_create_index_on_save`` so you can enable index creation
on :meth:`~mongoengine.Document.save()`.

Changes in 0.24.2
=================
Expand Down
8 changes: 7 additions & 1 deletion docs/guide/defining-documents.rst
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,7 @@ There are a few top level defaults for all indexes that can be set::
'index_background': True,
'index_cls': False,
'auto_create_index': True,
'auto_create_index_on_save': False,
}


Expand All @@ -588,10 +589,15 @@ There are a few top level defaults for all indexes that can be set::

:attr:`auto_create_index` (Optional)
When this is True (default), MongoEngine will ensure that the correct
indexes exist in MongoDB each time a command is run. This can be disabled
indexes exist in MongoDB when the Document is first used. This can be disabled
in systems where indexes are managed separately. Disabling this will improve
performance.

:attr:`auto_create_index_on_save` (Optional)
When this is True, MongoEngine will ensure that the correct
indexes exist in MongoDB each time :meth:`~mongoengine.document.Document.save`
is run. Enabling this will degrade performance. The default is False. This
option was added in version 0.25.

Compound Indexes and Indexing sub documents
-------------------------------------------
Expand Down
10 changes: 9 additions & 1 deletion mongoengine/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,10 @@ def save(
meta['cascade'] = True. Also you can pass different kwargs to
the cascade save using cascade_kwargs which overwrites the
existing kwargs with custom values.
.. versionchanged:: 0.25
save() no longer calls :meth:`~mongoengine.Document.ensure_indexes`
unless ``meta['auto_create_index_on_save']`` is set to True.

"""
signal_kwargs = signal_kwargs or {}

Expand All @@ -407,8 +411,12 @@ def save(
# it might be refreshed by the pre_save_post_validation hook, e.g., for etag generation
doc = self.to_mongo()

if self._meta.get("auto_create_index", True):
if self._meta.get("auto_create_index_on_save", False):
self.ensure_indexes()
else:
# Call _get_collection so that errors from ensure_indexes are not
# wrapped in OperationError, see test_primary_key_unique_not_working.
self._get_collection()

try:
# Save a new document or update an existing one
Expand Down
66 changes: 37 additions & 29 deletions tests/document/test_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -983,44 +983,52 @@ class Book(Document):

def test_indexes_after_database_drop(self):
"""
Test to ensure that indexes are re-created on a collection even
after the database has been dropped.
Test to ensure that indexes are not re-created on a collection
after the database has been dropped unless auto_create_index_on_save
is enabled.

Issue #812
Issue #812 and #1446.
"""
# Use a new connection and database since dropping the database could
# cause concurrent tests to fail.
connection = connect(
db="tempdatabase", alias="test_indexes_after_database_drop"
)
tmp_alias = "test_indexes_after_database_drop"
connection = connect(db="tempdatabase", alias=tmp_alias)
self.addCleanup(connection.drop_database, "tempdatabase")

class BlogPost(Document):
title = StringField()
slug = StringField(unique=True)
meta = {"db_alias": tmp_alias}

meta = {"db_alias": "test_indexes_after_database_drop"}
BlogPost.drop_collection()
BlogPost(slug="test").save()
with pytest.raises(NotUniqueError):
BlogPost(slug="test").save()

try:
BlogPost.drop_collection()

# Create Post #1
post1 = BlogPost(title="test1", slug="test")
post1.save()

# Drop the Database
connection.drop_database("tempdatabase")

# Re-create Post #1
post1 = BlogPost(title="test1", slug="test")
post1.save()

# Create Post #2
post2 = BlogPost(title="test2", slug="test")
with pytest.raises(NotUniqueError):
post2.save()
finally:
# Drop the temporary database at the end
connection.drop_database("tempdatabase")
# Drop the Database
connection.drop_database("tempdatabase")
BlogPost(slug="test").save()
# No error because the index was not recreated after dropping the database.
BlogPost(slug="test").save()

# Repeat with auto_create_index_on_save: True.
class BlogPost2(Document):
slug = StringField(unique=True)
meta = {
"db_alias": tmp_alias,
"auto_create_index_on_save": True,
}

BlogPost2.drop_collection()
BlogPost2(slug="test").save()
with pytest.raises(NotUniqueError):
BlogPost2(slug="test").save()

# Drop the Database
connection.drop_database("tempdatabase")
BlogPost2(slug="test").save()
# Error because ensure_indexes is run on every save().
with pytest.raises(NotUniqueError):
BlogPost2(slug="test").save()

def test_index_dont_send_cls_option(self):
"""
Expand Down