Skip to content

Commit

Permalink
⚡️ Increase performance of list_documents by eager loading
Browse files Browse the repository at this point in the history
  • Loading branch information
pajowu committed Nov 18, 2023
1 parent 3e5154b commit 69a6195
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 6 deletions.
35 changes: 29 additions & 6 deletions backend/transcribee_backend/models/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@ class Task(TaskBase, table=True):
"secondaryjoin": "Task.id==TaskDependency.dependant_on_id",
},
)
dependency_links: List[TaskDependency] = Relationship(
sa_relationship_kwargs={
"primaryjoin": "Task.id==TaskDependency.dependent_task_id",
"viewonly": True,
},
)
dependants: List["Task"] = Relationship(
back_populates="dependencies",
link_model=TaskDependency,
Expand Down Expand Up @@ -155,12 +161,29 @@ class TaskResponse(TaskBase):

@classmethod
def from_orm(cls, task: Task, update={}) -> Self:
return super().from_orm(
task,
update={
"dependencies": [x.id for x in task.dependencies],
**update,
},
# The following code is equivalent to this:
# return super().from_orm(
# task,
# update={
# "dependencies": [x.dependant_on_id for x in task.dependency_links],
# **update,
# },
# )
# But much faster, because from_orm destructures the `obj` to mix it
# with the `update` dict, which causes an access to all attributes,
# including `dependencies`/`dependants`, which are then all separately
# selected from the database, causing many queries.
# Even with a small number of documents this cuts the loading time of
# the `/api/v1/documents/` endpoint roughly in half on my test machine
return cls(
id=task.id,
state=task.state,
dependencies=[x.dependant_on_id for x in task.dependency_links],
current_attempt=None,
document_id=task.document_id,
task_type=task.task_type,
task_parameters=task.task_parameters,
**update,
)


Expand Down
7 changes: 7 additions & 0 deletions backend/transcribee_backend/routers/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from fastapi.exceptions import RequestValidationError
from pydantic import BaseModel
from pydantic.error_wrappers import ErrorWrapper
from sqlalchemy.orm import selectinload
from sqlalchemy.sql.expression import desc
from sqlmodel import Session, col, select
from transcribee_proto.api import Document as ApiDocument
Expand Down Expand Up @@ -409,6 +410,12 @@ def list_documents(
select(Document)
.where(Document.user == token.user)
.order_by(desc(Document.changed_at), Document.id)
.options(
selectinload("tasks"),
selectinload("media_files"),
selectinload("media_files.tags"),
selectinload("tasks.dependency_links"),
)
)
results = session.exec(statement)
return [doc.as_api_document() for doc in results]
Expand Down

0 comments on commit 69a6195

Please sign in to comment.