Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement get_homogeneous_pages. #87

Closed
wants to merge 64 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
ff21c99
Implement get_homogeneous_pages.
mattalbr Jul 11, 2023
eff5d52
Check in unit test for multi-page lookup.
mattalbr Jul 13, 2023
d37132b
Fix missing symbols.
mattalbr Jul 13, 2023
df7466c
Fix flake8 test_paging.py
mattalbr Jul 13, 2023
4843e68
Export get_homogeneous_pages.
mattalbr Jul 13, 2023
9985e4e
Actually import get_homogeneous_pages.
mattalbr Jul 13, 2023
bd486e1
Add PageRequest.
mattalbr Jul 13, 2023
e534243
Fix bad symbol.
mattalbr Jul 13, 2023
5e6fd6a
Extend gathered.
mattalbr Jul 13, 2023
f5075c3
Use Union instead of |.
mattalbr Jul 13, 2023
9aae97f
Use Tuple instead of tuple.
mattalbr Jul 13, 2023
cbefa30
Use Book instead of Animal.
mattalbr Jul 13, 2023
bb86b7e
More python 3.7 and fix wrong symbol name.
mattalbr Jul 13, 2023
cdd0fc2
Improve tests.
mattalbr Jul 13, 2023
2af1ca2
Remove unused db session.
mattalbr Jul 13, 2023
76c2939
Exclude sqlite.
mattalbr Jul 13, 2023
6867d83
s/dburl/no_sqlite_url
mattalbr Jul 13, 2023
19ff009
s/url/dburl
mattalbr Jul 13, 2023
abdef8a
Fix for loop variable.
mattalbr Jul 13, 2023
a5514cf
Per page should be at least 1.
mattalbr Jul 14, 2023
b7a34f5
Fix UNION ALL ordering which isn't guaranteed.
mattalbr Jul 14, 2023
2f4f4bd
NamedTuple is immutable.
mattalbr Jul 14, 2023
3909554
Trailing whitespace.
mattalbr Jul 14, 2023
3393641
Use uo instead of element to get ordering right for ROW_NUMBER.
mattalbr Jul 14, 2023
f985063
Use deque for extendleft functionality.
mattalbr Jul 14, 2023
e5feac2
Debugging info.
mattalbr Jul 14, 2023
46248d2
Testing.
mattalbr Jul 14, 2023
c5a606e
Add before preparing paging.
mattalbr Jul 14, 2023
3ccdd3e
author_id is nullable which doesn't work with paging.
mattalbr Jul 14, 2023
0e7cc0b
Actually end the test on success.
mattalbr Jul 14, 2023
1378936
Add a test for fetching columns.
mattalbr Jul 14, 2023
b610c69
Make test homogeneous.
mattalbr Jul 14, 2023
d46719f
Move page_identifier inside prepare_paging.
mattalbr Jul 14, 2023
bdac930
Add select_homogeneous_pages and refactor tests.
mattalbr Jul 14, 2023
1127c0f
flake8 fixes.
mattalbr Jul 14, 2023
24943ea
More flake8.
mattalbr Jul 14, 2023
5bb4c5d
Convert deque to list.
mattalbr Jul 14, 2023
29985c4
Test select_homogeneous_pages.
mattalbr Jul 14, 2023
87a6a08
Flake 8 violations.
mattalbr Jul 14, 2023
993e45f
Fix default.
mattalbr Jul 14, 2023
23ec6da
non-default before default.
mattalbr Jul 14, 2023
aff47d8
Add page_identifier
mattalbr Jul 14, 2023
dfee9dd
filter -> where.
mattalbr Jul 14, 2023
403a397
Add debugging.
mattalbr Jul 14, 2023
ead71a3
Fix select statements.
mattalbr Jul 14, 2023
685fca4
Go back to Book orm.
mattalbr Jul 14, 2023
ccb3e91
add a print statement.
mattalbr Jul 18, 2023
4f62fba
Start with the simplest test first.
mattalbr Jul 18, 2023
43694f0
flake8
mattalbr Jul 18, 2023
34c41b2
Print statement caused exception.
mattalbr Jul 18, 2023
db47749
Don't double order single selects.
mattalbr Jul 18, 2023
90e9ae7
Fix test_core..._empty_queries.
mattalbr Jul 18, 2023
8ecb334
flake8.
mattalbr Jul 18, 2023
6c33a0f
One more test.
mattalbr Jul 18, 2023
e230d1a
Try using sub-select for key generation.
mattalbr Jul 18, 2023
0917ceb
See if individualized selected works for select(Book)
mattalbr Jul 18, 2023
719440c
Try a different way of forming the union.
mattalbr Jul 18, 2023
e192516
flake8
mattalbr Jul 18, 2023
6b596a5
Backtrack on testing, plus add a more fundamental test.
mattalbr Jul 18, 2023
806ef9d
Add a test.
mattalbr Jul 18, 2023
fa5fd49
Only execute one subselect as necessary.
mattalbr Jul 18, 2023
1b9133b
flake8
mattalbr Jul 18, 2023
1909b8e
Clean up single query select.
mattalbr Jul 18, 2023
1af24f8
Uncomment test to send Anthony error.
mattalbr Jul 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion sqlakeyset/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,12 @@
from .paging import get_page, select_page, InvalidPage
from .paging import (
get_homogeneous_pages,
get_page,
select_homogeneous_pages,
select_page,
InvalidPage,
OrmPageRequest,
PageRequest,
)
from .results import (
Page,
Paging,
Expand All @@ -15,11 +23,15 @@
from .types import Keyset, Marker

__all__ = [
"get_homogeneous_pages",
"get_page",
"select_homogeneous_pages",
"select_page",
"serialize_bookmark",
"unserialize_bookmark",
"Page",
"PageRequest",
"OrmPageRequest",
"Paging",
"Keyset",
"Marker",
Expand Down
212 changes: 208 additions & 4 deletions sqlakeyset/paging.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
from __future__ import annotations

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CI fails at flake8 with a bunch of undefined names - looks like you've forgotten some imports?

from functools import partial
from dataclasses import dataclass
from typing import (
Any,
Callable,
Generic,
List,
NamedTuple,
Optional,
Expand All @@ -16,12 +19,12 @@
)
from typing_extensions import Literal # to keep python 3.7 support

from sqlalchemy import tuple_, and_, or_
from sqlalchemy import tuple_, and_, or_, func, text
from sqlalchemy.engine import Connection
from sqlalchemy.engine.interfaces import Dialect
from sqlalchemy.orm import Session
from sqlalchemy.orm.query import Query
from sqlalchemy.sql.expression import ColumnElement
from sqlalchemy.sql.expression import ColumnElement, literal, union_all
from sqlalchemy.sql.selectable import Select

from .columns import OC, MappedOrderColumn, find_order_key, parse_ob_clause
Expand Down Expand Up @@ -152,6 +155,7 @@ def prepare_paging(
backwards: bool,
orm: Literal[True],
dialect: Dialect,
page_identifier: Optional[int] = None,
) -> _PagingQuery:
...

Expand All @@ -164,6 +168,7 @@ def prepare_paging(
backwards: bool,
orm: Literal[False],
dialect: Dialect,
page_identifier: Optional[int] = None,
) -> _PagingSelect:
...

Expand All @@ -175,6 +180,7 @@ def prepare_paging(
backwards: bool,
orm: bool,
dialect: Dialect,
page_identifier: Optional[int] = None,
) -> Union[_PagingQuery, _PagingSelect]:
if orm:
if not isinstance(q, Query):
Expand Down Expand Up @@ -203,6 +209,16 @@ def prepare_paging(
extra_columns = [
col.extra_column for col in mapped_ocols if col.extra_column is not None
]

# page_identifier is used for fetching multiple pages.
if page_identifier is not None:
extra_columns += [
literal(page_identifier).label("_page_identifier"),
func.ROW_NUMBER().over(
order_by=[c.uo for c in order_cols]
).label("_row_number"),
]

if hasattr(q, "add_columns"): # ORM or SQLAlchemy 1.4+
q = q.add_columns(*extra_columns)
else:
Expand Down Expand Up @@ -276,11 +292,13 @@ def core_get_page(
:param backwards: If ``True``, reverse pagination direction.
:returns: :class:`Page`
"""
# We need the result schema for the *original* query in order to properly
# trim off our extra_columns. As far as I can tell, this is the only
# In SQLAlchemy 1.3, we need the result schema for the *original* query in order
# to properly trim off our extra_columns. As far as I can tell, this is the only
# way to get it without copy-pasting chunks of the sqlalchemy internals.
# LIMIT 0 to minimize database load (though the fact that a round trip to
# the DB has to happen at all is regrettable).
#
# Thankfully this is obsolete in 1.4+
result_type = core_result_type(selectable, s)
sel = prepare_paging(
q=selectable,
Expand Down Expand Up @@ -438,3 +456,189 @@ def get_page(
place, backwards = process_args(after, before, page)

return orm_get_page(query, per_page, place, backwards)


@dataclass
class OrmPageRequest(Generic[_TP]):
    """One page request against an ORM ``Query``, for ``get_homogeneous_pages()``.

    See ``get_page()`` documentation for parameter explanations.
    """

    # The ORM query to paginate.
    query: Query[_TP]
    # Page size forwarded to prepare_paging(); defaults to PER_PAGE_DEFAULT.
    per_page: int = PER_PAGE_DEFAULT
    # after / before / page are handed together to process_args(), which
    # resolves them into a (place, backwards) pair.
    after: OptionalKeyset = None
    before: OptionalKeyset = None
    page: Optional[Union[MarkerLike, str]] = None


@dataclass
class PageRequest(Generic[_TP]):
    """One page request against a ``Select``, for ``select_homogeneous_pages()``.

    See ``select_page()`` documentation for parameter explanations.
    """

    # The selectable to paginate.
    selectable: Select[_TP]
    # Page size forwarded to prepare_paging(); defaults to PER_PAGE_DEFAULT.
    per_page: int = PER_PAGE_DEFAULT
    # after / before / page are handed together to process_args(), which
    # resolves them into a (place, backwards) pair.
    after: OptionalKeyset = None
    before: OptionalKeyset = None
    page: Optional[Union[MarkerLike, str]] = None


def get_homogeneous_pages(requests: list[OrmPageRequest[_TP]]) -> list[Page[Row[_TP]]]:
    """Fetch one page per request for a batch of homogeneous legacy ORM queries.

    All requests are combined into a single UNION ALL query, so every query
    must select exactly the same columns with the same names. Filters and
    ordering may differ between requests.

    Note: This requires the underlying database to support ORDER BY and LIMIT
    statements in components of a compound select, which SQLite does not.

    :param requests: The page requests to execute together.
    :returns: One :class:`Page` per request, in the same order as ``requests``.
        An empty ``requests`` list yields an empty list.
    """
    if not requests:
        return []

    prepared_queries = [
        _orm_prepare_homogeneous_page(request, index)
        for index, request in enumerate(requests)
    ]

    head, *tail = prepared_queries
    combined = head.paging_query.query.union_all(
        *[prepared.paging_query.query for prepared in tail]
    )
    # UNION ALL gives no ordering guarantee, so sort by the synthetic columns
    # that prepare_paging() attached to each component query.
    combined = combined.order_by(text("_page_identifier"), text("_row_number"))

    # Seed a bucket for every request up front so that requests which return
    # no rows still produce an (empty) page.
    page_to_rows = {index: [] for index in range(len(prepared_queries))}
    for row in combined.all():
        page_to_rows[row._page_identifier].append(row)

    return [
        prepared.page_from_rows(page_to_rows[index])
        for index, prepared in enumerate(prepared_queries)
    ]


def select_homogeneous_pages(
    requests: list[PageRequest[_TP]], s: Union[Session, Connection]
) -> list[Page[Row[_TP]]]:
    """Get multiple pages of results for homogeneous Core selectables.

    The rows for all pages are fetched with a single statement. When more than
    one request is given, that statement is a UNION ALL of the individual
    selects, which means each select must produce exactly the same columns
    with the same names. They may have different filters or ordering.

    With more than one request, the first prepared select is additionally
    executed on its own, purely to recover the result keys (see the comment
    in the body), so the call is not strictly a single round trip.

    Note: This requires the underlying database to support ORDER BY and LIMIT
    statements in components of a compound select, which SQLite does not.

    :param requests: The page requests to execute together.
    :param s: The session or connection used to execute the statement(s).
    :returns: One :class:`Page` per request, in the same order as ``requests``.
        An empty ``requests`` list yields an empty list.
    """
    if not requests:
        return []

    prepared_queries = [
        _core_prepare_homogeneous_page(request, s, i)
        for i, request in enumerate(requests)
    ]

    if len(prepared_queries) == 1:
        # A lone select is already ordered by prepare_paging(); don't order twice.
        statement = prepared_queries[0].paging_query.select
    else:
        statement = union_all(*[p.paging_query.select for p in prepared_queries])
        # UNION ALL gives no ordering guarantee, so sort by the synthetic
        # columns that prepare_paging() attached to each component select.
        statement = statement.order_by(text("_page_identifier"), text("_row_number"))

    selected = s.execute(statement)
    results = selected.fetchall()

    # We need to make sure there's an entry for every page in case some return
    # empty.
    page_to_rows = {i: [] for i in range(len(prepared_queries))}
    for row in results:
        page_to_rows[row._page_identifier].append(row)

    # This is an unfortunate side effect of union_all. It appears when we union_all
    # a bunch of selects, it changes the "keys" on us in cases where the column
    # name and python attribute don't match. So we have to execute the first
    # query standalone to get the correct keys.
    subselect_result = (
        s.execute(prepared_queries[0].paging_query.select)
        if len(prepared_queries) > 1
        else selected
    )
    try:
        return [
            prepared.page_from_rows(page_to_rows[i], subselect_result)
            for i, prepared in enumerate(prepared_queries)
        ]
    finally:
        # The keys-only result is never iterated; close it explicitly so its
        # cursor is released instead of lingering until garbage collection.
        subselect_result.close()


@dataclass
class _PreparedQuery:
    """Pairs one request's prepared paging query with its page-building callback."""

    # The value returned by prepare_paging() for this request.
    paging_query: Union[_PagingQuery, _PagingSelect]
    # Turns the rows fetched for this request into a Page. The ORM variant is
    # called as page_from_rows(rows); the Core variant as
    # page_from_rows(rows, selected).
    page_from_rows: Callable[..., Page[Row[_TP]]]


def _core_prepare_homogeneous_page(
    request: PageRequest[_TP], s: Union[Session, Connection], page_identifier: int
) -> _PreparedQuery:
    """Prepare one Core page request for inclusion in a multi-page fetch.

    :param request: The page request to prepare.
    :param s: The session or connection the select will be executed on; used
        here to determine the result type and the dialect.
    :param page_identifier: Value passed through to ``prepare_paging()`` so the
        rows belonging to this request can be told apart in the combined result.
    :returns: The prepared paging select together with a
        ``page_from_rows(rows, selected)`` callback that builds the page.
    """
    place, backwards = process_args(request.after, request.before, request.page)

    selectable = request.selectable
    result_type = core_result_type(selectable, s)
    sel = prepare_paging(
        q=selectable,
        per_page=request.per_page,
        place=place,
        backwards=backwards,
        orm=False,
        dialect=get_bind(q=selectable, s=s).dialect,
        page_identifier=page_identifier,
    )

    def page_from_rows(rows, selected):
        """Build the page from ``rows``, taking column keys from ``selected``."""
        keys = list(selected.keys())
        # The extra columns added by prepare_paging() sit at the end of the
        # row; trim their keys off so only the caller's columns remain.
        keys = keys[: len(keys) - len(sel.extra_columns)]
        return core_page_from_rows(
            sel,
            rows,
            keys,
            result_type,
            request.per_page,
            backwards,
            current_place=place,
        )

    return _PreparedQuery(paging_query=sel, page_from_rows=page_from_rows)


def _orm_prepare_homogeneous_page(
    request: OrmPageRequest[_TP], page_identifier: int
) -> _PreparedQuery:
    """Prepare one ORM page request for inclusion in a multi-page fetch.

    :param request: The page request to prepare.
    :param page_identifier: Value passed through to ``prepare_paging()`` so the
        rows belonging to this request can be told apart in the combined result.
    :returns: The prepared paging query together with a ``page_from_rows(rows)``
        callback that builds the page.
    """
    place, backwards = process_args(request.after, request.before, request.page)

    orm_query = request.query
    result_type = orm_result_type(orm_query)
    keys = orm_query_keys(orm_query)
    dialect = orm_query.session.get_bind().dialect

    prepared = prepare_paging(
        q=orm_query,
        per_page=request.per_page,
        place=place,
        backwards=backwards,
        orm=True,
        dialect=dialect,
        page_identifier=page_identifier,
    )

    def build_page(rows):
        """Build the page for this request from its fetched ``rows``."""
        return orm_page_from_rows(
            prepared,
            rows,
            keys,
            result_type,
            request.per_page,
            backwards,
            current_place=place,
        )

    return _PreparedQuery(paging_query=prepared, page_from_rows=build_page)
1 change: 1 addition & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ def _dburl(request):

# Database-URL fixtures: each one wraps _dburl with a different list of
# engine params.
dburl = pytest.fixture(params=SUPPORTED_ENGINES)(_dburl)
no_mysql_dburl = pytest.fixture(params=["sqlite", "postgresql"])(_dburl)
# Parametrized over MySQL and PostgreSQL only (no SQLite).
no_sqlite_dburl = pytest.fixture(params=["mysql", "postgresql"])(_dburl)
pg_only_dburl = pytest.fixture(params=["postgresql"])(_dburl)


Expand Down
Loading