djrobstep · mattalbr · Jul 11, 2023 · Jul 13, 2023 · Jul 13, 2023 · Jul 13, 2023
diff --git a/sqlakeyset/__init__.py b/sqlakeyset/__init__.py
@@ -1,4 +1,12 @@
-from .paging import get_page, select_page, InvalidPage
+from .paging import (
+    get_homogeneous_pages,
+    get_page,
+    select_homogeneous_pages,
+    select_page,
+    InvalidPage,
+    OrmPageRequest,
+    PageRequest,
+)
 from .results import (
     Page,
     Paging,
@@ -15,11 +23,15 @@
 from .types import Keyset, Marker
 
 __all__ = [
+    "get_homogeneous_pages",
     "get_page",
+    "select_homogeneous_pages",
     "select_page",
     "serialize_bookmark",
     "unserialize_bookmark",
     "Page",
+    "PageRequest",
+    "OrmPageRequest",
     "Paging",
     "Keyset",
     "Marker",

diff --git a/sqlakeyset/paging.py b/sqlakeyset/paging.py
@@ -3,8 +3,11 @@
 from __future__ import annotations
 
 from functools import partial
+from dataclasses import dataclass
 from typing import (
     Any,
+    Callable,
+    Generic,
     List,
     NamedTuple,
     Optional,
@@ -16,12 +19,12 @@
 )
 from typing_extensions import Literal  # to keep python 3.7 support
 
-from sqlalchemy import tuple_, and_, or_
+from sqlalchemy import tuple_, and_, or_, func, text
 from sqlalchemy.engine import Connection
 from sqlalchemy.engine.interfaces import Dialect
 from sqlalchemy.orm import Session
 from sqlalchemy.orm.query import Query
-from sqlalchemy.sql.expression import ColumnElement
+from sqlalchemy.sql.expression import ColumnElement, literal, union_all
 from sqlalchemy.sql.selectable import Select
 
 from .columns import OC, MappedOrderColumn, find_order_key, parse_ob_clause
@@ -152,6 +155,7 @@ def prepare_paging(
     backwards: bool,
     orm: Literal[True],
     dialect: Dialect,
+    page_identifier: Optional[int] = None,
 ) -> _PagingQuery:
     ...
 
@@ -164,6 +168,7 @@ def prepare_paging(
     backwards: bool,
     orm: Literal[False],
     dialect: Dialect,
+    page_identifier: Optional[int] = None,
 ) -> _PagingSelect:
     ...
 
@@ -175,6 +180,7 @@ def prepare_paging(
     backwards: bool,
     orm: bool,
     dialect: Dialect,
+    page_identifier: Optional[int] = None,
 ) -> Union[_PagingQuery, _PagingSelect]:
     if orm:
         if not isinstance(q, Query):
@@ -203,6 +209,16 @@ def prepare_paging(
     extra_columns = [
         col.extra_column for col in mapped_ocols if col.extra_column is not None
     ]
+
+    # page_identifier is used for fetching multiple pages.
+    if page_identifier is not None:
+        extra_columns += [
+            literal(page_identifier).label("_page_identifier"),
+            func.ROW_NUMBER().over(
+                order_by=[c.uo for c in order_cols]
+            ).label("_row_number"),
+        ]
+
     if hasattr(q, "add_columns"):  # ORM or SQLAlchemy 1.4+
         q = q.add_columns(*extra_columns)
     else:
@@ -276,11 +292,13 @@ def core_get_page(
     :param backwards: If ``True``, reverse pagination direction.
     :returns: :class:`Page`
     """
-    # We need the result schema for the *original* query in order to properly
-    # trim off our extra_columns. As far as I can tell, this is the only
+    # In SQLAlchemy 1.3, we need the result schema for the *original* query in order
+    # to properly trim off our extra_columns. As far as I can tell, this is the only
     # way to get it without copy-pasting chunks of the sqlalchemy internals.
     # LIMIT 0 to minimize database load (though the fact that a round trip to
     # the DB has to happen at all is regrettable).
+    #
+    # Thankfully this is obsolete in 1.4+
     result_type = core_result_type(selectable, s)
     sel = prepare_paging(
         q=selectable,
@@ -438,3 +456,189 @@ def get_page(
     place, backwards = process_args(after, before, page)
 
     return orm_get_page(query, per_page, place, backwards)
+
+
+@dataclass
+class OrmPageRequest(Generic[_TP]):
+    """A ``get_homogeneous_pages()`` request; see ``get_page()`` for fields."""
+    query: Query[_TP]  # ORM query to paginate
+    per_page: int = PER_PAGE_DEFAULT
+    after: OptionalKeyset = None  # after/before/page are handed to process_args()
+    before: OptionalKeyset = None
+    page: Optional[Union[MarkerLike, str]] = None
+
+
+@dataclass
+class PageRequest(Generic[_TP]):
+    """A ``select_homogeneous_pages()`` request; see ``select_page()`` for fields."""
+    selectable: Select[_TP]  # Core select to paginate
+    per_page: int = PER_PAGE_DEFAULT
+    after: OptionalKeyset = None  # after/before/page are handed to process_args()
+    before: OptionalKeyset = None
+    page: Optional[Union[MarkerLike, str]] = None
+
+
+def get_homogeneous_pages(requests: list[OrmPageRequest[_TP]]) -> list[Page[Row[_TP]]]:
+    """Fetch one page per request for a batch of homogeneous legacy ORM queries.
+
+    The requests are combined into a single UNION ALL statement so that only
+    one round trip to the database is needed. Consequently every query must
+    select exactly the same columns under the same names; filters and
+    ordering are free to differ between requests.
+
+    Note: the database must support ORDER BY and LIMIT inside the components
+    of a compound select, which SQLite does not.
+
+    :param requests: The page requests to fulfil.
+    :returns: One :class:`Page` per request, in the order of ``requests``.
+    """
+    if not requests:
+        return []
+
+    prepared = [
+        _orm_prepare_homogeneous_page(req, page_id)
+        for page_id, req in enumerate(requests)
+    ]
+
+    # Every prepared query carries _page_identifier and _row_number columns;
+    # sorting the union on them keeps each page's rows together and in order.
+    first, *others = (p.paging_query.query for p in prepared)
+    combined = first.union_all(*others)
+    combined = combined.order_by(text("_page_identifier"), text("_row_number"))
+
+    # Start with an empty bucket per request so that a page which returns no
+    # rows still gets an entry.
+    rows_by_page = {page_id: [] for page_id in range(len(prepared))}
+    for row in combined.all():
+        rows_by_page[row._page_identifier].append(row)
+
+    return [
+        entry.page_from_rows(rows_by_page[page_id])
+        for page_id, entry in enumerate(prepared)
+    ]
+
+
+def select_homogeneous_pages(
+    requests: list[PageRequest[_TP]], s: Union[Session, Connection]
+) -> list[Page[Row[_TP]]]:
+    """Get multiple pages of results for homogeneous Core selectables.
+
+    Under the hood this generates a UNION ALL so that all pages are fetched by
+    one statement. That means each selectable must select exactly the same
+    columns. They may have different filters or ordering, but must result in
+    selecting the same columns with the same names.
+
+    Note: This requires the underlying database to support ORDER BY and LIMIT
+    statements in components of a compound select, which SQLite does not.
+
+    Note: When more than one page is requested, the first selectable is
+    executed a second time on its own to recover the result keys.
+
+    Resulting pages are returned in the same order as the original page requests.
+    """
+    if not requests:
+        return []
+
+    prepared_queries = [
+        _core_prepare_homogeneous_page(request, s, i)
+        for i, request in enumerate(requests)
+    ]
+
+    if len(requests) == 1:
+        select = prepared_queries[0].paging_query.select
+    else:
+        select = union_all(*[p.paging_query.select for p in prepared_queries])
+        select = select.order_by(text("_page_identifier"), text("_row_number"))
+
+    selected = s.execute(select)
+
+    # We need to make sure there's an entry for every page in case some return
+    # empty.
+    page_to_rows = {i: [] for i in range(len(requests))}
+    for row in selected.fetchall():
+        page_to_rows[row._page_identifier].append(row)
+
+    # This is an unfortunate side effect of union_all. It appears when we union_all
+    # a bunch of selects, it changes the "keys" on us in cases where the column
+    # name and python attribute don't match. So we have to execute the first
+    # query standalone to get the correct keys.
+    subselect_result = (
+        s.execute(prepared_queries[0].paging_query.select)
+        if len(prepared_queries) > 1 else selected
+    )
+    return [
+        prepared.page_from_rows(page_to_rows[i], subselect_result)
+        for i, prepared in enumerate(prepared_queries)
+    ]
+
+
+@dataclass
+class _PreparedQuery:
+    paging_query: Union[_PagingQuery, _PagingSelect]  # output of prepare_paging()
+    page_from_rows: Callable[..., Page[Row[_TP]]]  # builds the Page from fetched rows
+
+
+def _core_prepare_homogeneous_page(
+    request: PageRequest[_TP], s: Union[Session, Connection], page_identifier: int
+) -> _PreparedQuery:
+    """Prepare one Core page request for ``select_homogeneous_pages()``.
+
+    :param s: Session or connection used for the result type and dialect.
+    :param page_identifier: Value emitted as the ``_page_identifier`` column.
+    :returns: The paging select plus a callback that builds its :class:`Page`.
+    """
+    place, backwards = process_args(request.after, request.before, request.page)
+
+    selectable = request.selectable
+    result_type = core_result_type(selectable, s)
+    sel = prepare_paging(
+        q=selectable,
+        per_page=request.per_page,
+        place=place,
+        backwards=backwards,
+        orm=False,
+        dialect=get_bind(q=selectable, s=s).dialect,
+        page_identifier=page_identifier,
+    )
+
+    def page_from_rows(rows, selected):
+        keys = list(selected.keys())
+        keys = keys[: len(keys) - len(sel.extra_columns)]  # drop appended extras
+        return core_page_from_rows(
+            sel,
+            rows,
+            keys,
+            result_type,
+            request.per_page,
+            backwards,
+            current_place=place,
+        )
+
+    return _PreparedQuery(paging_query=sel, page_from_rows=page_from_rows)
+
+
+def _orm_prepare_homogeneous_page(
+    request: OrmPageRequest[_TP], page_identifier: int
+) -> _PreparedQuery:
+    """Prepare one ORM page request for ``get_homogeneous_pages()``."""
+    place, backwards = process_args(request.after, request.before, request.page)
+    # Result type and keys are taken from the query as the caller supplied it.
+    orm_query = request.query
+    row_type = orm_result_type(orm_query)
+    keys = orm_query_keys(orm_query)
+    paging = prepare_paging(
+        q=orm_query,
+        per_page=request.per_page,
+        place=place,
+        backwards=backwards,
+        orm=True,
+        dialect=orm_query.session.get_bind().dialect,
+        page_identifier=page_identifier,
+    )
+
+    def build_page(rows):
+        return orm_page_from_rows(
+            paging, rows, keys, row_type, request.per_page, backwards, current_place=place
+        )
+
+    return _PreparedQuery(paging_query=paging, page_from_rows=build_page)
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -246,6 +246,7 @@ def _dburl(request):
 
 dburl = pytest.fixture(params=SUPPORTED_ENGINES)(_dburl)
 no_mysql_dburl = pytest.fixture(params=["sqlite", "postgresql"])(_dburl)
+no_sqlite_dburl = pytest.fixture(params=["mysql", "postgresql"])(_dburl)
 pg_only_dburl = pytest.fixture(params=["postgresql"])(_dburl)