Skip to content

Commit

Permalink
Merge pull request #1947 from dandi/fix-dandiset-pagination
Browse files Browse the repository at this point in the history
Only use custom pagination class for asset list endpoint
  • Loading branch information
jjnesbitt authored Jun 3, 2024
2 parents 288b854 + 68904ee commit 6711a0a
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 102 deletions.
86 changes: 0 additions & 86 deletions dandiapi/api/tests/test_pagination.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,65 +3,6 @@
import pytest


@pytest.mark.django_db()
def test_dandiset_pagination(api_client, dandiset_factory):
endpoint = '/api/dandisets/'
for _ in range(10):
dandiset_factory()

# First page
resp = api_client.get(endpoint, {'order': 'id', 'page_size': 5}).json()
assert resp['count'] == 10
assert resp['next'] is not None
page_one = resp['results']
assert len(page_one) == 5

# Second page
resp = api_client.get(endpoint, {'order': 'id', 'page_size': 5, 'page': 2}).json()
assert resp['count'] is None
assert resp['next'] is None
page_two = resp['results']
assert len(page_two) == 5

# Full page
resp = api_client.get(endpoint, {'order': 'id', 'page_size': 100}).json()
assert resp['count'] == 10
assert resp['next'] is None
full_page = resp['results']
assert len(full_page) == 10

# Assert full list is ordered the same as both paginated lists
assert full_page == page_one + page_two


@pytest.mark.django_db()
def test_version_pagination(api_client, dandiset, published_version_factory):
endpoint = f'/api/dandisets/{dandiset.identifier}/versions/'

for _ in range(10):
published_version_factory(dandiset=dandiset)

resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5}).json()

assert resp['count'] == 10
page_one = resp['results']
assert len(page_one) == 5

resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5, 'page': 2}).json()
assert resp['count'] is None
assert resp['next'] is None
page_two = resp['results']
assert len(page_two) == 5

resp = api_client.get(endpoint, {'order': 'created', 'page_size': 100}).json()
assert resp['count'] == 10
assert resp['next'] is None
full_page = resp['results']
assert len(full_page) == 10

assert full_page == page_one + page_two


@pytest.mark.django_db()
def test_asset_pagination(api_client, version, asset_factory):
endpoint = f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/'
Expand Down Expand Up @@ -92,30 +33,3 @@ def test_asset_pagination(api_client, version, asset_factory):

# Assert full list is ordered the same as both paginated lists
assert full_page == page_one + page_two


@pytest.mark.django_db()
def test_zarr_pagination(api_client, zarr_archive_factory):
endpoint = '/api/zarr/'

for _ in range(10):
zarr_archive_factory()

resp = api_client.get(endpoint, {'page_size': 5}).json()
assert resp['count'] == 10
page_one = resp['results']
assert len(page_one) == 5

resp = api_client.get(endpoint, {'page_size': 5, 'page': 2}).json()
assert resp['count'] is None
assert resp['next'] is None
page_two = resp['results']
assert len(page_two) == 5

resp = api_client.get(endpoint, {'page_size': 100}).json()
assert resp['count'] == 10
assert resp['next'] is None
full_page = resp['results']
assert len(full_page) == 10

assert full_page == page_one + page_two
9 changes: 6 additions & 3 deletions dandiapi/api/views/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
VERSIONS_DANDISET_PK_PARAM,
VERSIONS_VERSION_PARAM,
)
from dandiapi.api.views.pagination import DandiPagination
from dandiapi.api.views.pagination import DandiPagination, LazyPagination
from dandiapi.api.views.serializers import (
AssetDetailSerializer,
AssetDownloadQueryParameterSerializer,
Expand Down Expand Up @@ -431,7 +431,10 @@ def list(self, request, *args, **kwargs):
asset_queryset = asset_queryset.filter(path__iregex=glob_pattern.replace('\\*', '.*'))

# Retrieve just the first N asset IDs, and use them for pagination
page_of_asset_ids = self.paginate_queryset(asset_queryset.values_list('id', flat=True))
# Use custom pagination class to reduce unnecessary counts of assets
paginator = LazyPagination()
qs = asset_queryset.values_list('id', flat=True)
page_of_asset_ids = paginator.paginate_queryset(qs, request=self.request, view=self)

# Not sure when the page is ever None, but this condition is checked for compatibility with
# the original implementation: https://github.com/encode/django-rest-framework/blob/f4194c4684420ac86485d9610adf760064db381f/rest_framework/mixins.py#L37-L46
Expand All @@ -453,7 +456,7 @@ def list(self, request, *args, **kwargs):

# Paginate and return
serializer = self.get_serializer(queryset, many=True, metadata=include_metadata)
return self.get_paginated_response(serializer.data)
return paginator.get_paginated_response(serializer.data)

@swagger_auto_schema(
query_serializer=AssetPathsQueryParameterSerializer,
Expand Down
31 changes: 18 additions & 13 deletions dandiapi/api/views/pagination.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,3 @@
"""
Implement an optimized pagination scheme.
This module provides a custom pagination implementation, as the existing `PageNumberPagination`
class returns a `count` field for every page returned. This can be very inefficient on large tables,
and in reality, the count is only necessary on the first page of results. This module implements
such a pagination scheme, only returning 'count' on the first page of results.
"""

from __future__ import annotations

from collections import OrderedDict
Expand All @@ -17,6 +8,24 @@ class returns a `count` field for every page returned. This can be very ineffici
from rest_framework.response import Response


class DandiPagination(PageNumberPagination):
page_size = 100
max_page_size = 1000
page_size_query_param = 'page_size'

@cached_property
def page_size_query_description(self):
return f'{super().page_size_query_description[:-1]} (maximum {self.max_page_size}).'


"""
The below code provides a custom pagination implementation, as the existing `PageNumberPagination`
class returns a `count` field for every page returned. This can be very inefficient on large tables,
and in reality, the count is only necessary on the first page of results. This module implements
such a pagination scheme, only returning 'count' on the first page of results.
"""


class LazyPage(Page):
"""
A page class that doesn't call .count() on the queryset it's paging from.
Expand Down Expand Up @@ -96,7 +105,3 @@ def get_paginated_response(self, data) -> Response:
)

return Response(page_dict)


# Alias
DandiPagination = LazyPagination

0 comments on commit 6711a0a

Please sign in to comment.