Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Only use custom pagination class for asset list endpoint #1947

Merged
merged 1 commit into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 0 additions & 86 deletions dandiapi/api/tests/test_pagination.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,65 +3,6 @@
import pytest


@pytest.mark.django_db()
def test_dandiset_pagination(api_client, dandiset_factory):
endpoint = '/api/dandisets/'
for _ in range(10):
dandiset_factory()

# First page
resp = api_client.get(endpoint, {'order': 'id', 'page_size': 5}).json()
assert resp['count'] == 10
assert resp['next'] is not None
page_one = resp['results']
assert len(page_one) == 5

# Second page
resp = api_client.get(endpoint, {'order': 'id', 'page_size': 5, 'page': 2}).json()
assert resp['count'] is None
assert resp['next'] is None
page_two = resp['results']
assert len(page_two) == 5

# Full page
resp = api_client.get(endpoint, {'order': 'id', 'page_size': 100}).json()
assert resp['count'] == 10
assert resp['next'] is None
full_page = resp['results']
assert len(full_page) == 10

# Assert full list is ordered the same as both paginated lists
assert full_page == page_one + page_two


@pytest.mark.django_db()
def test_version_pagination(api_client, dandiset, published_version_factory):
endpoint = f'/api/dandisets/{dandiset.identifier}/versions/'

for _ in range(10):
published_version_factory(dandiset=dandiset)

resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5}).json()

assert resp['count'] == 10
page_one = resp['results']
assert len(page_one) == 5

resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5, 'page': 2}).json()
assert resp['count'] is None
assert resp['next'] is None
page_two = resp['results']
assert len(page_two) == 5

resp = api_client.get(endpoint, {'order': 'created', 'page_size': 100}).json()
assert resp['count'] == 10
assert resp['next'] is None
full_page = resp['results']
assert len(full_page) == 10

assert full_page == page_one + page_two


@pytest.mark.django_db()
def test_asset_pagination(api_client, version, asset_factory):
endpoint = f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/'
Expand Down Expand Up @@ -92,30 +33,3 @@ def test_asset_pagination(api_client, version, asset_factory):

# Assert full list is ordered the same as both paginated lists
assert full_page == page_one + page_two


@pytest.mark.django_db()
def test_zarr_pagination(api_client, zarr_archive_factory):
endpoint = '/api/zarr/'

for _ in range(10):
zarr_archive_factory()

resp = api_client.get(endpoint, {'page_size': 5}).json()
assert resp['count'] == 10
page_one = resp['results']
assert len(page_one) == 5

resp = api_client.get(endpoint, {'page_size': 5, 'page': 2}).json()
assert resp['count'] is None
assert resp['next'] is None
page_two = resp['results']
assert len(page_two) == 5

resp = api_client.get(endpoint, {'page_size': 100}).json()
assert resp['count'] == 10
assert resp['next'] is None
full_page = resp['results']
assert len(full_page) == 10

assert full_page == page_one + page_two
9 changes: 6 additions & 3 deletions dandiapi/api/views/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
VERSIONS_DANDISET_PK_PARAM,
VERSIONS_VERSION_PARAM,
)
from dandiapi.api.views.pagination import DandiPagination
from dandiapi.api.views.pagination import DandiPagination, LazyPagination
from dandiapi.api.views.serializers import (
AssetDetailSerializer,
AssetDownloadQueryParameterSerializer,
Expand Down Expand Up @@ -431,7 +431,10 @@ def list(self, request, *args, **kwargs):
asset_queryset = asset_queryset.filter(path__iregex=glob_pattern.replace('\\*', '.*'))

# Retrieve just the first N asset IDs, and use them for pagination
page_of_asset_ids = self.paginate_queryset(asset_queryset.values_list('id', flat=True))
# Use custom pagination class to reduce unnecessary counts of assets
paginator = LazyPagination()
qs = asset_queryset.values_list('id', flat=True)
page_of_asset_ids = paginator.paginate_queryset(qs, request=self.request, view=self)

# Not sure when the page is ever None, but this condition is checked for compatibility with
# the original implementation: https://github.com/encode/django-rest-framework/blob/f4194c4684420ac86485d9610adf760064db381f/rest_framework/mixins.py#L37-L46
Expand All @@ -453,7 +456,7 @@ def list(self, request, *args, **kwargs):

# Paginate and return
serializer = self.get_serializer(queryset, many=True, metadata=include_metadata)
return self.get_paginated_response(serializer.data)
return paginator.get_paginated_response(serializer.data)

@swagger_auto_schema(
query_serializer=AssetPathsQueryParameterSerializer,
Expand Down
31 changes: 18 additions & 13 deletions dandiapi/api/views/pagination.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,3 @@
"""
Implement an optimized pagination scheme.
This module provides a custom pagination implementation, as the existing `PageNumberPagination`
class returns a `count` field for every page returned. This can be very inefficient on large tables,
and in reality, the count is only necessary on the first page of results. This module implements
such a pagination scheme, only returning 'count' on the first page of results.
"""

from __future__ import annotations

from collections import OrderedDict
Expand All @@ -17,6 +8,24 @@ class returns a `count` field for every page returned. This can be very ineffici
from rest_framework.response import Response


class DandiPagination(PageNumberPagination):
page_size = 100
max_page_size = 1000
page_size_query_param = 'page_size'

@cached_property
def page_size_query_description(self):
return f'{super().page_size_query_description[:-1]} (maximum {self.max_page_size}).'


"""
The below code provides a custom pagination implementation, as the existing `PageNumberPagination`
class returns a `count` field for every page returned. This can be very inefficient on large tables,
and in reality, the count is only necessary on the first page of results. This module implements
such a pagination scheme, only returning 'count' on the first page of results.
"""


class LazyPage(Page):
"""
A page class that doesn't call .count() on the queryset it's paging from.
Expand Down Expand Up @@ -96,7 +105,3 @@ def get_paginated_response(self, data) -> Response:
)

return Response(page_dict)


# Alias
DandiPagination = LazyPagination