From 68904eebba631e9e2a0046c8975b15ec07a4d960 Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Tue, 28 May 2024 14:58:23 -0400 Subject: [PATCH] Use normal PageNumber pagination for listing endpoints This retains the behavior of using a custom pagination class for the asset list endpoint --- dandiapi/api/tests/test_pagination.py | 86 --------------------------- dandiapi/api/views/asset.py | 9 ++- dandiapi/api/views/pagination.py | 31 ++++++---- 3 files changed, 24 insertions(+), 102 deletions(-) diff --git a/dandiapi/api/tests/test_pagination.py b/dandiapi/api/tests/test_pagination.py index bc890be81..2081fc03b 100644 --- a/dandiapi/api/tests/test_pagination.py +++ b/dandiapi/api/tests/test_pagination.py @@ -3,65 +3,6 @@ import pytest -@pytest.mark.django_db() -def test_dandiset_pagination(api_client, dandiset_factory): - endpoint = '/api/dandisets/' - for _ in range(10): - dandiset_factory() - - # First page - resp = api_client.get(endpoint, {'order': 'id', 'page_size': 5}).json() - assert resp['count'] == 10 - assert resp['next'] is not None - page_one = resp['results'] - assert len(page_one) == 5 - - # Second page - resp = api_client.get(endpoint, {'order': 'id', 'page_size': 5, 'page': 2}).json() - assert resp['count'] is None - assert resp['next'] is None - page_two = resp['results'] - assert len(page_two) == 5 - - # Full page - resp = api_client.get(endpoint, {'order': 'id', 'page_size': 100}).json() - assert resp['count'] == 10 - assert resp['next'] is None - full_page = resp['results'] - assert len(full_page) == 10 - - # Assert full list is ordered the same as both paginated lists - assert full_page == page_one + page_two - - -@pytest.mark.django_db() -def test_version_pagination(api_client, dandiset, published_version_factory): - endpoint = f'/api/dandisets/{dandiset.identifier}/versions/' - - for _ in range(10): - published_version_factory(dandiset=dandiset) - - resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5}).json() - - assert resp['count'] == 10 - page_one = resp['results'] - assert len(page_one) == 5 - - resp = api_client.get(endpoint, {'order': 'created', 'page_size': 5, 'page': 2}).json() - assert resp['count'] is None - assert resp['next'] is None - page_two = resp['results'] - assert len(page_two) == 5 - - resp = api_client.get(endpoint, {'order': 'created', 'page_size': 100}).json() - assert resp['count'] == 10 - assert resp['next'] is None - full_page = resp['results'] - assert len(full_page) == 10 - - assert full_page == page_one + page_two - - @pytest.mark.django_db() def test_asset_pagination(api_client, version, asset_factory): endpoint = f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/' @@ -92,30 +33,3 @@ def test_asset_pagination(api_client, version, asset_factory): # Assert full list is ordered the same as both paginated lists assert full_page == page_one + page_two - - -@pytest.mark.django_db() -def test_zarr_pagination(api_client, zarr_archive_factory): - endpoint = '/api/zarr/' - - for _ in range(10): - zarr_archive_factory() - - resp = api_client.get(endpoint, {'page_size': 5}).json() - assert resp['count'] == 10 - page_one = resp['results'] - assert len(page_one) == 5 - - resp = api_client.get(endpoint, {'page_size': 5, 'page': 2}).json() - assert resp['count'] is None - assert resp['next'] is None - page_two = resp['results'] - assert len(page_two) == 5 - - resp = api_client.get(endpoint, {'page_size': 100}).json() - assert resp['count'] == 10 - assert resp['next'] is None - full_page = resp['results'] - assert len(full_page) == 10 - - assert full_page == page_one + page_two diff --git a/dandiapi/api/views/asset.py b/dandiapi/api/views/asset.py index 4595b9ebd..4e86cf273 100644 --- a/dandiapi/api/views/asset.py +++ b/dandiapi/api/views/asset.py @@ -46,7 +46,7 @@ VERSIONS_DANDISET_PK_PARAM, VERSIONS_VERSION_PARAM, ) -from dandiapi.api.views.pagination import DandiPagination +from dandiapi.api.views.pagination import DandiPagination, LazyPagination from dandiapi.api.views.serializers import ( AssetDetailSerializer, AssetDownloadQueryParameterSerializer, @@ -431,7 +431,10 @@ def list(self, request, *args, **kwargs): asset_queryset = asset_queryset.filter(path__iregex=glob_pattern.replace('\\*', '.*')) # Retrieve just the first N asset IDs, and use them for pagination - page_of_asset_ids = self.paginate_queryset(asset_queryset.values_list('id', flat=True)) + # Use custom pagination class to reduce unnecessary counts of assets + paginator = LazyPagination() + qs = asset_queryset.values_list('id', flat=True) + page_of_asset_ids = paginator.paginate_queryset(qs, request=self.request, view=self) # Not sure when the page is ever None, but this condition is checked for compatibility with # the original implementation: https://github.com/encode/django-rest-framework/blob/f4194c4684420ac86485d9610adf760064db381f/rest_framework/mixins.py#L37-L46 @@ -453,7 +456,7 @@ def list(self, request, *args, **kwargs): # Paginate and return serializer = self.get_serializer(queryset, many=True, metadata=include_metadata) - return self.get_paginated_response(serializer.data) + return paginator.get_paginated_response(serializer.data) @swagger_auto_schema( query_serializer=AssetPathsQueryParameterSerializer, diff --git a/dandiapi/api/views/pagination.py b/dandiapi/api/views/pagination.py index c48b76f72..d2efaffbe 100644 --- a/dandiapi/api/views/pagination.py +++ b/dandiapi/api/views/pagination.py @@ -1,12 +1,3 @@ -""" -Implement an optimized pagination scheme. - -This module provides a custom pagination implementation, as the existing `PageNumberPagination` -class returns a `count` field for every page returned. This can be very inefficient on large tables, -and in reality, the count is only necessary on the first page of results. This module implements -such a pagination scheme, only returning 'count' on the first page of results. -""" - from __future__ import annotations from collections import OrderedDict @@ -17,6 +8,24 @@ class returns a `count` field for every page returned. This can be very ineffici from rest_framework.response import Response +class DandiPagination(PageNumberPagination): + page_size = 100 + max_page_size = 1000 + page_size_query_param = 'page_size' + + @cached_property + def page_size_query_description(self): + return f'{super().page_size_query_description[:-1]} (maximum {self.max_page_size}).' + + +""" +The below code provides a custom pagination implementation, as the existing `PageNumberPagination` +class returns a `count` field for every page returned. This can be very inefficient on large tables, +and in reality, the count is only necessary on the first page of results. This module implements +such a pagination scheme, only returning 'count' on the first page of results. +""" + + class LazyPage(Page): """ A page class that doesn't call .count() on the queryset it's paging from. @@ -96,7 +105,3 @@ def get_paginated_response(self, data) -> Response: ) return Response(page_dict) - - -# Alias -DandiPagination = LazyPagination