From da49e460c508e278a3a224462fffddc07744529a Mon Sep 17 00:00:00 2001 From: Chris Penny Date: Mon, 4 Apr 2022 09:07:56 +1200 Subject: [PATCH] Add support for resumption tokens --- README.md | 2 +- src/Controllers/OaiController.php | 92 +++++++++++++----- src/Documents/ListRecordsDocument.php | 14 ++- src/Helpers/ResumptionTokenHelper.php | 102 ++++++++++++++++++++ src/Models/OaiRecord.php | 2 + tests/Helpers/DateTimeHelperTest.php | 16 +-- tests/Helpers/ResumptionTokenHelperTest.php | 52 ++++++++++ 7 files changed, 240 insertions(+), 40 deletions(-) create mode 100644 src/Helpers/ResumptionTokenHelper.php create mode 100644 tests/Helpers/ResumptionTokenHelperTest.php diff --git a/README.md b/README.md index 4507547..946288c 100644 --- a/README.md +++ b/README.md @@ -77,8 +77,8 @@ Filter support: * `from`: specifies a lower bound for datestamp-based selective harvesting. UTC+0 datetimes must be provided. * `until`: specifies an upper bound for datestamp-based selective harvesting. UTC+0 datetimes must be provided. 
+* `resumptionToken`: Includes validation of current verb and filters * `set`: TBA -* `resumptionToken`: TBA ### List Sets diff --git a/src/Controllers/OaiController.php b/src/Controllers/OaiController.php index d2593fb..30ce73c 100644 --- a/src/Controllers/OaiController.php +++ b/src/Controllers/OaiController.php @@ -9,7 +9,7 @@ use SilverStripe\Control\HTTPResponse; use SilverStripe\Core\Environment; use SilverStripe\Core\Injector\Injector; -use SilverStripe\ORM\DataList; +use SilverStripe\ORM\PaginatedList; use SilverStripe\SiteConfig\SiteConfig; use Terraformers\OpenArchive\Documents\Errors\BadVerbDocument; use Terraformers\OpenArchive\Documents\Errors\CannotDisseminateFormatDocument; @@ -20,6 +20,7 @@ use Terraformers\OpenArchive\Formatters\OaiDcFormatter; use Terraformers\OpenArchive\Formatters\OaiRecordFormatter; use Terraformers\OpenArchive\Helpers\DateTimeHelper; +use Terraformers\OpenArchive\Helpers\ResumptionTokenHelper; use Terraformers\OpenArchive\Models\OaiRecord; use Throwable; @@ -69,6 +70,12 @@ class OaiController extends Controller */ private static string $supportedGranularity = 'YYYY-MM-DDThh:mm:ssZ'; + /** + * For verbs that use Resumption Tokens, this is the configuration that controls how many OAI Records we will load + * into a single response + */ + private static string $oaiRecordsPerPage = '100'; + public function index(HTTPRequest $request): HTTPResponse { $this->getResponse()->addHeader('Content-type', 'text/xml'); @@ -185,38 +192,66 @@ protected function ListRecords(HTTPRequest $request): HTTPResponse // Request URL defaults to the current URL. Extension point is provided in this method $xmlDocument->setRequestUrl($this->getRequestUrl($request)); - // The lower bound for selective harvesting - $from = $request->getVar('from'); - // The upper bound for selective harvesting - $until = $request->getVar('until'); + // The lower bound for selective harvesting. 
The original UTC should be preserved for Resumption Tokens and any + // display requirements + $fromUtc = $request->getVar('from'); + // Local value which will be used purely for internal filtering + $fromLocal = null; + // The upper bound for selective harvesting. The original UTC should be preserved for Resumption Tokens and any + // display requirements + $untilUtc = $request->getVar('until'); + // Local value which will be used purely for internal filtering + $untilLocal = null; // Specifies the Set for selective harvesting - $set = (int) $request->getVar('set'); + $set = (int) $request->getVar('set') ?: null; // An encoded string containing pagination requirements for selective harvesting $resumptionToken = $request->getVar('resumptionToken'); + // Default page is always 1, but this can change later if there is a Resumption Token active + $currentPage = 1; - if ($from) { + if ($fromUtc) { try { - $from = DateTimeHelper::getLocalStringFromUtc($from); + $fromLocal = DateTimeHelper::getLocalStringFromUtc($fromUtc); } catch (Throwable $e) { $xmlDocument->addError(OaiDocument::ERROR_BAD_ARGUMENT, 'Invalid \'from\' date format provided'); } } - if ($until) { + if ($untilUtc) { try { - $until = DateTimeHelper::getLocalStringFromUtc($until); + $untilLocal = DateTimeHelper::getLocalStringFromUtc($untilUtc); } catch (Throwable $e) { $xmlDocument->addError(OaiDocument::ERROR_BAD_ARGUMENT, 'Invalid \'until\' date format provided'); } } + if ($resumptionToken) { + try { + $currentPage = ResumptionTokenHelper::getPageFromResumptionToken( + $resumptionToken, + 'ListRecords', + $fromUtc, + $untilUtc, + $set + ); + } catch (Throwable $e) { + $xmlDocument->addError(OaiDocument::ERROR_BAD_RESUMPTION_TOKEN, $e->getMessage()); + } + } + if ($xmlDocument->hasErrors()) { return $this->getResponseWithDocumentBody($xmlDocument); } - $oaiRecords = $this->fetchOaiRecords($from, $until, $set, $resumptionToken); + // Grab the Paginated List of records based on our filter criteria + $oaiRecords = 
$this->fetchOaiRecords($fromLocal, $untilLocal, $set); + + // Set the page length and current page of our Paginated list + $oaiRecords->setPageLength($this->config()->get('oaiRecordsPerPage')); + $oaiRecords->setCurrentPage($currentPage); - if (!$oaiRecords->count()) { + // If there are no results after we apply filters and pagination, then we should return an error response + if (!$oaiRecords->Count()) { $xmlDocument->addError(OaiDocument::ERROR_NO_RECORDS_MATCH); return $this->getResponseWithDocumentBody($xmlDocument); @@ -225,6 +260,19 @@ protected function ListRecords(HTTPRequest $request): HTTPResponse // Start processing whatever OaiRecords we found $xmlDocument->processOaiRecords($oaiRecords); + // If there are still more records to be processed, then we need to add a new Resumption Token to our response + if ($oaiRecords->TotalPages() > $currentPage) { + $newResumptionToken = ResumptionTokenHelper::generateResumptionToken( + 'ListRecords', + $currentPage + 1, + $fromUtc, + $untilUtc, + $set + ); + + $xmlDocument->setResumptionToken($newResumptionToken); + } + return $this->getResponseWithDocumentBody($xmlDocument); } @@ -286,12 +334,8 @@ protected function getRepositoryName(): string * Regarding dates, please @see $supportedGranularity docblock. 
All dates passed to this method should already be * adjusted to local server time */ - protected function fetchOaiRecords( - ?string $from = null, - ?string $until = null, - ?int $set = null, - ?string $resumptionToken = null - ): DataList { + protected function fetchOaiRecords(?string $from = null, ?string $until = null, ?int $set = null): PaginatedList + { $filters = []; if ($from) { @@ -306,15 +350,15 @@ protected function fetchOaiRecords( // Set support to be added } - if ($resumptionToken) { - // Resumption token support to be added - } - if (!$filters) { - return OaiRecord::get(); + return PaginatedList::create(OaiRecord::get()); } - return OaiRecord::get()->filter($filters); + $list = OaiRecord::get() + ->sort(['LastEdited' => 'ASC', 'ID' => 'ASC']) + ->filter($filters); + + return PaginatedList::create($list); } } diff --git a/src/Documents/ListRecordsDocument.php b/src/Documents/ListRecordsDocument.php index 5e49fa0..4ddbc23 100644 --- a/src/Documents/ListRecordsDocument.php +++ b/src/Documents/ListRecordsDocument.php @@ -2,7 +2,7 @@ namespace Terraformers\OpenArchive\Documents; -use SilverStripe\ORM\DataList; +use SilverStripe\ORM\PaginatedList; use Terraformers\OpenArchive\Formatters\OaiRecordFormatter; use Terraformers\OpenArchive\Models\OaiRecord; @@ -21,9 +21,9 @@ public function __construct(OaiRecordFormatter $formatter) } /** - * @param DataList|OaiRecord[] $oaiRecords + * @param PaginatedList|OaiRecord[] $oaiRecords */ - public function processOaiRecords(DataList $oaiRecords): void + public function processOaiRecords(PaginatedList $oaiRecords): void { $listRecordsElement = $this->findOrCreateElement('ListRecords'); @@ -32,4 +32,12 @@ public function processOaiRecords(DataList $oaiRecords): void } } + public function setResumptionToken(string $resumptionToken): void + { + $listRecordsElement = $this->findOrCreateElement('ListRecords'); + $resumptionTokenElement = $this->findOrCreateElement('resumptionToken', $listRecordsElement); + + $resumptionTokenElement->nodeValue = 
$resumptionToken; + } + } diff --git a/src/Helpers/ResumptionTokenHelper.php b/src/Helpers/ResumptionTokenHelper.php new file mode 100644 index 0000000..bbc2b6e --- /dev/null +++ b/src/Helpers/ResumptionTokenHelper.php @@ -0,0 +1,102 @@ + $page, + 'verb' => $verb, + ]; + + if ($from) { + $parts['from'] = $from; + } + + if ($until) { + $parts['until'] = $until; + } + + if ($set) { + $parts['set'] = $set; + } + + return base64_encode(json_encode($parts)); + } + + public static function getPageFromResumptionToken( + string $resumptionToken, + string $expectedVerb, + ?string $expectedFrom = null, + ?string $expectedUntil = null, + ?int $expectedSet = null + ): int { + $resumptionParts = static::getResumptionTokenParts($resumptionToken); + + // Grab the array values of our Resumption Token or default those values to null + $resumptionPage = $resumptionParts['page'] ?? null; + $resumptionVerb = $resumptionParts['verb'] ?? null; + $resumptionFrom = $resumptionParts['from'] ?? null; + $resumptionUntil = $resumptionParts['until'] ?? null; + $resumptionSet = $resumptionParts['set'] ?? 
null; + + // Every Resumption Token must carry the active page as a positive integer; anything else is invalid + if (!is_int($resumptionPage) || $resumptionPage < 1) { + throw new Exception('Invalid resumption token'); + } + + // If any of these values do not match the expected values, then this Resumption Token is invalid + if ($resumptionVerb !== $expectedVerb + || $resumptionFrom !== $expectedFrom + || $resumptionUntil !== $expectedUntil + || $resumptionSet !== $expectedSet + ) { + throw new Exception('Invalid resumption token'); + } + + // The Resumption Token is valid, so we can return the page number that was validated above + return $resumptionPage; + } + + protected static function getResumptionTokenParts(string $resumptionToken): array + { + $decode = base64_decode($resumptionToken, true); + + // We can't do anything with an invalid encoded value + if (!$decode) { + throw new Exception('Invalid resumption token'); + } + + $resumptionParts = json_decode($decode, true); + + // We expect all Resumption Tokens to decode to an array + if (!is_array($resumptionParts)) { + throw new Exception('Invalid resumption token'); + } + + return $resumptionParts; + } + +} diff --git a/src/Models/OaiRecord.php b/src/Models/OaiRecord.php index 97450d7..df17b3b 100644 --- a/src/Models/OaiRecord.php +++ b/src/Models/OaiRecord.php @@ -114,6 +114,8 @@ class OaiRecord extends DataObject ], ]; + private static string $default_sort = 'ID ASC'; + public function addSet(string $title): void { $this->OaiSets()->add(OaiSet::findOrCreate($title)); diff --git a/tests/Helpers/DateTimeHelperTest.php b/tests/Helpers/DateTimeHelperTest.php index a4b02c6..0dd0bf5 100644 --- a/tests/Helpers/DateTimeHelperTest.php +++ b/tests/Helpers/DateTimeHelperTest.php @@ -15,12 +15,8 @@ public function testGetUtcStringFromLocal(): void $localString = '2022-01-01 20:00:00'; $utcString = DateTimeHelper::getUtcStringFromLocal($localString); - // Check to see if Daylight Savings is active in Auckland - $daylightSavings = (bool) date('I'); 
- // UTC can be -12 or -13 depending on Daylight Savings - $expectedUtc = $daylightSavings - ? '2022-01-01T07:00:00Z' - : '2022-01-01T08:00:00Z'; + // 1 January always falls within Daylight Savings, so Auckland is UTC+13 + $expectedUtc = '2022-01-01T07:00:00Z'; $this->assertEquals($expectedUtc, $utcString); } @@ -32,12 +28,8 @@ public function testGetLocalStringFromUtc(): void $utcString = '2022-01-01T07:00:00Z'; $localString = DateTimeHelper::getLocalStringFromUtc($utcString); - // Check to see if Daylight Savings is active in Auckland - $daylightSavings = (bool) date('I'); - // Auckland time can be +12 or +13 depending on Daylight Savings - $expectedLocal = $daylightSavings - ? '2022-01-01 20:00:00' - : '2022-01-01 19:00:00'; + // 1 January always falls within Daylight Savings, so Auckland is UTC+13 + $expectedLocal = '2022-01-01 20:00:00'; $this->assertEquals($expectedLocal, $localString); } diff --git a/tests/Helpers/ResumptionTokenHelperTest.php b/tests/Helpers/ResumptionTokenHelperTest.php new file mode 100644 index 0000000..fcbed19 --- /dev/null +++ b/tests/Helpers/ResumptionTokenHelperTest.php @@ -0,0 +1,52 @@ + 'ListRecords', + 'page' => 3, + 'from' => '2022-01-01T01:00:00Z', + 'until' => '2022-01-01T02:00:00Z', + 'set' => 2, + ]; + + // Generate our Token + $token = ResumptionTokenHelper::generateResumptionToken( + 'ListRecords', + 3, + '2022-01-01T01:00:00Z', + '2022-01-01T02:00:00Z', + 2 + ); + + // Now decode that Token + $reflection = new ReflectionClass(ResumptionTokenHelper::class); + $method = $reflection->getMethod('getResumptionTokenParts'); + $method->setAccessible(true); + $resumptionParts = $method->invoke(null, $token); + + // And check that the decoded Token matches our expected values (array key order is not significant here) + $this->assertEquals($expectedParts, $resumptionParts); + // And check that our "get page number" method works as well + $this->assertEquals( + 3, + ResumptionTokenHelper::getPageFromResumptionToken( + $token, + 'ListRecords', + '2022-01-01T01:00:00Z', + '2022-01-01T02:00:00Z', 
+ 2 + ) + ); + } + +}