diff --git a/README.md b/README.md index 4507547..946288c 100644 --- a/README.md +++ b/README.md @@ -77,8 +77,8 @@ Filter support: * `from`: specifies a lower bound for datestamp-based selective harvesting. UTC+0 datetimes must be provided. * `until`: specifies an upper bound for datestamp-based selective harvesting. UTC+0 datetimes must be provided. +* `resumptionToken`: Includes validation of current verb and filters * `set`: TBA -* `resumptionToken`: TBA ### List Sets diff --git a/src/Controllers/OaiController.php b/src/Controllers/OaiController.php index d2593fb..c115c1d 100644 --- a/src/Controllers/OaiController.php +++ b/src/Controllers/OaiController.php @@ -9,7 +9,7 @@ use SilverStripe\Control\HTTPResponse; use SilverStripe\Core\Environment; use SilverStripe\Core\Injector\Injector; -use SilverStripe\ORM\DataList; +use SilverStripe\ORM\PaginatedList; use SilverStripe\SiteConfig\SiteConfig; use Terraformers\OpenArchive\Documents\Errors\BadVerbDocument; use Terraformers\OpenArchive\Documents\Errors\CannotDisseminateFormatDocument; @@ -20,6 +20,7 @@ use Terraformers\OpenArchive\Formatters\OaiDcFormatter; use Terraformers\OpenArchive\Formatters\OaiRecordFormatter; use Terraformers\OpenArchive\Helpers\DateTimeHelper; +use Terraformers\OpenArchive\Helpers\ResumptionTokenHelper; use Terraformers\OpenArchive\Models\OaiRecord; use Throwable; @@ -56,9 +57,9 @@ class OaiController extends Controller 'oai_dc' => OaiDcFormatter::class, ]; - private static string $supportedProtocol = '2.0'; + private static string $supported_protocol = '2.0'; - private static string $supportedDeletedRecord = self::DELETED_SUPPORT_PERSISTENT; + private static string $supported_deleted_record = self::DELETED_SUPPORT_PERSISTENT; /** * All dates provided by the OAI repository must be ISO8601, and with an additional requirement that only "zulu" is @@ -67,7 +68,20 @@ class OaiController extends Controller * * @see http://www.openarchives.org/OAI/openarchivesprotocol.html#Dates */ - private 
static string $supportedGranularity = 'YYYY-MM-DDThh:mm:ssZ'; + private static string $supported_granularity = 'YYYY-MM-DDThh:mm:ssZ'; + + /** + * For verbs that use Resumption Tokens, this is the configuration that controls how many OAI Records we will load + * into a single response + */ + private static string $oai_records_per_page = '100'; + + /** + * The expiration time (in seconds) of any resumption tokens that are generated. Default is 60 minutes + * + * Set this to null if you want an infinite duration + */ + private static ?int $resumption_token_expiry = 3600; public function index(HTTPRequest $request): HTTPResponse { @@ -122,11 +136,11 @@ protected function Identify(HTTPRequest $request): HTTPResponse // Base URL defaults to the current URL. Extension point is provided in this method $xmlDocument->setBaseUrl($this->getBaseUrl($request)); // Protocol Version defaults to 2.0. You can update the configuration if required - $xmlDocument->setProtocolVersion($this->config()->get('supportedProtocol')); + $xmlDocument->setProtocolVersion($this->config()->get('supported_protocol')); // Deleted Record support defaults to "persistent". You can update the configuration if required - $xmlDocument->setDeletedRecord($this->config()->get('supportedDeletedRecord')); + $xmlDocument->setDeletedRecord($this->config()->get('supported_deleted_record')); // Date Granularity support defaults to date and time. You can update the configuration if required - $xmlDocument->setGranularity($this->config()->get('supportedGranularity')); + $xmlDocument->setGranularity($this->config()->get('supported_granularity')); // You should set your env var appropriately for this value $xmlDocument->setAdminEmail(Environment::getEnv(OaiController::OAI_API_ADMIN_EMAIL)); // Earliest Datestamp defaults to the Jan 1970 (the start of UNIX). 
Extension point is provided in this method @@ -185,38 +199,66 @@ protected function ListRecords(HTTPRequest $request): HTTPResponse // Request URL defaults to the current URL. Extension point is provided in this method $xmlDocument->setRequestUrl($this->getRequestUrl($request)); - // The lower bound for selective harvesting - $from = $request->getVar('from'); - // The upper bound for selective harvesting - $until = $request->getVar('until'); + // The lower bound for selective harvesting. The original UTC should be preserved for Resumption Tokens and any + // display requirements + $fromUtc = $request->getVar('from'); + // Local value which will be used purely for internal filtering + $fromLocal = null; + // The upper bound for selective harvesting. The original UTC should be preserved for Resumption Tokens and any + // display requirements + $untilUtc = $request->getVar('until'); + // Local value which will be used purely for internal filtering + $untilLocal = null; // Specifies the Set for selective harvesting - $set = (int) $request->getVar('set'); + $set = $request->getVar('set'); // An encoded string containing pagination requirements for selective harvesting $resumptionToken = $request->getVar('resumptionToken'); + // Default page is always 1, but this can change later if there is a Resumption Token active + $currentPage = 1; - if ($from) { + if ($fromUtc) { try { - $from = DateTimeHelper::getLocalStringFromUtc($from); + $fromLocal = DateTimeHelper::getLocalStringFromUtc($fromUtc); } catch (Throwable $e) { $xmlDocument->addError(OaiDocument::ERROR_BAD_ARGUMENT, 'Invalid \'from\' date format provided'); } } - if ($until) { + if ($untilUtc) { try { - $until = DateTimeHelper::getLocalStringFromUtc($until); + $untilLocal = DateTimeHelper::getLocalStringFromUtc($untilUtc); } catch (Throwable $e) { $xmlDocument->addError(OaiDocument::ERROR_BAD_ARGUMENT, 'Invalid \'until\' date format provided'); } } + if ($resumptionToken) { + try { + $currentPage = 
ResumptionTokenHelper::getPageFromResumptionToken( + $resumptionToken, + 'ListRecords', + $fromUtc, + $untilUtc, + $set + ); + } catch (Throwable $e) { + $xmlDocument->addError(OaiDocument::ERROR_BAD_RESUMPTION_TOKEN, $e->getMessage()); + } + } + if ($xmlDocument->hasErrors()) { return $this->getResponseWithDocumentBody($xmlDocument); } - $oaiRecords = $this->fetchOaiRecords($from, $until, $set, $resumptionToken); + // Grab the Paginated List of records based on our filter criteria + $oaiRecords = $this->fetchOaiRecords($fromLocal, $untilLocal, $set); - if (!$oaiRecords->count()) { + // Set the page length and current page of our Paginated list + $oaiRecords->setPageLength($this->config()->get('oai_records_per_page')); + $oaiRecords->setCurrentPage($currentPage); + + // If there are no results after we apply filters and pagination, then we should return an error response + if (!$oaiRecords->Count()) { $xmlDocument->addError(OaiDocument::ERROR_NO_RECORDS_MATCH); return $this->getResponseWithDocumentBody($xmlDocument); @@ -225,6 +267,23 @@ protected function ListRecords(HTTPRequest $request): HTTPResponse // Start processing whatever OaiRecords we found $xmlDocument->processOaiRecords($oaiRecords); + // If there are still more records to be processed, then we need to add a new Resumption Token to our response + if ($oaiRecords->TotalPages() > $currentPage) { + $newResumptionToken = ResumptionTokenHelper::generateResumptionToken( + 'ListRecords', + $currentPage + 1, + $fromUtc, + $untilUtc, + $set + ); + + $xmlDocument->setResumptionToken($newResumptionToken); + } elseif ($resumptionToken) { + // If this is the last page of a request that included a Resumption Token, then we specifically need to add + // an empty Token - indicating that the list is now complete + $xmlDocument->setResumptionToken(''); + } + return $this->getResponseWithDocumentBody($xmlDocument); } @@ -283,15 +342,11 @@ protected function getRepositoryName(): string } /** - * Regarding dates, please 
@see $supportedGranularity docblock. All dates passed to this method should already be + * Regarding dates, please @see $supported_granularity docblock. All dates passed to this method should already be * adjusted to local server time */ - protected function fetchOaiRecords( - ?string $from = null, - ?string $until = null, - ?int $set = null, - ?string $resumptionToken = null - ): DataList { + protected function fetchOaiRecords(?string $from = null, ?string $until = null, ?int $set = null): PaginatedList + { $filters = []; if ($from) { @@ -306,15 +361,15 @@ protected function fetchOaiRecords( // Set support to be added } - if ($resumptionToken) { - // Resumption token support to be added - } - if (!$filters) { - return OaiRecord::get(); + return PaginatedList::create(OaiRecord::get()); } - return OaiRecord::get()->filter($filters); + $list = OaiRecord::get() + ->sort('LastEdited ASC') + ->filter($filters); + + return PaginatedList::create($list); } } diff --git a/src/Documents/ListRecordsDocument.php b/src/Documents/ListRecordsDocument.php index 5e49fa0..ffb47fc 100644 --- a/src/Documents/ListRecordsDocument.php +++ b/src/Documents/ListRecordsDocument.php @@ -2,8 +2,9 @@ namespace Terraformers\OpenArchive\Documents; -use SilverStripe\ORM\DataList; +use SilverStripe\ORM\PaginatedList; use Terraformers\OpenArchive\Formatters\OaiRecordFormatter; +use Terraformers\OpenArchive\Helpers\ResumptionTokenHelper; use Terraformers\OpenArchive\Models\OaiRecord; class ListRecordsDocument extends OaiDocument @@ -21,9 +22,9 @@ public function __construct(OaiRecordFormatter $formatter) } /** - * @param DataList|OaiRecord[] $oaiRecords + * @param PaginatedList|OaiRecord[] $oaiRecords */ - public function processOaiRecords(DataList $oaiRecords): void + public function processOaiRecords(PaginatedList $oaiRecords): void { $listRecordsElement = $this->findOrCreateElement('ListRecords'); @@ -32,4 +33,20 @@ public function processOaiRecords(DataList $oaiRecords): void } } + public 
function setResumptionToken(string $resumptionToken): void + { + $listRecordsElement = $this->findOrCreateElement('ListRecords'); + $resumptionTokenElement = $this->findOrCreateElement('resumptionToken', $listRecordsElement); + + $resumptionTokenElement->nodeValue = $resumptionToken; + + $tokenExpiry = ResumptionTokenHelper::getExpiryFromResumptionToken($resumptionToken); + + if (!$tokenExpiry) { + return; + } + + $resumptionTokenElement->setAttribute('expirationDate', $tokenExpiry); + } + } diff --git a/src/Helpers/DateTimeHelper.php b/src/Helpers/DateTimeHelper.php index 1bc013e..fc0b1d2 100644 --- a/src/Helpers/DateTimeHelper.php +++ b/src/Helpers/DateTimeHelper.php @@ -15,6 +15,7 @@ public static function getLocalStringFromUtc(string $utcDateString): string throw new Exception('Invalid UTC date format provided'); } + // Note: strtotime() already converts UTC date strings (UTC+Z) into local timestamps return date('Y-m-d H:i:s', strtotime($utcDateString)); } diff --git a/src/Helpers/ResumptionTokenHelper.php b/src/Helpers/ResumptionTokenHelper.php new file mode 100644 index 0000000..854c862 --- /dev/null +++ b/src/Helpers/ResumptionTokenHelper.php @@ -0,0 +1,140 @@ + $page, + 'verb' => $verb, + ]; + + // Check to see if we want to give our Tokens an expiry date + $tokenExpiryLength = OaiController::config()->get('resumption_token_expiry'); + + if ($tokenExpiryLength) { + // Set the expiry date for a time in the future matching the expiry length + $parts['expiry'] = DateTimeHelper::getUtcStringFromLocal( + date('Y-m-d H:i:s', DBDatetime::now()->getTimestamp() + $tokenExpiryLength) + ); + } + + if ($from) { + $parts['from'] = $from; + } + + if ($until) { + $parts['until'] = $until; + } + + if ($set) { + $parts['set'] = $set; + } + + return base64_encode(json_encode($parts)); + } + + public static function getPageFromResumptionToken( + string $resumptionToken, + string $expectedVerb, + ?string $expectedFrom = null, + ?string $expectedUntil = null, + ?int 
$expectedSet = null + ): int { + $resumptionParts = static::getResumptionTokenParts($resumptionToken); + + // Grab the array values of our Resumption Token or default those values to null + $resumptionPage = $resumptionParts['page'] ?? null; + $resumptionVerb = $resumptionParts['verb'] ?? null; + $resumptionFrom = $resumptionParts['from'] ?? null; + $resumptionUntil = $resumptionParts['until'] ?? null; + $resumptionSet = $resumptionParts['set'] ?? null; + $resumptionExpiry = $resumptionParts['expiry'] ?? null; + + // Every Resumption Token should include (at the very least) the active page, if it doesn't, then it's invalid + if (!$resumptionPage) { + throw new Exception('Invalid resumption token'); + } + + // If any of these values do not match the expected values, then this Resumption Token is invalid + if ($resumptionVerb !== $expectedVerb + || $resumptionFrom !== $expectedFrom + || $resumptionUntil !== $expectedUntil + || $resumptionSet !== $expectedSet + ) { + throw new Exception('Invalid resumption token'); + } + + // The duration that each Token lives (in seconds) + $tokenExpiryLength = OaiController::config()->get('resumption_token_expiry'); + + // The duration has been set to infinite, so we can return now + if (!$tokenExpiryLength) { + return $resumptionPage; + } + + // If the current time is greater than the expiry date of the Resumption Token, then this Token is invalid + // Note: strtotime() already converts UTC date strings (UTC+Z) into local timestamps + if (DBDatetime::now()->getTimestamp() > strtotime($resumptionExpiry)) { + throw new Exception('Invalid resumption token'); + } + + // The Resumption Token is valid, so we can return whatever value we have for page + return $resumptionPage; + } + + public static function getExpiryFromResumptionToken(string $resumptionToken): ?string + { + $resumptionParts = static::getResumptionTokenParts($resumptionToken); + + return $resumptionParts['expiry'] ?? 
null; + } + + protected static function getResumptionTokenParts(string $resumptionToken): array + { + if (!$resumptionToken) { + return []; + } + + $decode = base64_decode($resumptionToken, true); + + // We can't do anything with an invalid encoded value + if (!$decode) { + throw new Exception('Invalid resumption token'); + } + + $resumptionParts = json_decode($decode, true); + + // We expect all Resumption Tokens to decode to an array + if (!is_array($resumptionParts)) { + throw new Exception('Invalid resumption token'); + } + + return $resumptionParts; + } + +} diff --git a/src/Models/OaiRecord.php b/src/Models/OaiRecord.php index 97450d7..df17b3b 100644 --- a/src/Models/OaiRecord.php +++ b/src/Models/OaiRecord.php @@ -114,6 +114,8 @@ class OaiRecord extends DataObject ], ]; + private static string $default_sort = 'ID ASC'; + public function addSet(string $title): void { $this->OaiSets()->add(OaiSet::findOrCreate($title)); diff --git a/tests/Helpers/DateTimeHelperTest.php b/tests/Helpers/DateTimeHelperTest.php index a4b02c6..0dd0bf5 100644 --- a/tests/Helpers/DateTimeHelperTest.php +++ b/tests/Helpers/DateTimeHelperTest.php @@ -15,12 +15,8 @@ public function testGetUtcStringFromLocal(): void $localString = '2022-01-01 20:00:00'; $utcString = DateTimeHelper::getUtcStringFromLocal($localString); - // Check to see if Daylight Savings is active in Auckland - $daylightSavings = (bool) date('I'); - // UTC can be -12 or -13 depending on Daylight Savings - $expectedUtc = $daylightSavings - ? 
'2022-01-01T07:00:00Z' - : '2022-01-01T08:00:00Z'; + // UTC is 13 hours behind Auckland while Daylight Savings is active, so 20:00 local time is 07:00 UTC + $expectedUtc = '2022-01-01T07:00:00Z'; $this->assertEquals($expectedUtc, $utcString); } @@ -32,12 +28,8 @@ public function testGetLocalStringFromUtc(): void $utcString = '2022-01-01T07:00:00Z'; $localString = DateTimeHelper::getLocalStringFromUtc($utcString); - // Check to see if Daylight Savings is active in Auckland - $daylightSavings = (bool) date('I'); - // Auckland time can be +12 or +13 depending on Daylight Savings - $expectedLocal = $daylightSavings - ? '2022-01-01 20:00:00' - : '2022-01-01 19:00:00'; + // Auckland is 13 hours ahead of UTC while Daylight Savings is active, so 07:00 UTC is 20:00 local time + $expectedLocal = '2022-01-01 20:00:00'; $this->assertEquals($expectedLocal, $localString); } diff --git a/tests/Helpers/ResumptionTokenHelperTest.php b/tests/Helpers/ResumptionTokenHelperTest.php new file mode 100644 index 0000000..9068d43 --- /dev/null +++ b/tests/Helpers/ResumptionTokenHelperTest.php @@ -0,0 +1,121 @@ + $verb, + 'page' => $page, + 'from' => $from, + 'until' => $until, + 'set' => $set, + 'expiry' => $expiry, + ]; + + // Generate our Token + $token = ResumptionTokenHelper::generateResumptionToken($verb, $page, $from, $until, $set); + + // Now decode that Token + $reflection = new ReflectionClass(ResumptionTokenHelper::class); + $method = $reflection->getMethod('getResumptionTokenParts'); + $method->setAccessible(true); + $resumptionParts = $method->invoke(null, $token); + + // And check that the Token that was encoded and decoded matches our expected values + $this->assertEquals(ksort($expectedParts), ksort($resumptionParts)); + // Check that our "get page number" method works as well + $this->assertEquals( + $page, + ResumptionTokenHelper::getPageFromResumptionToken($token, $verb, $from, $until, $set) + ); + // Check that our "get expiry" method works as well + $this->assertEquals($expiry, ResumptionTokenHelper::getExpiryFromResumptionToken($token)); + } + + public function
testResumptionTokenHasExpired(): void + { + // We'll use Auckland time for our tests + date_default_timezone_set('Pacific/Auckland'); + + $this->expectExceptionMessage('Invalid resumption token'); + + DBDatetime::set_mock_now('2020-01-01 13:00:00'); + + $verb = 'ListRecords'; + $page = 3; + $from = '2022-01-01T01:00:00Z'; + $until = '2022-01-01T02:00:00Z'; + $set = 2; + + // Generate our Token + $token = ResumptionTokenHelper::generateResumptionToken($verb, $page, $from, $until, $set); + + // Now set the time to a couple hours later. This should invalidate the Resumption Token + DBDatetime::set_mock_now('2020-01-01 15:00:00'); + + // This should throw an Exception + ResumptionTokenHelper::getPageFromResumptionToken($token, $verb, $from, $until, $set); + } + + public function testResumptionTokenNoExpiry(): void + { + OaiController::config()->set('resumption_token_expiry', null); + + $verb = 'ListRecords'; + $page = 3; + $from = '2022-01-01T01:00:00Z'; + $until = '2022-01-01T02:00:00Z'; + $set = 2; + + $expectedParts = [ + 'verb' => $verb, + 'page' => $page, + 'from' => $from, + 'until' => $until, + 'set' => $set, + ]; + + // Generate our Token + $token = ResumptionTokenHelper::generateResumptionToken($verb, $page, $from, $until, $set); + + // Now decode that Token + $reflection = new ReflectionClass(ResumptionTokenHelper::class); + $method = $reflection->getMethod('getResumptionTokenParts'); + $method->setAccessible(true); + $resumptionParts = $method->invoke(null, $token); + + // And check that the Token that was encoded and decoded matches our expected values + $this->assertEquals(ksort($expectedParts), ksort($resumptionParts)); + // And check that our "get page number" method works as well + $this->assertEquals( + $page, + ResumptionTokenHelper::getPageFromResumptionToken($token, $verb, $from, $until, $set) + ); + // Check that our "get expiry" method works as well (expecting there to be no value) + 
$this->assertNull(ResumptionTokenHelper::getExpiryFromResumptionToken($token)); + } + +}