Skip to content

Commit

Permalink
Add support for resumption tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
chrispenny committed Apr 3, 2022
1 parent 1b81eae commit da49e46
Show file tree
Hide file tree
Showing 7 changed files with 240 additions and 40 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ Filter support:

* `from`: specifies a lower bound for datestamp-based selective harvesting. UTC+0 datetimes must be provided.
* `until`: specifies an upper bound for datestamp-based selective harvesting. UTC+0 datetimes must be provided.
* `resumptionToken`: Includes validation of current verb and filters
* `set`: TBA
* `resumptionToken`: TBA

### List Sets

Expand Down
92 changes: 68 additions & 24 deletions src/Controllers/OaiController.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
use SilverStripe\Control\HTTPResponse;
use SilverStripe\Core\Environment;
use SilverStripe\Core\Injector\Injector;
use SilverStripe\ORM\DataList;
use SilverStripe\ORM\PaginatedList;
use SilverStripe\SiteConfig\SiteConfig;
use Terraformers\OpenArchive\Documents\Errors\BadVerbDocument;
use Terraformers\OpenArchive\Documents\Errors\CannotDisseminateFormatDocument;
Expand All @@ -20,6 +20,7 @@
use Terraformers\OpenArchive\Formatters\OaiDcFormatter;
use Terraformers\OpenArchive\Formatters\OaiRecordFormatter;
use Terraformers\OpenArchive\Helpers\DateTimeHelper;
use Terraformers\OpenArchive\Helpers\ResumptionTokenHelper;
use Terraformers\OpenArchive\Models\OaiRecord;
use Throwable;

Expand Down Expand Up @@ -69,6 +70,12 @@ class OaiController extends Controller
*/
private static string $supportedGranularity = 'YYYY-MM-DDThh:mm:ssZ';

/**
* For verbs that use Resumption Tokens, this is the configuration that controls how many OAI Records we will load
* into a single response
*/
private static string $oaiRecordsPerPage = '100';

public function index(HTTPRequest $request): HTTPResponse
{
$this->getResponse()->addHeader('Content-type', 'text/xml');
Expand Down Expand Up @@ -185,38 +192,66 @@ protected function ListRecords(HTTPRequest $request): HTTPResponse
// Request URL defaults to the current URL. Extension point is provided in this method
$xmlDocument->setRequestUrl($this->getRequestUrl($request));

// The lower bound for selective harvesting
$from = $request->getVar('from');
// The upper bound for selective harvesting
$until = $request->getVar('until');
// The lower bound for selective harvesting. The original UTC should be preserved for Resumption Tokens and any
// display requirements
$fromUtc = $request->getVar('from');
// Local value which will be used purely for internal filtering
$fromLocal = null;
// The upper bound for selective harvesting. The original UTC should be preserved for Resumption Tokens and any
// display requirements
$untilUtc = $request->getVar('until');
// Local value which will be used purely for internal filtering
$untilLocal = null;
// Specifies the Set for selective harvesting
$set = (int) $request->getVar('set');
$set = $request->getVar('set');
// An encoded string containing pagination requirements for selective harvesting
$resumptionToken = $request->getVar('resumptionToken');
// Default page is always 1, but this can change later if there is a Resumption Token active
$currentPage = 1;

if ($from) {
if ($fromUtc) {
try {
$from = DateTimeHelper::getLocalStringFromUtc($from);
$fromLocal = DateTimeHelper::getLocalStringFromUtc($fromUtc);
} catch (Throwable $e) {
$xmlDocument->addError(OaiDocument::ERROR_BAD_ARGUMENT, 'Invalid \'from\' date format provided');
}
}

if ($until) {
if ($untilUtc) {
try {
$until = DateTimeHelper::getLocalStringFromUtc($until);
$untilLocal = DateTimeHelper::getLocalStringFromUtc($untilUtc);
} catch (Throwable $e) {
$xmlDocument->addError(OaiDocument::ERROR_BAD_ARGUMENT, 'Invalid \'until\' date format provided');
}
}

if ($resumptionToken) {
try {
$currentPage = ResumptionTokenHelper::getPageFromResumptionToken(
$resumptionToken,
'ListRecords',
$fromUtc,
$untilUtc,
$set
);
} catch (Throwable $e) {
$xmlDocument->addError(OaiDocument::ERROR_BAD_RESUMPTION_TOKEN, $e->getMessage());
}
}

if ($xmlDocument->hasErrors()) {
return $this->getResponseWithDocumentBody($xmlDocument);
}

$oaiRecords = $this->fetchOaiRecords($from, $until, $set, $resumptionToken);
// Grab the Paginated List of records based on our filter criteria
$oaiRecords = $this->fetchOaiRecords($fromLocal, $untilLocal, $set);

// Set the page length and current page of our Paginated list
$oaiRecords->setPageLength($this->config()->get('oaiRecordsPerPage'));
$oaiRecords->setCurrentPage($currentPage);

if (!$oaiRecords->count()) {
// If there are no results after we apply filters and pagination, then we should return an error response
if (!$oaiRecords->Count()) {
$xmlDocument->addError(OaiDocument::ERROR_NO_RECORDS_MATCH);

return $this->getResponseWithDocumentBody($xmlDocument);
Expand All @@ -225,6 +260,19 @@ protected function ListRecords(HTTPRequest $request): HTTPResponse
// Start processing whatever OaiRecords we found
$xmlDocument->processOaiRecords($oaiRecords);

// If there are still more records to be processed, then we need to add a new Resumption Token to our response
if ($oaiRecords->TotalPages() > $currentPage) {
$newResumptionToken = ResumptionTokenHelper::generateResumptionToken(
'ListRecords',
$currentPage + 1,
$fromUtc,
$untilUtc,
$set
);

$xmlDocument->setResumptionToken($newResumptionToken);
}

return $this->getResponseWithDocumentBody($xmlDocument);
}

Expand Down Expand Up @@ -286,12 +334,8 @@ protected function getRepositoryName(): string
* Regarding dates, please @see $supportedGranularity docblock. All dates passed to this method should already be
* adjusted to local server time
*/
protected function fetchOaiRecords(
?string $from = null,
?string $until = null,
?int $set = null,
?string $resumptionToken = null
): DataList {
protected function fetchOaiRecords(?string $from = null, ?string $until = null, ?int $set = null): PaginatedList
{
$filters = [];

if ($from) {
Expand All @@ -306,15 +350,15 @@ protected function fetchOaiRecords(
// Set support to be added
}

if ($resumptionToken) {
// Resumption token support to be added
}

if (!$filters) {
return OaiRecord::get();
return PaginatedList::create(OaiRecord::get());
}

return OaiRecord::get()->filter($filters);
$list = OaiRecord::get()
->sort('LastEdited ASC')
->filter($filters);

return PaginatedList::create($list);
}

}
14 changes: 11 additions & 3 deletions src/Documents/ListRecordsDocument.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

namespace Terraformers\OpenArchive\Documents;

use SilverStripe\ORM\DataList;
use SilverStripe\ORM\PaginatedList;
use Terraformers\OpenArchive\Formatters\OaiRecordFormatter;
use Terraformers\OpenArchive\Models\OaiRecord;

Expand All @@ -21,9 +21,9 @@ public function __construct(OaiRecordFormatter $formatter)
}

/**
* @param DataList|OaiRecord[] $oaiRecords
* @param PaginatedList|OaiRecord[] $oaiRecords
*/
public function processOaiRecords(DataList $oaiRecords): void
public function processOaiRecords(PaginatedList $oaiRecords): void
{
$listRecordsElement = $this->findOrCreateElement('ListRecords');

Expand All @@ -32,4 +32,12 @@ public function processOaiRecords(DataList $oaiRecords): void
}
}

public function setResumptionToken(string $resumptionToken): void
{
$listRecordsElement = $this->findOrCreateElement('ListRecords');
$resumptionTokenElement = $this->findOrCreateElement('resumptionToken', $listRecordsElement);

$resumptionTokenElement->nodeValue = $resumptionToken;
}

}
102 changes: 102 additions & 0 deletions src/Helpers/ResumptionTokenHelper.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
<?php

namespace Terraformers\OpenArchive\Helpers;

use Exception;

/**
 * Resumption Tokens are a form of pagination, however, they also contain a level of validation.
 *
 * Each Resumption Token should represent a specific request, including whatever filters might have been applied as
 * part of that request, as well as representing a particular "page" in the Paginated List.
 *
 * The goal is to increase reliability of pagination by making sure that each requested "page" came from a request
 * containing the expected filters. EG: You can't send an unfiltered request for OAI Records, see that there are 10
 * pages, and then decide to request page=10 with some filters now applied. The Token itself would be aware that a
 * different filter has been applied, and it would be invalid.
 *
 * A token is the base64 encoding of a JSON object holding "page" and "verb", plus "from", "until" and "set" when
 * those filters were supplied with a non-empty value.
 */
class ResumptionTokenHelper
{

    /**
     * Single message used for every validation failure, so that callers who echo the message back to the client
     * never reveal which part of the token was rejected
     */
    protected const INVALID_TOKEN_MESSAGE = 'Invalid resumption token';

    /**
     * Build the token that represents the given page of the given request
     *
     * @param string $verb The OAI verb that the token belongs to
     * @param int $page The page that the token should resume from
     * @param string|null $from Lower bound filter exactly as it was requested. Empty values are left out of the token
     * @param string|null $until Upper bound filter exactly as it was requested. Empty values are left out of the token
     * @param int|null $set Set filter. Empty values are left out of the token
     * @throws \JsonException If the parts cannot be represented as JSON (EG: invalid UTF-8 in a filter value)
     */
    public static function generateResumptionToken(
        string $verb,
        int $page,
        ?string $from = null,
        ?string $until = null,
        ?int $set = null
    ): string {
        // Every Resumption Token must include a verb and page
        $parts = [
            'page' => $page,
            'verb' => $verb,
        ];

        if ($from) {
            $parts['from'] = $from;
        }

        if ($until) {
            $parts['until'] = $until;
        }

        if ($set) {
            $parts['set'] = $set;
        }

        // Fail loudly rather than base64 encoding the "false" that json_encode() returns on failure
        return base64_encode(json_encode($parts, JSON_THROW_ON_ERROR));
    }

    /**
     * Validate a token against the current request and return the page that it points to
     *
     * @param string $resumptionToken The token as it was received in the request
     * @param string $expectedVerb The verb of the current request
     * @param string|null $expectedFrom The "from" filter of the current request
     * @param string|null $expectedUntil The "until" filter of the current request
     * @param int|null $expectedSet The "set" filter of the current request
     * @return int The page (always 1 or greater) represented by the token
     * @throws Exception If the token can't be decoded, has no usable page, or doesn't match the expected values
     */
    public static function getPageFromResumptionToken(
        string $resumptionToken,
        string $expectedVerb,
        ?string $expectedFrom = null,
        ?string $expectedUntil = null,
        ?int $expectedSet = null
    ): int {
        $resumptionParts = static::getResumptionTokenParts($resumptionToken);

        // Grab the array values of our Resumption Token or default those values to null
        $resumptionPage = $resumptionParts['page'] ?? null;
        $resumptionVerb = $resumptionParts['verb'] ?? null;
        $resumptionFrom = $resumptionParts['from'] ?? null;
        $resumptionUntil = $resumptionParts['until'] ?? null;
        $resumptionSet = $resumptionParts['set'] ?? null;

        // The token is untrusted input. The page must be a positive integer, anything else (missing, zero, negative,
        // string, float, array) is invalid and must not reach the int return type
        if (!is_int($resumptionPage) || $resumptionPage < 1) {
            throw new Exception(static::INVALID_TOKEN_MESSAGE);
        }

        // generateResumptionToken() leaves empty filters out of the token, so empty expected values need to be
        // treated as "not provided" as well, otherwise a request with (EG) "from=" could never validate its own token
        $expectedFrom = $expectedFrom ?: null;
        $expectedUntil = $expectedUntil ?: null;
        $expectedSet = $expectedSet ?: null;

        // If any of these values do not match the expected values, then this Resumption Token is invalid
        if ($resumptionVerb !== $expectedVerb
            || $resumptionFrom !== $expectedFrom
            || $resumptionUntil !== $expectedUntil
            || $resumptionSet !== $expectedSet
        ) {
            throw new Exception(static::INVALID_TOKEN_MESSAGE);
        }

        // The Resumption Token is valid, so we can return whatever value we have for page
        return $resumptionPage;
    }

    /**
     * Decode a token back into its array of parts
     *
     * @throws Exception If the token is not valid base64, or does not decode to a JSON object/array
     */
    protected static function getResumptionTokenParts(string $resumptionToken): array
    {
        // Strict mode rejects any character outside of the base64 alphabet
        $decode = base64_decode($resumptionToken, true);

        // We can't do anything with an invalid (or empty) encoded value
        if (!$decode) {
            throw new Exception(static::INVALID_TOKEN_MESSAGE);
        }

        $resumptionParts = json_decode($decode, true);

        // We expect all Resumption Tokens to decode to an array. Malformed JSON decodes to null and is caught here too
        if (!is_array($resumptionParts)) {
            throw new Exception(static::INVALID_TOKEN_MESSAGE);
        }

        return $resumptionParts;
    }

}
2 changes: 2 additions & 0 deletions src/Models/OaiRecord.php
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ class OaiRecord extends DataObject
],
];

private static string $default_sort = 'ID ASC';

public function addSet(string $title): void
{
$this->OaiSets()->add(OaiSet::findOrCreate($title));
Expand Down
16 changes: 4 additions & 12 deletions tests/Helpers/DateTimeHelperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,8 @@ public function testGetUtcStringFromLocal(): void

$localString = '2022-01-01 20:00:00';
$utcString = DateTimeHelper::getUtcStringFromLocal($localString);
// Check to see if Daylight Savings is active in Auckland
$daylightSavings = (bool) date('I');
// UTC can be -12 or -13 depending on Daylight Savings
$expectedUtc = $daylightSavings
? '2022-01-01T07:00:00Z'
: '2022-01-01T08:00:00Z';
// Daylight Savings is active in Auckland on this date (UTC+13), so UTC is 13 hours behind local time
$expectedUtc = '2022-01-01T07:00:00Z';

$this->assertEquals($expectedUtc, $utcString);
}
Expand All @@ -32,12 +28,8 @@ public function testGetLocalStringFromUtc(): void

$utcString = '2022-01-01T07:00:00Z';
$localString = DateTimeHelper::getLocalStringFromUtc($utcString);
// Check to see if Daylight Savings is active in Auckland
$daylightSavings = (bool) date('I');
// Auckland time can be +12 or +13 depending on Daylight Savings
$expectedLocal = $daylightSavings
? '2022-01-01 20:00:00'
: '2022-01-01 19:00:00';
// Daylight Savings is active in Auckland on this date, so local time is 13 hours ahead of UTC
$expectedLocal = '2022-01-01 20:00:00';

$this->assertEquals($expectedLocal, $localString);
}
Expand Down
Loading

0 comments on commit da49e46

Please sign in to comment.