Skip to content

Commit

Permalink
Merge pull request #203 from nulib/4546-pdf
Browse files Browse the repository at this point in the history
Auxiliary & Access file download
  • Loading branch information
kdid authored May 2, 2024
2 parents 03d3e02 + 9a2f9ca commit d3044d3
Show file tree
Hide file tree
Showing 12 changed files with 4,175 additions and 299 deletions.
8 changes: 8 additions & 0 deletions docs/docs/spec/data-types.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ components:
description:
type: string
nullable: true
download_url:
type: string
nullable: true
duration:
type: number
nullable: true
Expand Down Expand Up @@ -172,6 +175,7 @@ components:
- id
- accession_number
- description
- download_url
- duration
- height
- label
Expand Down Expand Up @@ -497,6 +501,9 @@ components:
embedding_model:
type: string
description: The name of the inference model used to generate the `embedding` from the resource's content.
download_url:
type: string
nullable: true
file_sets:
type: array
description: File sets associated with the resource.
Expand Down Expand Up @@ -759,6 +766,7 @@ components:
- description
- embedding
- embedding_model
- download_url
- file_sets
- folder_name
- folder_number
Expand Down
2,121 changes: 1,989 additions & 132 deletions node/package-lock.json

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions node/src/api/response/iiif/manifest.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ const {
buildAnnotationBody,
buildImageResourceId,
buildImageService,
isAltFormat,
isAudioVideo,
isPDF,
buildSupplementingAnnotation,
} = require("./presentation-api/items");
const { metadataLabelFields } = require("./presentation-api/metadata");
Expand Down Expand Up @@ -108,6 +110,24 @@ function transform(response) {
},
});

/** Add rendering */
// Every Auxiliary file set whose mime type is PDF becomes one rendering
// entry; the (possibly empty) list is always handed to the manifest.
// NOTE(review): `id` is null when a file set has no download_url — confirm
// that consumers of the manifest accept a rendering without an id.
const renderings = source.file_sets
  .filter(
    ({ role, mime_type: mimeType }) => role === "Auxiliary" && isPDF(mimeType)
  )
  .map(({ download_url: downloadUrl, label }) => ({
    id: downloadUrl || null,
    type: "Text",
    label: {
      en: [label || "Download PDF"],
    },
    format: "application/pdf",
  }));
manifest.setRendering(renderings);

/** Add rights using rights statement */
source.rights_statement?.id &&
manifest.setRights(source.rights_statement.id);
Expand Down Expand Up @@ -203,6 +223,7 @@ function transform(response) {

// Auxiliary file sets that are not alternate formats (see isAltFormat) are
// passed to buildCanvasFromFileSet, indexed by their position in the
// filtered list.
const auxiliaryCanvasFileSets = source.file_sets.filter(
  ({ role, mime_type: mimeType }) =>
    role === "Auxiliary" && !isAltFormat(mimeType)
);
auxiliaryCanvasFileSets.forEach((fileSet, index) => {
  buildCanvasFromFileSet(fileSet, index, true);
});
Expand Down
15 changes: 15 additions & 0 deletions node/src/api/response/iiif/presentation-api/items.js
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,15 @@ function buildSupplementingAnnotation({ canvasId, fileSet }) {
};
}

/**
 * Reports whether a MIME type is one of the alternate download formats
 * (PDF or a ZIP archive).
 *
 * @param {string} mimeType - MIME type to test; matched exactly and case-sensitively.
 * @returns {boolean} true for "application/pdf", "application/zip" or
 *   "application/zip-compressed"; false for anything else.
 */
function isAltFormat(mimeType) {
  switch (mimeType) {
    case "application/pdf":
    case "application/zip":
    case "application/zip-compressed":
      return true;
    default:
      return false;
  }
}

/**
 * Reports whether a type name denotes audio/video content.
 *
 * @param {string} type - Type name to test; matched exactly.
 * @returns {boolean} true for "Audio", "Video" or "Sound".
 */
function isAudioVideo(type) {
  return type === "Audio" || type === "Video" || type === "Sound";
}
Expand All @@ -65,13 +74,19 @@ function isImage(workType) {
return workType === "Image";
}

/**
 * Reports whether a MIME type is exactly "application/pdf".
 *
 * @param {string} mimeType - MIME type to test (case-sensitive, no parameters).
 * @returns {boolean}
 */
function isPDF(mimeType) {
  const pdfMimeType = "application/pdf";
  return mimeType === pdfMimeType;
}

// Names this module makes available to other modules through require().
module.exports = {
annotationType,
buildAnnotationBody,
buildAnnotationBodyId,
buildImageResourceId,
buildImageService,
buildSupplementingAnnotation,
isAltFormat,
isAudioVideo,
isImage,
isPDF,
};
154 changes: 136 additions & 18 deletions node/src/handlers/get-file-set-download.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,75 @@ const { SFNClient, StartExecutionCommand } = require("@aws-sdk/client-sfn");
const { wrap } = require("./middleware");
const { getFileSet } = require("../api/opensearch");
const { videoTranscodeSettings } = require("./transcode-templates");

const { getSignedUrl } = require("@aws-sdk/s3-request-presigner");
const { S3Client, GetObjectCommand } = require("@aws-sdk/client-s3");
const { apiTokenName } = require("../environment");
const ApiToken = require("../api/api-token");
const axios = require("axios").default;
const cookie = require("cookie");
const mime = require("mime-types");
const opensearchResponse = require("../api/response/opensearch");
const path = require("path");

/**
 * Handler for the file set download endpoint.
 *
 * Looks up the file set named by the `id` path parameter and dispatches on
 * the returned document: video downloads go to processAVDownload, images are
 * fetched through IIIFImageRequest, and auxiliary alternate-format files get
 * a 302 redirect to the URL returned by getDownloadLink. Any other document
 * is rejected with 405, and a non-200 lookup is handed to
 * opensearchResponse.transform.
 *
 * NOTE(review): this listing comes from a rendered diff without +/- markers,
 * so superseded lines (for example the literal `allowPrivate: true` beside
 * the shorthand `allowPrivate`) appear next to their replacements — confirm
 * against the committed file before relying on the exact statement order.
 */
exports.handler = wrap(async (event) => {
const id = event.pathParameters.id;
const email = event.queryStringParameters?.email;
if (!email) {
return invalidRequest(400, "Query string must include email address");
}

if (!event.userToken.isSuperUser()) {
return invalidRequest(401, "Unauthorized");
}
// Private file sets: superusers, reading-room users, or tokens holding an
// entitlement for this id.
const allowPrivate =
event.userToken.isSuperUser() ||
event.userToken.isReadingRoom() ||
event.userToken.hasEntitlement(id);
// Unpublished file sets: superusers or entitlement holders only.
const allowUnpublished =
event.userToken.isSuperUser() || event.userToken.hasEntitlement(id);
const esResponse = await getFileSet(id, {
allowPrivate: true,
allowUnpublished: true,
allowPrivate,
allowUnpublished,
});

if (esResponse.statusCode == "200") {
const doc = JSON.parse(esResponse.body);
if (downloadAvailable(doc)) {
return await processDownload(doc, email);
if (isVideoDownload(doc)) {
// Video downloads require an email address and a superuser token.
if (!email) {
return invalidRequest(400, "Query string must include email address");
}
if (!event.userToken.isSuperUser()) {
return invalidRequest(401, "Unauthorized");
}
return await processAVDownload(doc, email);
} else if (isImageDownload(doc)) {
return await IIIFImageRequest(doc);
} else if (isAltFileDownload(doc)) {
// Send the client to the URL produced by getDownloadLink.
const url = await getDownloadLink(doc);
return {
statusCode: 302,
headers: { Location: url },
};
} else {
return invalidRequest(
405,
"Download only allowed for role: Access, work_type: Video, with a valid streaming_url"
);
return invalidRequest(405, "Download not allowed for role + work_type");
}
} else {
return await opensearchResponse.transform(esResponse);
}
});

function downloadAvailable(doc) {
/**
 * Decides whether a file set document is an alternate-format download:
 * a found document with role "Auxiliary" whose mime type is PDF or ZIP.
 *
 * @param {object} doc - Document with `found` and, when found, `_source`
 *   carrying `role` and `mime_type`.
 * @returns {boolean|*} true/false for found documents; when `doc.found` is
 *   falsy that falsy value itself is returned.
 */
function isAltFileDownload(doc) {
  const { found } = doc;
  if (!found) {
    // Mirror `&&` short-circuiting: hand back the falsy value unchanged.
    return found;
  }

  const { role, mime_type: mimeType } = doc._source;
  if (role !== "Auxiliary" || mimeType == null) {
    return false;
  }

  const downloadableTypes = [
    "application/pdf",
    "application/zip",
    "application/zip-compressed",
  ];
  return downloadableTypes.includes(mimeType);
}

function isVideoDownload(doc) {
// Note - audio is not currently implemented due to an issue with AWS
// & MediaConvert and our .m3u8 files
return (
Expand All @@ -51,7 +82,94 @@ function downloadAvailable(doc) {
);
}

async function processDownload(doc, email) {
/**
 * Decides whether a file set document is an image download: a found document
 * with role "Access" or "Auxiliary" whose mime type has the top-level type
 * "image".
 *
 * @param {object} doc - Document with `found` and, when found, `_source`
 *   carrying `role` and `mime_type`.
 * @returns {boolean|*} true/false for found documents; when `doc.found` is
 *   falsy that falsy value itself is returned.
 */
function isImageDownload(doc) {
  const { found } = doc;
  if (!found) {
    // Mirror `&&` short-circuiting: hand back the falsy value unchanged.
    return found;
  }

  const { role, mime_type: mimeType } = doc._source;
  const roleAllowsDownload = role === "Access" || role === "Auxiliary";
  if (!roleAllowsDownload || mimeType == null) {
    return false;
  }

  // Only the part before the first "/" matters, e.g. "image" in "image/tiff".
  return mimeType.split("/")[0] === "image";
}

function derivativeKey(doc) {
const id = doc._id;
let prefix =
id.slice(0, 2) +
"/" +
id.slice(2, 4) +
"/" +
id.slice(4, 6) +
"/" +
id.slice(6, 8);
return "derivatives/" + prefix + "/" + id;
}

/**
 * Chooses the filename offered to the browser for a file set download.
 *
 * Uses the file set label (falling back to the document id when the label is
 * missing or empty) and appends the extension for the mime type unless the
 * label already ends with it.
 *
 * @param {object} doc - Document with `_id` and `_source.label` / `_source.mime_type`.
 * @returns {string} The download filename.
 */
function downloadFilename(doc) {
  const { label, mime_type: mimeType } = doc._source;
  const baseName =
    label == null || label === "" ? String(doc._id) : String(label);

  // mime.extension() returns false for types it does not know, which used to
  // produce names ending in ".false". "application/zip-compressed" is accepted
  // by the download handler, so it gets an explicit fallback in case the mime
  // database does not list it.
  const extension =
    mime.extension(mimeType) ||
    (mimeType === "application/zip-compressed" ? "zip" : null);

  if (!extension || baseName.toLowerCase().endsWith(`.${extension}`)) {
    return baseName;
  }
  return `${baseName}.${extension}`;
}

/**
 * Builds a Content-Disposition "attachment" value that is safe for arbitrary
 * filenames: a quoted ASCII fallback plus an RFC 5987 `filename*` parameter
 * carrying the exact UTF-8 name.
 *
 * @param {string} filename - Desired download filename.
 * @returns {string} Header value.
 */
function attachmentDisposition(filename) {
  // Inside a quoted-string, quotes, backslashes and non-printable or
  // non-ASCII characters are not safe; replace them in the fallback only.
  const asciiFallback = filename.replace(/[^\x20-\x7e]|["\\]/g, "_");
  // encodeURIComponent leaves ' ( ) * unescaped, but RFC 5987 does not allow
  // them unencoded in an ext-value.
  const encoded = encodeURIComponent(filename).replace(
    /['()*]/g,
    (char) => `%${char.charCodeAt(0).toString(16).toUpperCase()}`
  );
  return `attachment; filename="${asciiFallback}"; filename*=UTF-8''${encoded}`;
}

/**
 * Creates a presigned S3 URL for downloading a file set's derivative as an
 * attachment.
 *
 * The object is read from the bucket named by the PYRAMID_BUCKET environment
 * variable at the key returned by derivativeKey(doc).
 *
 * @param {object} doc - Document with `_id` and `_source.label` / `_source.mime_type`.
 * @returns {Promise<string>} The presigned URL.
 */
async function getDownloadLink(doc) {
  const command = new GetObjectCommand({
    Bucket: process.env.PYRAMID_BUCKET,
    Key: derivativeKey(doc),
    ResponseContentDisposition: attachmentDisposition(downloadFilename(doc)),
  });

  // NOTE(review): a URL signed with temporary credentials presumably stops
  // working when those credentials expire, which may be sooner than the
  // requested lifetime — confirm 3 days is achievable in the deployment.
  const threeDaysInSeconds = 3600 * 24 * 3;
  return await getSignedUrl(new S3Client({}), command, {
    expiresIn: threeDaysInSeconds,
  });
}

/**
 * Performs an axios GET and returns the HTTP response for both success and
 * HTTP-error outcomes, so callers can inspect `status` themselves.
 *
 * @param {string} url - URL to request.
 * @param {object} config - axios request configuration.
 * @returns {Promise<object>} The axios response (or the error's response when
 *   the server answered with a non-2xx status).
 * @throws The original error when no response was received (for example a
 *   network failure or timeout). Previously this case resolved `undefined`,
 *   which surfaced later as an unrelated TypeError in the caller.
 */
const getAxiosResponse = async (url, config) => {
  try {
    return await axios.get(url, config);
  } catch (error) {
    if (error?.response) {
      return error.response;
    }
    throw error;
  }
};

/**
 * Fetches the full-size default JPEG for a file set through its IIIF image
 * URL and shapes the result as a handler response.
 *
 * The request carries a cookie holding a freshly signed superuser API token.
 * A 200 answer is returned base64-encoded with only its content-type header;
 * any other status is relayed with the upstream body (as a string) and
 * headers.
 *
 * @param {object} doc - Document whose `_source.representative_image_url` is
 *   the IIIF image base URL.
 * @returns {Promise<object>} Response object with statusCode, body and headers.
 */
const IIIFImageRequest = async (doc) => {
  const imageUrl = `${doc._source.representative_image_url}/full/max/0/default.jpg`;

  const superUserCookie = cookie.serialize(
    apiTokenName(),
    new ApiToken().superUser().sign(),
    {
      domain: "library.northwestern.edu",
      path: "/",
      secure: true,
    }
  );
  const requestConfig = {
    headers: { cookie: superUserCookie },
    responseType: "arraybuffer",
  };

  const upstream = await getAxiosResponse(imageUrl, requestConfig);
  const { status, headers, data } = upstream;

  // Loose comparison kept on purpose so the status is treated exactly as before.
  if (status == 200) {
    return {
      statusCode: status,
      isBase64Encoded: true,
      body: data.toString("base64"),
      headers: {
        "content-type": headers["content-type"],
      },
    };
  }

  return {
    statusCode: status,
    body: data.toString(),
    headers: headers,
  };
};

async function processAVDownload(doc, email) {
const stepFunctionConfig = process.env.STEP_FUNCTION_ENDPOINT
? { endpoint: process.env.STEP_FUNCTION_ENDPOINT }
: {};
Expand Down
Loading

0 comments on commit d3044d3

Please sign in to comment.