Skip to content

Commit

Permalink
Merge pull request #7689 from Snuffleupagus/relative-URLs
Browse files Browse the repository at this point in the history
[api-minor] Add support for relative URLs, in both annotations and the outline, by adding a `docBaseUrl` parameter to `PDFJS.getDocument` (bug 766086)
  • Loading branch information
Snuffleupagus authored Oct 19, 2016
2 parents 6906623 + d284cfd commit 7e392c0
Show file tree
Hide file tree
Showing 18 changed files with 450 additions and 191 deletions.
107 changes: 9 additions & 98 deletions src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,21 +38,19 @@ var AnnotationFlag = sharedUtil.AnnotationFlag;
var AnnotationType = sharedUtil.AnnotationType;
var OPS = sharedUtil.OPS;
var Util = sharedUtil.Util;
var isBool = sharedUtil.isBool;
var isString = sharedUtil.isString;
var isArray = sharedUtil.isArray;
var isInt = sharedUtil.isInt;
var isValidUrl = sharedUtil.isValidUrl;
var stringToBytes = sharedUtil.stringToBytes;
var stringToPDFString = sharedUtil.stringToPDFString;
var stringToUTF8String = sharedUtil.stringToUTF8String;
var warn = sharedUtil.warn;
var Dict = corePrimitives.Dict;
var isDict = corePrimitives.isDict;
var isName = corePrimitives.isName;
var isRef = corePrimitives.isRef;
var Stream = coreStream.Stream;
var ColorSpace = coreColorSpace.ColorSpace;
var Catalog = coreObj.Catalog;
var ObjectLoader = coreObj.ObjectLoader;
var FileSpec = coreObj.FileSpec;
var OperatorList = coreEvaluator.OperatorList;
Expand All @@ -66,11 +64,12 @@ AnnotationFactory.prototype = /** @lends AnnotationFactory.prototype */ {
/**
* @param {XRef} xref
* @param {Object} ref
* @param {PDFManager} pdfManager
* @param {string} uniquePrefix
* @param {Object} idCounters
* @returns {Annotation}
*/
create: function AnnotationFactory_create(xref, ref,
create: function AnnotationFactory_create(xref, ref, pdfManager,
uniquePrefix, idCounters) {
var dict = xref.fetchIfRef(ref);
if (!isDict(dict)) {
Expand All @@ -90,6 +89,7 @@ AnnotationFactory.prototype = /** @lends AnnotationFactory.prototype */ {
ref: isRef(ref) ? ref : null,
subtype: subtype,
id: id,
pdfManager: pdfManager,
};

switch (subtype) {
Expand Down Expand Up @@ -842,103 +842,14 @@ var LinkAnnotation = (function LinkAnnotationClosure() {
function LinkAnnotation(params) {
Annotation.call(this, params);

var dict = params.dict;
var data = this.data;
data.annotationType = AnnotationType.LINK;

var action = dict.get('A'), url, dest;
if (action && isDict(action)) {
var linkType = action.get('S').name;
switch (linkType) {
case 'URI':
url = action.get('URI');
if (isName(url)) {
// Some bad PDFs do not put parentheses around relative URLs.
url = '/' + url.name;
} else if (url) {
url = addDefaultProtocolToUrl(url);
}
// TODO: pdf spec mentions urls can be relative to a Base
// entry in the dictionary.
break;

case 'GoTo':
dest = action.get('D');
break;

case 'GoToR':
var urlDict = action.get('F');
if (isDict(urlDict)) {
// We assume that we found a FileSpec dictionary
// and fetch the URL without checking any further.
url = urlDict.get('F') || null;
} else if (isString(urlDict)) {
url = urlDict;
}

// NOTE: the destination is relative to the *remote* document.
var remoteDest = action.get('D');
if (remoteDest) {
if (isName(remoteDest)) {
remoteDest = remoteDest.name;
}
if (isString(url)) {
var baseUrl = url.split('#')[0];
if (isString(remoteDest)) {
// In practice, a named destination may contain only a number.
// If that happens, use the '#nameddest=' form to avoid the link
// redirecting to a page, instead of the correct destination.
url = baseUrl + '#' +
(/^\d+$/.test(remoteDest) ? 'nameddest=' : '') + remoteDest;
} else if (isArray(remoteDest)) {
url = baseUrl + '#' + JSON.stringify(remoteDest);
}
}
}
// The 'NewWindow' property, equal to `LinkTarget.BLANK`.
var newWindow = action.get('NewWindow');
if (isBool(newWindow)) {
data.newWindow = newWindow;
}
break;

case 'Named':
data.action = action.get('N').name;
break;

default:
warn('unrecognized link type: ' + linkType);
}
} else if (dict.has('Dest')) { // Simple destination link.
dest = dict.get('Dest');
}

if (url) {
if (isValidUrl(url, /* allowRelative = */ false)) {
data.url = tryConvertUrlEncoding(url);
}
}
if (dest) {
data.dest = isName(dest) ? dest.name : dest;
}
}

// Lets URLs beginning with 'www.' default to using the 'http://' protocol.
function addDefaultProtocolToUrl(url) {
if (isString(url) && url.indexOf('www.') === 0) {
return ('http://' + url);
}
return url;
}

function tryConvertUrlEncoding(url) {
// According to ISO 32000-1:2008, section 12.6.4.7, URIs should be encoded
// in 7-bit ASCII. Some bad PDFs use UTF-8 encoding, see Bugzilla 1122280.
try {
return stringToUTF8String(url);
} catch (e) {
return url;
}
Catalog.parseDestDictionary({
destDict: params.dict,
resultObj: data,
docBaseUrl: params.pdfManager.docBaseUrl,
});
}

Util.inherit(LinkAnnotation, Annotation, {});
Expand Down
1 change: 1 addition & 0 deletions src/core/document.js
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ var Page = (function PageClosure() {
for (var i = 0, n = annotationRefs.length; i < n; ++i) {
var annotationRef = annotationRefs[i];
var annotation = annotationFactory.create(this.xref, annotationRef,
this.pdfManager,
this.uniquePrefix,
this.idCounters);
if (annotation) {
Expand Down
159 changes: 139 additions & 20 deletions src/core/obj.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,14 @@ var createPromiseCapability = sharedUtil.createPromiseCapability;
var error = sharedUtil.error;
var info = sharedUtil.info;
var isArray = sharedUtil.isArray;
var isBool = sharedUtil.isBool;
var isInt = sharedUtil.isInt;
var isString = sharedUtil.isString;
var shadow = sharedUtil.shadow;
var stringToPDFString = sharedUtil.stringToPDFString;
var stringToUTF8String = sharedUtil.stringToUTF8String;
var warn = sharedUtil.warn;
var isValidUrl = sharedUtil.isValidUrl;
var createValidAbsoluteUrl = sharedUtil.createValidAbsoluteUrl;
var Util = sharedUtil.Util;
var Ref = corePrimitives.Ref;
var RefSet = corePrimitives.RefSet;
Expand Down Expand Up @@ -152,23 +153,12 @@ var Catalog = (function CatalogClosure() {
}
assert(outlineDict.has('Title'), 'Invalid outline item');

var actionDict = outlineDict.get('A'), dest = null, url = null;
if (actionDict) {
var destEntry = actionDict.get('D');
if (destEntry) {
dest = destEntry;
} else {
var uriEntry = actionDict.get('URI');
if (isString(uriEntry) && isValidUrl(uriEntry, false)) {
url = uriEntry;
}
}
} else if (outlineDict.has('Dest')) {
dest = outlineDict.getRaw('Dest');
if (isName(dest)) {
dest = dest.name;
}
}
var data = { url: null, dest: null, };
Catalog.parseDestDictionary({
destDict: outlineDict,
resultObj: data,
docBaseUrl: this.pdfManager.docBaseUrl,
});
var title = outlineDict.get('Title');
var flags = outlineDict.get('F') || 0;

Expand All @@ -179,8 +169,9 @@ var Catalog = (function CatalogClosure() {
rgbColor = ColorSpace.singletons.rgb.getRgb(color, 0);
}
var outlineItem = {
dest: dest,
url: url,
dest: data.dest,
url: data.url,
newWindow: data.newWindow,
title: stringToPDFString(title),
color: rgbColor,
count: outlineDict.get('Count'),
Expand Down Expand Up @@ -595,6 +586,134 @@ var Catalog = (function CatalogClosure() {
}
};

/**
 * Helper function used to parse the contents of destination dictionaries
 * (e.g. link annotation or outline item dictionaries).
 *
 * Depending on what the dictionary contains, the following properties may be
 * written to `params.resultObj`:
 *  - `url`: the absolute URL, only set when `createValidAbsoluteUrl` returns
 *    a truthy value for the parsed URL.
 *  - `unsafeUrl`: the parsed URL string, set whenever a URL string was found,
 *    regardless of whether an absolute URL could be created from it.
 *  - `dest`: the destination, as either a string or an array.
 *  - `newWindow`: the boolean 'NewWindow' entry of a 'GoToR' action.
 *  - `action`: the name of a 'Named' action.
 *
 * Invalid input is reported through `warn`, and nothing is written in that
 * case; this function does not return a value.
 *
 * @param {Object} params - An object with the following properties:
 * @param {Dict} params.destDict - The dictionary containing the destination.
 * @param {Object} params.resultObj - The object where the parsed destination
 *   properties will be placed.
 * @param {string} params.docBaseUrl - (optional) The document base URL that
 *   is used when attempting to recover valid absolute URLs from relative ones.
 */
Catalog.parseDestDictionary = function Catalog_parseDestDictionary(params) {
  // Lets URLs beginning with 'www.' default to using the 'http://' protocol.
  function addDefaultProtocolToUrl(url) {
    if (url.indexOf('www.') === 0) {
      return ('http://' + url);
    }
    return url;
  }
  // According to ISO 32000-1:2008, section 12.6.4.7, URIs should be encoded
  // in 7-bit ASCII. Some bad PDFs use UTF-8 encoding, see Bugzilla 1122280.
  function tryConvertUrlEncoding(url) {
    try {
      return stringToUTF8String(url);
    } catch (e) {
      // Deliberate best-effort: fall back to the unconverted string.
      return url;
    }
  }

  var destDict = params.destDict;
  if (!isDict(destDict)) {
    warn('Catalog_parseDestDictionary: "destDict" must be a dictionary.');
    return;
  }
  var resultObj = params.resultObj;
  // `typeof null === 'object'`, hence `null` must be rejected explicitly since
  // properties are assigned on `resultObj` below.
  if (!resultObj || typeof resultObj !== 'object') {
    warn('Catalog_parseDestDictionary: "resultObj" must be an object.');
    return;
  }
  var docBaseUrl = params.docBaseUrl || null;

  var action = destDict.get('A'), url, dest;
  if (isDict(action)) {
    var actionType = action.get('S');
    // Corrupt PDF files may lack the 'S' entry, or use a non-Name value for
    // it; bail out rather than throwing when reading its `name` property.
    if (!isName(actionType)) {
      warn('Catalog_parseDestDictionary: Invalid type in Action dictionary.');
      return;
    }
    var linkType = actionType.name;

    switch (linkType) {
      case 'URI':
        url = action.get('URI');
        if (isName(url)) {
          // Some bad PDFs do not put parentheses around relative URLs.
          url = '/' + url.name;
        } else if (isString(url)) {
          url = addDefaultProtocolToUrl(url);
        }
        // TODO: pdf spec mentions urls can be relative to a Base
        // entry in the dictionary.
        break;

      case 'GoTo':
        dest = action.get('D');
        break;

      case 'GoToR':
        var urlDict = action.get('F');
        if (isDict(urlDict)) {
          // We assume that we found a FileSpec dictionary
          // and fetch the URL without checking any further.
          url = urlDict.get('F') || null;
        } else if (isString(urlDict)) {
          url = urlDict;
        }

        // NOTE: the destination is relative to the *remote* document.
        var remoteDest = action.get('D');
        if (remoteDest) {
          if (isName(remoteDest)) {
            remoteDest = remoteDest.name;
          }
          if (isString(url)) {
            // Replace any existing hash in the URL with the remote destination.
            var baseUrl = url.split('#')[0];
            if (isString(remoteDest)) {
              // In practice, a named destination may contain only a number.
              // If that happens, use the '#nameddest=' form to avoid the link
              // redirecting to a page, instead of the correct destination.
              url = baseUrl + '#' +
                (/^\d+$/.test(remoteDest) ? 'nameddest=' : '') + remoteDest;
            } else if (isArray(remoteDest)) {
              url = baseUrl + '#' + JSON.stringify(remoteDest);
            }
          }
        }
        // The 'NewWindow' property, equal to `LinkTarget.BLANK`.
        var newWindow = action.get('NewWindow');
        if (isBool(newWindow)) {
          resultObj.newWindow = newWindow;
        }
        break;

      case 'Named':
        var namedAction = action.get('N');
        if (isName(namedAction)) {
          resultObj.action = namedAction.name;
        }
        break;

      default:
        warn('Catalog_parseDestDictionary: Unrecognized link type "' +
             linkType + '".');
        break;
    }
  } else if (destDict.has('Dest')) { // Simple destination link.
    dest = destDict.get('Dest');
  }

  if (isString(url)) {
    url = tryConvertUrlEncoding(url);
    var absoluteUrl = createValidAbsoluteUrl(url, docBaseUrl);
    if (absoluteUrl) {
      resultObj.url = absoluteUrl.href;
    }
    // Always expose the parsed string, even when no absolute URL was created.
    resultObj.unsafeUrl = url;
  }
  if (dest) {
    if (isName(dest)) {
      dest = dest.name;
    }
    // Only strings and arrays are accepted as destinations.
    if (isString(dest) || isArray(dest)) {
      resultObj.dest = dest;
    }
  }
};

return Catalog;
})();

Expand Down
Loading

0 comments on commit 7e392c0

Please sign in to comment.