-
Notifications
You must be signed in to change notification settings - Fork 9.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
core(jsonld): add structured data validation #6750
Changes from 13 commits
5f02a63
2883d90
4d27cd0
e9daf3d
2371450
179796a
a21b921
6ac3092
e8714ce
12ff45f
6a981d9
21ec416
75b7a51
eca612a
e345a5e
ca2443b
5ae8632
a6d2aaa
965541d
dddba0f
1fc3d13
e4ba275
d29678b
9b7abd3
1bd1f9e
a4200aa
fedd5cb
064c104
4ebdd17
df921da
ce535cf
002a663
70028c9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,8 +32,9 @@ | |
"unit-cli": "jest --runInBand \"lighthouse-cli/\"", | ||
"unit-cli:ci": "jest --runInBand --coverage --ci \"lighthouse-cli/\"", | ||
"unit-viewer": "mocha --reporter dot \"lighthouse-viewer/test/**/*-test.js\"", | ||
"unit": "yarn unit-core && yarn unit-cli && yarn unit-viewer", | ||
"unit:ci": "yarn unit-core:ci && yarn unit-cli:ci && yarn unit-viewer", | ||
"unit-sd": "mocha --reporter dot \"sd-validation/test/**/*-test.js\"", | ||
"unit": "yarn unit-core && yarn unit-cli && yarn unit-viewer && yarn unit-sd", | ||
"unit:ci": "yarn unit-core:ci && yarn unit-cli:ci && yarn unit-viewer && yarn unit-sd", | ||
"core-unit": "yarn unit-core", | ||
"cli-unit": "yarn unit-cli", | ||
"viewer-unit": "yarn unit-viewer", | ||
|
@@ -129,6 +130,7 @@ | |
"prettier": "^1.14.3", | ||
"pretty-json-stringify": "^0.0.2", | ||
"puppeteer": "^1.10.0", | ||
"request-promise-native": "^1.0.5", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the docs for |
||
"sinon": "^2.3.5", | ||
"typescript": "3.2.2", | ||
"uglify-es": "3.0.15", | ||
|
@@ -149,6 +151,8 @@ | |
"intl-messageformat-parser": "^1.4.0", | ||
"jpeg-js": "0.1.2", | ||
"js-library-detector": "^5.1.0", | ||
"jsonld": "^1.5.0", | ||
"jsonlint-mod": "^1.7.4", | ||
"lighthouse-logger": "^1.2.0", | ||
"lodash.isequal": "^4.5.0", | ||
"lookup-closest-locale": "6.0.4", | ||
|
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
/** | ||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* @license Copyright 2018 Google Inc. All Rights Reserved. | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. | ||
*/ | ||
'use strict'; | ||
|
||
const {URL} = require('url'); | ||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const jsonld = require('jsonld'); | ||
// @ts-ignore | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this needs to be ignored anymore There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
const schemaOrgContext = require('./assets/jsonldcontext.json'); | ||
const SCHEMA_ORG_HOST = 'schema.org'; | ||
|
||
/** | ||
* Custom loader that prevents network calls and allows us to return local version of the | ||
* schema.org document | ||
* @param {string} schemaUrl | ||
* @param {function(null|Error, Object|undefined):void} callback | ||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||
*/ | ||
function documentLoader(schemaUrl, callback) { | ||
let urlObj = null; | ||
|
||
try { | ||
urlObj = new URL(schemaUrl, 'http://example.com'); | ||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} catch (e) { | ||
return callback(Error('Error parsing URL: ' + schemaUrl), undefined); | ||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
if (urlObj && urlObj.host === SCHEMA_ORG_HOST && urlObj.pathname === '/') { | ||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||
callback(null, { | ||
document: schemaOrgContext, | ||
}); | ||
} else { | ||
// We only process schema.org, for other schemas we return an empty object | ||
callback(null, { | ||
document: {}, | ||
}); | ||
} | ||
} | ||
|
||
/** | ||
* Takes JSON-LD object and normalizes it by following the expansion algorithm | ||
* (https://json-ld.org/spec/latest/json-ld-api/#expansion). | ||
* | ||
* @param {Object} inputObject | ||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* @returns {Promise<Object>} | ||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||
*/ | ||
module.exports = async function expand(inputObject) { | ||
try { | ||
return await jsonld.expand(inputObject, {documentLoader}); | ||
} catch (err) { | ||
// jsonld wraps real errors in a bunch of junk, so see we have an underlying error first | ||
if (err.details && err.details.cause) throw err.details.cause; | ||
throw err; | ||
} | ||
}; |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,31 @@ | ||||||
/** | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||
* @license Copyright 2018 Google Inc. All Rights Reserved. | ||||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||||||
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. | ||||||
*/ | ||||||
'use strict'; | ||||||
|
||||||
/** | ||||||
* Recursively (DFS) traverses an object and calls provided function for each field. | ||||||
* | ||||||
* @param {Object} obj | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||
* @param {function(String, any, Array<String>, Object): void} callback | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
* @param {Array<String>} path | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
*/ | ||||||
module.exports = function walkObject(obj, callback, path = []) { | ||||||
if (obj === null) { | ||||||
return; | ||||||
} | ||||||
|
||||||
Object.keys(obj).forEach(fieldName => { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||
const fieldValue = obj[fieldName]; | ||||||
const newPath = Array.from(path); | ||||||
newPath.push(fieldName); | ||||||
|
||||||
callback(fieldName, fieldValue, newPath, obj); | ||||||
|
||||||
if (typeof fieldValue === 'object') { | ||||||
walkObject(fieldValue, callback, newPath); | ||||||
} | ||||||
}); | ||||||
}; |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,83 @@ | ||||||
/** | ||||||
* @license Copyright 2018 Google Inc. All Rights Reserved. | ||||||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||||||
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. | ||||||
*/ | ||||||
'use strict'; | ||||||
|
||||||
const parseJSON = require('./json.js'); | ||||||
const validateJsonLD = require('./jsonld.js'); | ||||||
const promiseExpand = require('./expand.js'); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. drop the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||
const validateSchemaOrg = require('./schema.js'); | ||||||
|
||||||
/** | ||||||
* Validates JSON-LD input. Returns array of error objects. | ||||||
* | ||||||
* @param {string} textInput | ||||||
* @returns {Promise<Array<{path: ?string, validator: string, message: string}>>} | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. more specific validator type? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||
*/ | ||||||
module.exports = async function validate(textInput) { | ||||||
/** @type {Array<{path: ?string, validator: string, message: string}>} */ | ||||||
const errors = []; | ||||||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
// STEP 1: VALIDATE JSON | ||||||
const parseOutput = parseJSON(textInput); | ||||||
|
||||||
if (parseOutput.error) { | ||||||
errors.push({ | ||||||
validator: 'json', | ||||||
path: parseOutput.error.line, | ||||||
message: parseOutput.error.message, | ||||||
}); | ||||||
|
||||||
return errors; | ||||||
} | ||||||
|
||||||
const inputObject = parseOutput.result; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could move the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||
|
||||||
// STEP 2: VALIDATE JSONLD | ||||||
const jsonLdErrors = validateJsonLD(inputObject); | ||||||
|
||||||
if (jsonLdErrors && jsonLdErrors.length) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
(since There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||
jsonLdErrors.forEach(error => { | ||||||
errors.push({ | ||||||
validator: 'json-ld', | ||||||
path: error.path, | ||||||
message: error.message.toString(), | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||
}); | ||||||
}); | ||||||
|
||||||
return errors; | ||||||
} | ||||||
|
||||||
// STEP 3: EXPAND | ||||||
let expandedObj = null; | ||||||
try { | ||||||
expandedObj = await promiseExpand(inputObject); | ||||||
} catch (error) { | ||||||
errors.push({ | ||||||
validator: 'json-ld-expand', | ||||||
path: null, | ||||||
message: error && error.toString(), | ||||||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
}); | ||||||
|
||||||
return errors; | ||||||
} | ||||||
|
||||||
// STEP 4: VALIDATE SCHEMA | ||||||
const schemaOrgErrors = validateSchemaOrg(expandedObj); | ||||||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
|
||||||
if (schemaOrgErrors && schemaOrgErrors.length) { | ||||||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
schemaOrgErrors.forEach(error => { | ||||||
errors.push({ | ||||||
validator: 'schema-org', | ||||||
path: error.path, | ||||||
message: error.message, | ||||||
}); | ||||||
}); | ||||||
|
||||||
return errors; | ||||||
} | ||||||
|
||||||
return errors; | ||||||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
/** | ||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* @license Copyright 2018 Google Inc. All Rights Reserved. | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. | ||
*/ | ||
'use strict'; | ||
|
||
const jsonlint = require('jsonlint-mod'); | ||
|
||
/** | ||
* @param {string} input | ||
* @returns {{error: {message: string, line: string|null}|null, result: Object|null}} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
*/ | ||
module.exports = function parseJSON(input) { | ||
let result; | ||
|
||
try { | ||
jsonlint.parse(input); | ||
result = JSON.parse(input); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why don't we use the return value from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
oh, I see, this is just a json linter, not a json-ld linter.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
} catch (error) { | ||
let line = error.at; | ||
let message = error.message; | ||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// extract line number from message | ||
if (!line) { | ||
const regexLineResult = error.message.match(/Parse error on line (\d+)/); | ||
|
||
if (regexLineResult) { | ||
line = regexLineResult[1]; | ||
} | ||
} | ||
|
||
// adjust jsonlint error output to our needs | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. docs for this regex There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
const regexMessageResult = error.message.match(/-+\^\n(.+)$/); | ||
|
||
if (regexMessageResult) { | ||
message = regexMessageResult[1]; | ||
} | ||
|
||
return { | ||
error: { | ||
message, | ||
line, | ||
}, | ||
result, | ||
}; | ||
} | ||
|
||
return { | ||
error: null, | ||
result, | ||
}; | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/** | ||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* @license Copyright 2018 Google Inc. All Rights Reserved. | ||
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. | ||
*/ | ||
'use strict'; | ||
|
||
const walkObject = require('./helpers/walkObject.js'); | ||
|
||
// This list comes from the JSON-LD 1.1 spec: https://json-ld.org/spec/latest/json-ld/#syntax-tokens-and-keywords | ||
patrickhulce marked this conversation as resolved.
Show resolved
Hide resolved
|
||
const KEYWORDS = [ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe rename There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
'@base', | ||
'@container', | ||
'@context', | ||
'@graph', | ||
'@id', | ||
'@index', | ||
'@language', | ||
'@list', | ||
'@nest', | ||
'@none', | ||
'@prefix', | ||
'@reverse', | ||
'@set', | ||
'@type', | ||
'@value', | ||
'@version', | ||
'@vocab', | ||
]; | ||
|
||
/** | ||
* @param {string} fieldName | ||
* @returns boolean | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
*/ | ||
function validKeyword(fieldName) { | ||
return KEYWORDS.includes(fieldName); | ||
} | ||
|
||
/** | ||
* @param {string} keyName | ||
* @returns {string | null} error | ||
*/ | ||
function validateKey(keyName) { | ||
if (keyName[0] === '@' && !validKeyword(keyName)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and just inline There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done++ |
||
return 'Unknown keyword'; | ||
} | ||
|
||
return null; | ||
} | ||
|
||
/** | ||
* @param {Object} json | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
* @returns {Array<{path: string, message: string}>} | ||
*/ | ||
module.exports = function validateJsonLD(json) { | ||
/** @type {Array<{path: string, message: string}>} */ | ||
const errors = []; | ||
|
||
walkObject(json, (name, value, path) => { | ||
const error = validateKey.call(null, name); | ||
|
||
if (error) { | ||
errors.push({ | ||
path: path.join('/'), | ||
message: error, | ||
}); | ||
} | ||
}); | ||
|
||
return errors; | ||
}; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe since there's just the one keyword check, this file could be something like const KEYWORDS = [
// ...
];
/**
* @param {string} keyName
* @returns {boolean}
*/
function isInvalidKeyword(keyName) {
return keyName.startsWith('@') && !VALID_KEYWORDS.includes(keyName);
}
/**
* @param {any} json
* @returns {Array<{path: string, message: string}>}
*/
module.exports = function validateJsonLD(json) {
/** @type {Array<{path: string, message: string}>} */
const errors = [];
walkObject(json, (keyName, value, path) => {
if (isInvalidKeyword(keyName)) {
errors.push({
path: path.join('/'),
message: 'Unknown keyword',
});
}
});
return errors;
}; There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh, yeah this is what |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we should use jest instead of another use of mocha (which will need to be removed later) if it's not too hard to switch over
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done