From d385645bd5d72e9a5c71e77c1d3728803f236ce5 Mon Sep 17 00:00:00 2001 From: Ward Peeters Date: Wed, 6 Jun 2018 18:39:26 +0200 Subject: [PATCH] core(preload): only allow same origin (domain + subdomains) (#5065) --- lighthouse-core/audits/uses-rel-preload.js | 19 ++++++- lighthouse-core/lib/url-shim.js | 54 ++++++++++++++++++ .../test/audits/uses-rel-preload-test.js | 50 ++++++++++++++--- lighthouse-core/test/lib/url-shim-test.js | 55 +++++++++++++++++++ 4 files changed, 169 insertions(+), 9 deletions(-) diff --git a/lighthouse-core/audits/uses-rel-preload.js b/lighthouse-core/audits/uses-rel-preload.js index 4cf0e5e6314a..4625d4b3d0d9 100644 --- a/lighthouse-core/audits/uses-rel-preload.js +++ b/lighthouse-core/audits/uses-rel-preload.js @@ -5,6 +5,7 @@ */ 'use strict'; +const URL = require('../lib/url-shim'); const Audit = require('./audit'); const UnusedBytes = require('./byte-efficiency/byte-efficiency-audit'); const THRESHOLD_IN_MS = 100; @@ -57,6 +58,20 @@ class UsesRelPreloadAudit extends Audit { return requests; } + /** + * + * @param {LH.WebInspector.NetworkRequest} request + * @param {LH.WebInspector.NetworkRequest} mainResource + * @return {boolean} + */ + static shouldPreload(request, mainResource) { + if (request._isLinkPreload || request.protocol === 'data') { + return false; + } + + return URL.rootDomainsMatch(request.url, mainResource.url); + } + /** * Computes the estimated effect of preloading all the resources. * @param {Set} urls The array of byte savings results per resource @@ -159,8 +174,8 @@ class UsesRelPreloadAudit extends Audit { /** @type {Set} */ const urls = new Set(); for (const networkRecord of criticalRequests) { - if (!networkRecord._isLinkPreload && networkRecord.protocol !== 'data') { - urls.add(networkRecord._url); + if (UsesRelPreloadAudit.shouldPreload(networkRecord, mainResource)) { + urls.add(networkRecord.url); } } diff --git a/lighthouse-core/lib/url-shim.js b/lighthouse-core/lib/url-shim.js index 9bb0b5c6690b..78ee0fbbe979 100644 --- a/lighthouse-core/lib/url-shim.js +++ b/lighthouse-core/lib/url-shim.js @@ -17,6 +17,12 @@ const Util = require('../report/html/renderer/util.js'); const URL = /** @type {!Window["URL"]} */ (typeof self !== 'undefined' && self.URL) || require('url').URL; +// 25 most used tld plus one domains from http archive. +// @see https://github.com/GoogleChrome/lighthouse/pull/5065#discussion_r191926212 +const listOfTlds = [ + 'com', 'co', 'gov', 'edu', 'ac', 'org', 'go', 'gob', 'or', 'net', 'in', 'ne', 'nic', 'gouv', + 'web', 'spb', 'blog', 'jus', 'kiev', 'mil', 'wi', 'qc', 'ca', 'bel', 'on', +]; /** * There is fancy URL rewriting logic for the chrome://settings page that we need to work around. * Why? Special handling was added by Chrome team to allow a pushState transition between chrome:// pages. @@ -87,6 +93,54 @@ class URLShim extends URL { } } + /** + * Gets the tld of a domain + * + * @param {string} hostname + * @return {string} tld + */ + static getTld(hostname) { + const tlds = hostname.split('.').slice(-2); + + if (!listOfTlds.includes(tlds[0])) { + return `.${tlds[tlds.length - 1]}`; + } + + return `.${tlds.join('.')}`; + } + + /** + * Check if rootDomains matches + * + * @param {string} urlA + * @param {string} urlB + */ + static rootDomainsMatch(urlA, urlB) { + let urlAInfo; + let urlBInfo; + try { + urlAInfo = new URL(urlA); + urlBInfo = new URL(urlB); + } catch (err) { + return false; + } + + if (!urlAInfo.hostname || !urlBInfo.hostname) { + return false; + } + + const tldA = URLShim.getTld(urlAInfo.hostname); + const tldB = URLShim.getTld(urlBInfo.hostname); + + // get the string before the tld + const urlARootDomain = urlAInfo.hostname.replace(new RegExp(`${tldA}$`), '') + .split('.').splice(-1)[0]; + const urlBRootDomain = urlBInfo.hostname.replace(new RegExp(`${tldB}$`), '') + .split('.').splice(-1)[0]; + + return urlARootDomain === urlBRootDomain; + } + /** * @param {string} url * @param {{numPathParts: number, preserveQuery: boolean, preserveHost: boolean}=} options diff --git a/lighthouse-core/test/audits/uses-rel-preload-test.js b/lighthouse-core/test/audits/uses-rel-preload-test.js index 2eec167b175e..5cd3ff7cbbea 100644 --- a/lighthouse-core/test/audits/uses-rel-preload-test.js +++ b/lighthouse-core/test/audits/uses-rel-preload-test.js @@ -50,14 +50,18 @@ describe('Performance: uses-rel-preload audit', () => { it('should suggest preload resource', () => { const rootNode = buildNode(1, 'http://example.com'); - const mainDocumentNode = buildNode(2, 'http://www.example.com'); + const mainDocumentNode = buildNode(2, 'http://www.example.com:3000'); const scriptNode = buildNode(3, 'http://www.example.com/script.js'); const scriptAddedNode = buildNode(4, 'http://www.example.com/script-added.js'); + const scriptSubNode = buildNode(5, 'http://sub.example.com/script-sub.js'); + const scriptOtherNode = buildNode(6, 'http://otherdomain.com/script-other.js'); mainDocumentNode.setIsMainDocument(true); mainDocumentNode.addDependency(rootNode); scriptNode.addDependency(mainDocumentNode); scriptAddedNode.addDependency(scriptNode); + scriptSubNode.addDependency(scriptNode); + scriptOtherNode.addDependency(scriptNode); mockGraph = rootNode; mockSimulator = { @@ -69,41 +73,63 @@ describe('Performance: uses-rel-preload audit', () => { const mainDocumentNodeLocal = nodesByUrl.get(mainDocumentNode.record.url); const scriptNodeLocal = nodesByUrl.get(scriptNode.record.url); const scriptAddedNodeLocal = nodesByUrl.get(scriptAddedNode.record.url); + const scriptSubNodeLocal = nodesByUrl.get(scriptSubNode.record.url); + const scriptOtherNodeLocal = nodesByUrl.get(scriptOtherNode.record.url); const nodeTimings = new Map([ [rootNodeLocal, {starTime: 0, endTime: 500}], [mainDocumentNodeLocal, {startTime: 500, endTime: 1000}], [scriptNodeLocal, {startTime: 1000, endTime: 2000}], [scriptAddedNodeLocal, {startTime: 2000, endTime: 3250}], + [scriptSubNodeLocal, {startTime: 2000, endTime: 3000}], + [scriptOtherNodeLocal, {startTime: 2000, endTime: 3500}], ]); if (scriptAddedNodeLocal.getDependencies()[0] === mainDocumentNodeLocal) { nodeTimings.set(scriptAddedNodeLocal, {startTime: 1000, endTime: 2000}); } - return {timeInMs: 3250, nodeTimings}; + if (scriptSubNodeLocal.getDependencies()[0] === mainDocumentNodeLocal) { + nodeTimings.set(scriptSubNodeLocal, {startTime: 1000, endTime: 2000}); + } + + if (scriptOtherNodeLocal.getDependencies()[0] === mainDocumentNodeLocal) { + nodeTimings.set(scriptOtherNodeLocal, {startTime: 1000, endTime: 2500}); + } + + return {timeInMs: 3500, nodeTimings}; }, }; const mainResource = Object.assign({}, defaultMainResource, { - url: 'http://www.example.com', + url: 'http://www.example.com:3000', redirects: [''], }); const networkRecords = [ { requestId: '2', _isLinkPreload: false, - _url: 'http://www.example.com', + url: 'http://www.example.com:3000', }, { requestId: '3', _isLinkPreload: false, - _url: 'http://www.example.com/script.js', + url: 'http://www.example.com/script.js', }, { requestId: '4', _isLinkPreload: false, - _url: 'http://www.example.com/script-added.js', + url: 'http://www.example.com/script-added.js', + }, + { + requestId: '5', + _isLinkPreload: false, + url: 'http://sub.example.com/script-sub.js', + }, + { + requestId: '6', + _isLinkPreload: false, + url: 'http://otherdomain.com/script-other.js', }, ]; @@ -120,6 +146,14 @@ describe('Performance: uses-rel-preload audit', () => { request: networkRecords[2], children: {}, }, + '5': { + request: networkRecords[3], + children: {}, + }, + '6': { + request: networkRecords[4], + children: {}, + }, }, }, }, @@ -131,7 +165,9 @@ describe('Performance: uses-rel-preload audit', () => { return UsesRelPreload.audit(mockArtifacts(networkRecords, chains, mainResource), {}).then( output => { assert.equal(output.rawValue, 1250); - assert.equal(output.details.items.length, 1); + assert.equal(output.details.items.length, 2); + assert.equal(output.details.items[0].url, 'http://www.example.com/script-added.js'); + assert.equal(output.details.items[1].url, 'http://sub.example.com/script-sub.js'); } ); }); diff --git a/lighthouse-core/test/lib/url-shim-test.js b/lighthouse-core/test/lib/url-shim-test.js index 8eba0d943f0e..c3a3d160ca09 100644 --- a/lighthouse-core/test/lib/url-shim-test.js +++ b/lighthouse-core/test/lib/url-shim-test.js @@ -92,6 +92,61 @@ describe('URL Shim', () => { assert.equal(URL.getOrigin(urlD), null); }); + describe('getTld', () => { + it('returns the correct tld', () => { + assert.equal(URL.getTld('example.com'), '.com'); + assert.equal(URL.getTld('example.co.uk'), '.co.uk'); + assert.equal(URL.getTld('example.com.br'), '.com.br'); + assert.equal(URL.getTld('example.tokyo.jp'), '.jp'); + }); + }); + + describe('rootDomainsMatch', () => { + it('matches a subdomain and a root domain', () => { + const urlA = 'http://example.com/js/test.js'; + const urlB = 'http://example.com/'; + const urlC = 'http://sub.example.com/js/test.js'; + const urlD = 'http://sub.otherdomain.com/js/test.js'; + + assert.ok(URL.rootDomainsMatch(urlA, urlB)); + assert.ok(URL.rootDomainsMatch(urlA, urlC)); + assert.ok(!URL.rootDomainsMatch(urlA, urlD)); + assert.ok(!URL.rootDomainsMatch(urlB, urlD)); + }); + + it(`doesn't break on urls without a valid host`, () => { + const urlA = 'http://example.com/js/test.js'; + const urlB = 'data:image/jpeg;base64,foobar'; + const urlC = 'anonymous:90'; + const urlD = '!!garbage'; + const urlE = 'file:///opt/lighthouse/index.js'; + + assert.ok(!URL.rootDomainsMatch(urlA, urlB)); + assert.ok(!URL.rootDomainsMatch(urlA, urlC)); + assert.ok(!URL.rootDomainsMatch(urlA, urlD)); + assert.ok(!URL.rootDomainsMatch(urlA, urlE)); + assert.ok(!URL.rootDomainsMatch(urlB, urlC)); + assert.ok(!URL.rootDomainsMatch(urlB, urlD)); + assert.ok(!URL.rootDomainsMatch(urlB, urlE)); + }); + + it(`matches tld plus domains`, () => { + const coUkA = 'http://example.co.uk/js/test.js'; + const coUkB = 'http://sub.example.co.uk/js/test.js'; + const testUkA = 'http://example.test.uk/js/test.js'; + const testUkB = 'http://sub.example.test.uk/js/test.js'; + const ltdBrA = 'http://example.ltd.br/js/test.js'; + const ltdBrB = 'http://sub.example.ltd.br/js/test.js'; + const privAtA = 'http://examplepriv.at/js/test.js'; + const privAtB = 'http://sub.examplepriv.at/js/test.js'; + + assert.ok(URL.rootDomainsMatch(coUkA, coUkB)); + assert.ok(URL.rootDomainsMatch(testUkA, testUkB)); + assert.ok(URL.rootDomainsMatch(ltdBrA, ltdBrB)); + assert.ok(URL.rootDomainsMatch(privAtA, privAtB)); + }); + }); + describe('getURLDisplayName', () => { it('respects numPathParts option', () => { const url = 'http://example.com/a/deep/nested/file.css';