/**
 * Hand-maintained lookup of country-code TLDs whose registrations happen one
 * level below a second-level label (e.g. `example.co.uk`). Each key is a ccTLD
 * and each value lists the labels that may appear directly in front of it;
 * `isTldPlusDomain` reads this table to decide whether a hostname needs three
 * labels instead of two to identify its root domain.
 *
 * NOTE(review): the `ru` list is identical to the `ar` list, which looks like a
 * copy-paste; the table is also not exhaustive. Verify the entries against the
 * Public Suffix List before relying on them.
 */
const tldPlusOne = {
  // Argentina
  ar: [
    'com', 'edu', 'gob', 'int', 'mil',
    'mar', 'net', 'org', 'tur', 'musica',
  ],
  // Austria
  at: ['co', 'or', 'priv', 'ac'],
  // France
  fr: ['avocat', 'aeroport', 'veterinaire'],
  // New Zealand
  nz: [
    'ac', 'co', 'school', 'cri',
    'govt', 'mil', 'parliament',
  ],
  // Israel
  il: ['org', 'k12', 'gov', 'muni', 'idf'],
  // Russia
  ru: [
    'com', 'edu', 'gob', 'int', 'mil',
    'mar', 'net', 'org', 'tur', 'musica',
  ],
  // South Africa
  za: [
    'ac', 'gov', 'law', 'mil',
    'nom', 'school', 'net',
  ],
  // South Korea: generic labels first, then the regional ones.
  kr: [
    'ac', 'co', 'es', 'go', 'hs', 'kg', 'mil',
    'ms', 'ne', 'or', 'pe', 're', 'sc',
    'busan', 'chungbuk', 'chungnam', 'daegu',
    'daejeon', 'gangwon', 'gwangju', 'gyeongbuk',
    'gyeonggi', 'gyeongnam', 'incheon', 'jeju',
    'jeonbuk', 'jeonnam', 'seoul', 'ulsan',
  ],
  // Spain
  es: ['org', 'gob'],
  // Turkey
  tr: [
    'com', 'info', 'biz', 'net', 'org', 'web',
    'gen', 'tv', 'av', 'dr', 'bbs', 'name', 'tel',
    'gov', 'bel', 'pol', 'mil', 'k12', 'edu',
    'kep', 'nc', 'gov.nc',
  ],
  // Ukraine
  ua: ['gov', 'com', 'in', 'org', 'net', 'edu'],
  // United Kingdom
  uk: [
    'co', 'org', 'me', 'ltd', 'plc', 'net', 'sch',
    'ac', 'gov', 'mod', 'mil', 'nhs', 'police',
  ],
};
/**
 * Checks whether the hostname of a URL is registered under one of the
 * two-level suffixes listed in `tldPlusOne` (e.g. `example.co.uk`), meaning the
 * root domain spans three labels rather than two.
 *
 * Only the parsed hostname is inspected, so path, query and fragment text that
 * happens to look like a suffix (e.g. `/lib.co.uk/x.js`) cannot cause a match.
 * At least one label must precede the suffix: a bare `co.uk` returns false.
 *
 * @param {string} url Absolute URL to inspect.
 * @return {boolean} True when the hostname ends in a listed suffix; false for
 *     anything else, including input that cannot be parsed as a URL.
 */
function isTldPlusDomain(url) {
  let hostname;
  try {
    hostname = new URL(url).hostname.toLowerCase();
  } catch (err) {
    // Best effort: input that is not a parseable URL is simply "not a match".
    return false;
  }

  if (!hostname) {
    return false;
  }

  const tld = hostname.split('.').slice(-1)[0];
  // Own-property check so a hostname ending in e.g. `.constructor` does not
  // resolve to a member inherited from Object.prototype.
  if (!Object.prototype.hasOwnProperty.call(tldPlusOne, tld)) {
    return false;
  }

  // Compare literal suffixes instead of building a RegExp: entries such as
  // `gov.nc` contain a dot that would otherwise act as a wildcard, and an
  // unanchored pattern could match in the middle of the hostname.
  const hostWithoutTld = hostname.slice(0, -(tld.length + 1));
  return tldPlusOne[tld].some(secondLevel => hostWithoutTld.endsWith(`.${secondLevel}`));
}
// Verifies that rootDomainsMatch treats listed two-level suffixes (e.g. `co.uk`)
// as part of the public suffix, while unlisted second-level labels are still
// treated as the root domain itself.
it(`matches tld plus domains`, () => {
  const coUkA = 'http://example.co.uk/js/test.js';
  const coUkB = 'http://sub.example.co.uk/js/test.js';
  const coUkOther = 'http://other.co.uk/js/test.js';
  const testUkA = 'http://example.test.uk/js/test.js';
  const testUkB = 'http://sub.example.test.uk/js/test.js';
  const testUkOther = 'http://other.test.uk/js/test.js';
  const ltdBrA = 'http://example.ltd.br/js/test.js';
  const ltdBrB = 'http://sub.example.ltd.br/js/test.js';
  const privAtA = 'http://examplepriv.at/js/test.js';
  const privAtB = 'http://sub.examplepriv.at/js/test.js';
  const privAtOther = 'http://otherpriv.at/js/test.js';

  // A host and its own subdomain always share a root domain.
  assert.ok(URL.rootDomainsMatch(coUkA, coUkB));
  assert.ok(URL.rootDomainsMatch(testUkA, testUkB));
  assert.ok(URL.rootDomainsMatch(ltdBrA, ltdBrB));
  assert.ok(URL.rootDomainsMatch(privAtA, privAtB));

  // Different registrants under a listed suffix must NOT match; this fails if
  // the suffix table is ignored and `co.uk` is taken as the root domain.
  assert.ok(!URL.rootDomainsMatch(coUkA, coUkOther));
  // `test.uk` is not a listed suffix, so it is itself the root domain; this
  // fails if every hostname is treated as having a two-level suffix.
  assert.ok(URL.rootDomainsMatch(testUkA, testUkOther));
  // `examplepriv.at` merely ends in the letters "priv"; it is not under `priv.at`.
  assert.ok(!URL.rootDomainsMatch(privAtA, privAtOther));
});