From 029b9292aa2e69f3244f5f2266241adc840370c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Berson?= Date: Mon, 3 Dec 2018 16:44:09 +0100 Subject: [PATCH] Fix style injection in pages + optimizations --- CHANGELOG.md | 5 + example/background.ts | 7 +- example/content-script.ts | 14 -- package-lock.json | 6 +- package.json | 2 +- src/content/circumvention/generic.ts | 2 + src/content/injection.ts | 10 +- src/cosmetics-injection.ts | 197 ++------------------------ src/data-view.ts | 2 + src/engine/bucket/cosmetics.ts | 199 ++++++--------------------- src/engine/engine.ts | 47 ++++--- src/fetch.ts | 2 +- src/matching/cosmetics.ts | 45 +++--- src/parsing/cosmetic-filter.ts | 85 ++++-------- src/serialization.ts | 50 ++++--- src/utils.ts | 23 ++-- test/engine.test.ts | 10 +- test/parsing.test.ts | 41 ------ test/serialization.test.ts | 14 +- test/utils.test.ts | 8 +- 20 files changed, 201 insertions(+), 568 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9f3a0848df..ca1403badf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ *not released yet* + * Fix style injection and cosmetic filtering logic [#67](https://github.com/cliqz-oss/adblocker/pull/67) + * All cosmetics are now using only one background action (instead of two) + * No unloading is needed in content-script anymore + * Simplified and optimized the implementation of CosmeticBucket + * Internalized the version of serialized engine for auto-invalidation on update * Fix cosmetic matching (tokenization bug) [#65](https://github.com/cliqz-oss/adblocker/pull/65) * Optimize serialization and properly handle unicode in filters [#61](https://github.com/cliqz-oss/adblocker/pull/61) diff --git a/example/background.ts b/example/background.ts index ae046cb046..d0f78828d4 100644 --- a/example/background.ts +++ b/example/background.ts @@ -11,7 +11,6 @@ function loadAdblocker() { loadCosmeticFilters: true, loadNetworkFilters: true, optimizeAOT: true, - version: 1, }); console.log('Fetching resources...'); @@ -143,10 +142,8 @@ loadAdblocker().then((engine) => { } // Answer to content-script with a list of nodes - if (msg.action === 'getCosmeticsForDomain') { - sendResponse(engine.getDomainFilters(hostname)); - } else if (msg.action === 'getCosmeticsForNodes') { - sendResponse(engine.getCosmeticsFilters(hostname, msg.args[0])); + if (msg.action === 'getCosmeticsFilters') { + sendResponse(engine.getCosmeticsFilters(hostname)); } }); diff --git a/example/content-script.ts b/example/content-script.ts index 14202bec57..f66ca40d9c 100644 --- a/example/content-script.ts +++ b/example/content-script.ts @@ -45,17 +45,3 @@ const backgroundAction = (action, ...args): Promise => { const injection = new CosmeticsInjection(window, backgroundAction); injection.injectCircumvention(); - -/** - * Make sure we clean-up all resources and event listeners when this content - * script is unloaded (stop MutationObserver, etc.). - */ -const onUnload = () => { - injection.unload(); - window.removeEventListener('unload', onUnload); -}; - -/** - * Make sure we clean-up when content script is unloaded. - */ -window.addEventListener('unload', onUnload); diff --git a/package-lock.json b/package-lock.json index eeff0f951a..2fa4eda777 100644 --- a/package-lock.json +++ b/package-lock.json @@ -6928,9 +6928,9 @@ } }, "typescript": { - "version": "3.1.6", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.1.6.tgz", - "integrity": "sha512-tDMYfVtvpb96msS1lDX9MEdHrW4yOuZ4Kdc4Him9oU796XldPYF/t2+uKoX0BBa0hXXwDlqYQbXY5Rzjzc5hBA==", + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-3.2.1.tgz", + "integrity": "sha512-jw7P2z/h6aPT4AENXDGjcfHTu5CSqzsbZc6YlUIebTyBAq8XaKp78x7VcSh30xwSCcsu5irZkYZUSFP1MrAMbg==", "dev": true }, "uglify-js": { diff --git a/package.json b/package.json index 66abdbb327..8ca0ef08ec 100644 --- a/package.json +++ b/package.json @@ -53,7 +53,7 @@ "rollup-plugin-node-resolve": "^3.4.0", "ts-jest": "^23.10.4", "tslint": "^5.11.0", - "typescript": "^3.1.6" + "typescript": "^3.2.1" }, "dependencies": { "punycode": "^2.1.1", diff --git a/src/content/circumvention/generic.ts b/src/content/circumvention/generic.ts index e987d8df2d..f09188303d 100644 --- a/src/content/circumvention/generic.ts +++ b/src/content/circumvention/generic.ts @@ -22,6 +22,7 @@ export const swallowOwnErrors = bundle((magic) => { } if (windowOnError instanceof Function) { + // @ts-ignore return windowOnError.apply(this, arguments); } @@ -47,6 +48,7 @@ export const protectConsole = bundle(() => { return; } } + // @ts-ignore return originalLog.apply(originalConsole, arguments); }.bind(console), }); diff --git a/src/content/injection.ts b/src/content/injection.ts index 1ed103a364..cc0d99895e 100644 --- a/src/content/injection.ts +++ b/src/content/injection.ts @@ -55,13 +55,13 @@ export function blockScript(filter: string, doc: Document): void { } export function injectCSSRule(rule: string, doc: Document): void { - const css = doc.createElement('style'); - css.type = 'text/css'; - css.id = 'cliqz-adblokcer-css-rules'; - const parent = doc.head || doc.documentElement; + const parent = doc.head || doc.getElementsByTagName('head')[0] || doc.documentElement; if (parent !== null) { - parent.appendChild(css); + const css = doc.createElement('style'); + css.type = 'text/css'; + css.id = 'cliqz-adblokcer-css-rules'; css.appendChild(doc.createTextNode(rule)); + parent.appendChild(css); } } diff --git a/src/cosmetics-injection.ts b/src/cosmetics-injection.ts index 60f525fcda..e78473d8dc 100644 --- a/src/cosmetics-injection.ts +++ b/src/cosmetics-injection.ts @@ -1,15 +1,6 @@ import injectCircumvention from './content/circumvention'; import { blockScript, injectCSSRule, injectScript } from './content/injection'; -// We need this as `MutationObserver` is currently not part of the `Window` type -// provided by typescript, although it should be! This will be erased at compile -// time so it has no impact on produced code. -declare global { - interface Window { - MutationObserver?: typeof MutationObserver; - } -} - interface IMessageFromBackground { active: boolean; scripts: string[]; @@ -22,7 +13,6 @@ interface IMessageFromBackground { * - Inject scripts. * - Block scripts. * - Inject CSS rules. - * - Monitor changes using a mutation observer and inject new rules if needed. * * All this happens by communicating with the background through the * `backgroundAction` function (to trigger request the sending of new rules @@ -30,57 +20,11 @@ interface IMessageFromBackground { * callback to apply new rules. */ export default class CosmeticInjection { - private window: Window; - - // TODO: split into two callbacks: - // 1. getCosmeticsForDomain - // 2. getCosmeticsForNodes - // Each of them could return a promise resolving to the filters to be injected - // in the page, if any. Currently the communication is async, but a - // promise-based API would be nicer to use. - private backgroundAction: (action: string, ...args: any[]) => Promise; - private injectedRules: Set; - private injectedScripts: Set; - private blockedScripts: Set; - - private observedNodes: Set; - private mutationObserver: MutationObserver | null; - constructor( - window: Window, + private readonly window: Window, backgroundAction: (action: string, ...args: any[]) => Promise, - useMutationObserver = true, ) { - this.window = window; - this.backgroundAction = backgroundAction; - - this.mutationObserver = null; - this.injectedRules = new Set(); - this.injectedScripts = new Set(); - this.blockedScripts = new Set(); - - this.observedNodes = new Set(); - - // Request cosmetics specific to this domain as soon as possible - this.backgroundAction('getCosmeticsForDomain'); - - if (useMutationObserver) { - // Request cosmetics for nodes already existing in the DOM - this.onMutation([{ target: this.window.document.body }]); - - // Register MutationObserver - this.startObserving(); - } - } - - public unload() { - if (this.mutationObserver) { - try { - this.mutationObserver.disconnect(); - } catch (e) { - /* in case the page is closed */ - } - } + backgroundAction('getCosmeticsFilters'); } public injectCircumvention(): void { @@ -94,150 +38,27 @@ export default class CosmeticInjection { styles, }: IMessageFromBackground) { if (!active) { - this.unload(); return; } // Inject scripts for (let i = 0; i < scripts.length; i += 1) { - const script = scripts[i]; - if (!this.injectedScripts.has(script)) { - injectScript(script, this.window.document); - this.injectedScripts.add(script); - } + injectScript(scripts[i], this.window.document); } // Block scripts for (let i = 0; i < blockedScripts.length; i += 1) { - const script = blockedScripts[i]; - if (!this.blockedScripts.has(script)) { - blockScript(script, this.window.document); - this.blockedScripts.add(script); - } + blockScript(blockedScripts[i], this.window.document); } + // Inject CSS this.handleRules(styles); } private handleRules(rules: string[]) { - const rulesToInject: string[] = []; - - // Check which rules should be injected in the page. - for (let i = 0; i < rules.length; i += 1) { - const rule = rules[i]; - - if (!this.injectedRules.has(rule)) { - // Check if the selector would match - try { - if (!this.window.document.querySelector(rule)) { - continue; - } - } catch (e) { - // invalid selector - continue; - } - - this.injectedRules.add(rule); - rulesToInject.push(` :root ${rule}`); - } - } - - // Inject selected rules - if (rulesToInject.length > 0) { - injectCSSRule( - `${rulesToInject.join(' ,')} {display:none !important;}`, - this.window.document, - ); - } - } - - /** - * When one or several mutations occur in the window, extract caracteristics - * (node name, class, tag) from the modified nodes and request matching - * cosmetic filters to inject in the page. - */ - private onMutation(mutations: Array<{ target: Node }>) { - let targets: Set = new Set(mutations.map((m) => m.target).filter((t) => t)); - - // TODO - it might be necessary to inject scripts, CSS and block scripts - // from here into iframes with no src. We could first inject/block - // everything already present in `this.injectedRules`, - // `this.injectedScripts` and `this.blockedScripts`. Then we could register - // the iframe to be subjected to the same future injections as the current - // window. - // targets.forEach((target) => { - // if (target.localName === 'iframe') {} - // if (target.childElementCount !== 0) { - // const iframes = target.getElementsByTagName('iframe'); - // if (iframes.length !== 0) {} - // } - // }); - - if (targets.size > 100) { - // In case there are too many mutations we will only check once the whole document - targets = new Set([this.window.document.body]); - } - - if (targets.size === 0) { - return; - } - - // Collect nodes of targets - const nodeInfo = new Set(); - targets.forEach((target) => { - const nodes = (target as HTMLElement).querySelectorAll('*'); - for (let i = 0; i < nodes.length; i += 1) { - const node = nodes[i] as HTMLElement; - - // Ignore hidden nodes - if (node.hidden) { - continue; - } - - if (node.id) { - const selector = `#${node.id}`; - if (!this.observedNodes.has(selector)) { - nodeInfo.add(selector); - this.observedNodes.add(selector); - } - } - - if (node.tagName) { - const selector = node.tagName; - if (!this.observedNodes.has(selector)) { - nodeInfo.add(selector); - this.observedNodes.add(selector); - } - } - - if (node.className && node.className.split) { - node.className.split(' ').forEach((name) => { - const selector = `.${name}`; - if (!this.observedNodes.has(selector)) { - nodeInfo.add(selector); - this.observedNodes.add(selector); - } - }); - } - } - }); - - // Send node info to background to request corresponding cosmetic filters - if (nodeInfo.size > 0) { - this.backgroundAction('getCosmeticsForNodes', [[...nodeInfo]]); - } - } - - private startObserving() { - // Attach mutation observer in case the DOM is mutated. - if (this.window.MutationObserver !== undefined) { - this.mutationObserver = new this.window.MutationObserver((mutations) => - this.onMutation(mutations), - ); - this.mutationObserver.observe(this.window.document, { - childList: true, - subtree: true, - }); - } + injectCSSRule( + `${rules.join(',')} { display: none!important; }`, + this.window.document, + ); } } diff --git a/src/data-view.ts b/src/data-view.ts index 231a6c1ff8..695de6bd63 100644 --- a/src/data-view.ts +++ b/src/data-view.ts @@ -164,6 +164,8 @@ export default class StaticDataView { } this.pos += byteLength; + + // @ts-ignore return String.fromCharCode.apply(null, this.buffer.subarray(this.pos - byteLength, this.pos)); } } diff --git a/src/engine/bucket/cosmetics.ts b/src/engine/bucket/cosmetics.ts index 2df4f34ba8..a84c6ebbac 100644 --- a/src/engine/bucket/cosmetics.ts +++ b/src/engine/bucket/cosmetics.ts @@ -1,197 +1,78 @@ import matchCosmeticFilter from '../../matching/cosmetics'; import { CosmeticFilter } from '../../parsing/cosmetic-filter'; -import { fastHash, fastStartsWith, tokenize } from '../../utils'; +import { tokenizeHostnames } from '../../utils'; import ReverseIndex from '../reverse-index'; export default class CosmeticFilterBucket { - public hostnameIndex: ReverseIndex; - public selectorIndex: ReverseIndex; + public readonly hostnameIndex: ReverseIndex; + + // TODO - make readonly + public genericRules: CosmeticFilter[]; public size: number; constructor(filters: (cb: (f: CosmeticFilter) => void) => void) { + // Store generic cosmetic filters in an array. It will be used whenever we + // need to inject cosmetics in a paged and filtered according to + // domain-specific exceptions/unhide. + this.genericRules = []; + // This accelerating data structure is used to retrieve cosmetic filters for // a given hostname. We only store filters having at least one hostname // specified and we index each filter several time (one time per hostname). this.hostnameIndex = new ReverseIndex( - (cb: (f: CosmeticFilter) => void) => + (cb: (f: CosmeticFilter) => void) => { filters((f: CosmeticFilter) => { if (f.hasHostnames()) { cb(f); + } else { + this.genericRules.push(f); } - }), - (filter: CosmeticFilter) => { - const multiTokens: Uint32Array[] = []; - if (filter.hostnames !== undefined) { - filter.hostnames.split(',').forEach((h: string) => { - multiTokens.push(tokenize(h)); - }); - } - return multiTokens; + }); }, + (filter: CosmeticFilter) => filter.getTokens(), ); - // Store cosmetic filters dispatched using their selector. This will allow a - // fast look-up when we need to get a set of rules to inject in a window, - // based on some node information. - this.selectorIndex = new ReverseIndex( - (cb: (f: CosmeticFilter) => void) => - filters((f: CosmeticFilter) => { - if (!(f.isScriptBlock() || f.isScriptInject())) { - cb(f); - } - }), - (filter) => filter.getTokens(), - ); - - this.size = this.hostnameIndex.size + this.selectorIndex.size; + this.size = this.hostnameIndex.size + this.genericRules.length; } - public createContentScriptResponse( - rules: CosmeticFilter[], - ): { - active: boolean; - blockedScripts: string[]; - scripts: string[]; - styles: string[]; - } { - const styles: string[] = []; - const scripts: string[] = []; - const blockedScripts: string[] = []; - - for (let i = 0; i < rules.length; i += 1) { - const rule: CosmeticFilter = rules[i]; - const selector: string = rule.getSelector(); - - if (rule.isScriptBlock()) { - blockedScripts.push(selector); - } else if (rule.isScriptInject()) { - scripts.push(selector); - } else { - styles.push(selector); - } - } - - return { - active: true, - blockedScripts, - scripts, - styles, - }; - } - - public getDomainRules(hostname: string, js: Map) { - // Collect matching rules - const rules: Array<{ rule: CosmeticFilter; hostname: string }> = []; - const checkMatch = (rule: CosmeticFilter) => { - const result = matchCosmeticFilter(rule, hostname); - if (result !== null) { - // Update script injection rule - if (rule.isScriptInject()) { - const ruleWithScript = new CosmeticFilter(rule); - let scriptName = rule.getSelector(); - let scriptArguments: string[] = []; - if (scriptName.indexOf(',') !== -1) { - const parts = scriptName.split(','); - scriptName = parts[0]; - scriptArguments = parts.slice(1).map((s) => s.trim()); - } - - let script = js.get(scriptName); - if (script !== undefined) { - for (let i = 0; i < scriptArguments.length; i += 1) { - script = script.replace(`{{${i + 1}}}`, scriptArguments[i]); - } + public getCosmeticsFilters(hostname: string) { + const disabledRules = new Set(); + const rules: CosmeticFilter[] = []; - ruleWithScript.selector = script; - rules.push({ - hostname: result.hostname, - rule: ruleWithScript, - }); - } // TODO - else throw an exception? + // Collect rules specifying a domain + this.hostnameIndex.iterMatchingFilters(tokenizeHostnames(hostname), (rule: CosmeticFilter) => { + if (matchCosmeticFilter(rule, hostname)) { + if (rule.isUnhide()) { + disabledRules.add(rule.getSelector()); } else { - rules.push({ - hostname: result.hostname, - rule, - }); + rules.push(rule); } } return true; - }; - - this.hostnameIndex.iterMatchingFilters(tokenize(hostname), checkMatch); - - return this.filterExceptions(rules); - } + }); - public getMatchingRules(hostname: string, nodeInfo: string[][]): CosmeticFilter[] { - // Collect all selectors - const tokens: Set = new Set(); - for (let i = 0; i < nodeInfo.length; i += 1) { - const node = nodeInfo[i]; - // For each attribute of the node: [id, tagName, className] = node - for (let j = 0; j < node.length; j += 1) { - tokens.add(fastHash(node[j])); - } + if (disabledRules.size === 0) { + // No exception/unhide found, so we return all the rules + return [...rules, ...this.genericRules]; } - // Collect matching rules - const rules: Array<{ hostname: string; rule: CosmeticFilter }> = []; - const checkMatch = (rule: CosmeticFilter) => { - const result = matchCosmeticFilter(rule, hostname); - if (result !== null) { - rules.push({ - hostname: result.hostname, - rule, - }); - } - - return true; - }; - - this.selectorIndex.iterMatchingFilters(new Uint32Array(tokens), checkMatch); - - return this.filterExceptions(rules); - } - - private filterExceptions( - matches: Array<{ rule: CosmeticFilter; hostname: string }>, - ): CosmeticFilter[] { - const matchingRules = new Map(); - - for (let i = 0; i < matches.length; i += 1) { - const { rule, hostname } = matches[i]; - const selector = rule.getSelector(); - const isException = fastStartsWith(hostname, '~'); - if (matchingRules.has(selector)) { - const otherRule = matchingRules.get(selector); - - if (rule.isUnhide() || isException || hostname.length > otherRule.hostname.length) { - // Take the longest hostname - matchingRules.set(selector, { - hostname, - isException, - rule, - }); - } - } else { - // Add rule - matchingRules.set(selector, { - hostname, - isException, - rule, - }); + const rulesWithoutExceptions: CosmeticFilter[] = []; + for (let i = 0; i < rules.length; i += 1) { + const rule = rules[i]; + if (!disabledRules.has(rule.getSelector())) { + rulesWithoutExceptions.push(rule); } } - const rules: CosmeticFilter[] = []; - matchingRules.forEach(({ rule, isException }) => { - if (!isException && !rule.isUnhide()) { - rules.push(rule); + for (let i = 0; i < this.genericRules.length; i += 1) { + const rule = this.genericRules[i]; + if (!disabledRules.has(rule.getSelector())) { + rulesWithoutExceptions.push(rule); } - }); + } - return rules; + return rulesWithoutExceptions; } } diff --git a/src/engine/engine.ts b/src/engine/engine.ts index e0c0bb9088..d45b31204b 100644 --- a/src/engine/engine.ts +++ b/src/engine/engine.ts @@ -42,11 +42,9 @@ interface IOptions { loadNetworkFilters: boolean; optimizeAOT: boolean; enableOptimizations: boolean; - version: number; } export default class FilterEngine { - public version: number; public lists: Map; public csp: NetworkFilterBucket; @@ -72,14 +70,12 @@ export default class FilterEngine { loadCosmeticFilters = true, loadNetworkFilters = true, optimizeAOT = true, - version, }: IOptions) { // Options this.loadCosmeticFilters = loadCosmeticFilters; this.loadNetworkFilters = loadNetworkFilters; this.optimizeAOT = optimizeAOT; this.enableOptimizations = enableOptimizations; - this.version = version; this.lists = new Map(); this.size = 0; @@ -255,24 +251,35 @@ export default class FilterEngine { // this.cosmetics.optimizeAheadOfTime(); } - public getCosmeticsFilters(hostname: string, nodes: string[][]) { - if (!this.loadCosmeticFilters) { - return this.cosmetics.createContentScriptResponse([]); - } - - return this.cosmetics.createContentScriptResponse( - this.cosmetics.getMatchingRules(hostname, nodes), - ); - } - - public getDomainFilters(hostname: string) { - if (!this.loadCosmeticFilters) { - return this.cosmetics.createContentScriptResponse([]); + public getCosmeticsFilters(hostname: string) { + const styles: string[] = []; + const scripts: string[] = []; + const blockedScripts: string[] = []; + + if (this.loadCosmeticFilters) { + const rules = this.cosmetics.getCosmeticsFilters(hostname); + for (let i = 0; i < rules.length; i += 1) { + const rule: CosmeticFilter = rules[i]; + + if (rule.isScriptBlock()) { + blockedScripts.push(rule.getSelector()); + } else if (rule.isScriptInject()) { + const script = rule.getScript(this.js); + if (script !== undefined) { + scripts.push(script); + } + } else { + styles.push(rule.getSelector()); + } + } } - return this.cosmetics.createContentScriptResponse( - this.cosmetics.getDomainRules(hostname, this.js), - ); + return { + active: this.loadCosmeticFilters, + blockedScripts, + scripts, + styles, + }; } public matchAll(rawRequest: Partial): Set { diff --git a/src/fetch.ts b/src/fetch.ts index 3560779052..a4993246e2 100644 --- a/src/fetch.ts +++ b/src/fetch.ts @@ -20,7 +20,7 @@ const lists = [ { category: Category.Country, country: 'de', - enabledByDefault: false, + enabledByDefault: true, url: 'https://easylist-downloads.adblockplus.org/easylistgermany.txt', }, { diff --git a/src/matching/cosmetics.ts b/src/matching/cosmetics.ts index b9342b963e..5c2d7e7229 100644 --- a/src/matching/cosmetics.ts +++ b/src/matching/cosmetics.ts @@ -1,20 +1,17 @@ import { getPublicSuffix } from 'tldts'; import { CosmeticFilter } from '../parsing/cosmetic-filter'; -import { fastStartsWith } from '../utils'; /* Checks that hostnamePattern matches at the end of the hostname. * Partial matches are allowed, but hostname should be a valid * subdomain of hostnamePattern. */ -function checkHostnamesPartialMatch(hostname: string, hostnamePattern: string): boolean { - let pattern = hostnamePattern; - if (fastStartsWith(hostnamePattern, '~')) { - pattern = pattern.slice(1); - } - - if (hostname.endsWith(pattern)) { - const patternIndex = hostname.length - pattern.length; +function checkHostnamesPartialMatch( + hostname: string, + hostnamePattern: string, +): boolean { + if (hostname.endsWith(hostnamePattern)) { + const patternIndex = hostname.length - hostnamePattern.length; if (patternIndex === 0 || hostname[patternIndex - 1] === '.') { return true; } @@ -29,7 +26,10 @@ function checkHostnamesPartialMatch(hostname: string, hostnamePattern: string): * It takes care of the concept of entities introduced by uBlock: google.* * https://github.com/gorhill/uBlock/wiki/Static-filter-syntax#entity-based-cosmetic-filters */ -function matchHostname(hostname: string, hostnamePattern: string): boolean { +function matchHostname( + hostname: string, + hostnamePattern: string, +): boolean { if (hostnamePattern.endsWith('.*')) { // Match entity: const entity = hostnamePattern.slice(0, -2); @@ -53,24 +53,31 @@ function matchHostname(hostname: string, hostnamePattern: string): boolean { return checkHostnamesPartialMatch(hostname, hostnamePattern); } -export default function matchCosmeticFilter( - filter: CosmeticFilter, - hostname: string, -): { hostname: string } | null { +export default function matchCosmeticFilter(filter: CosmeticFilter, hostname: string): boolean { // Check hostnames if (filter.hasHostnames()) { if (hostname) { const hostnames = filter.getHostnames(); + + // Check for exceptions for (let i = 0; i < hostnames.length; i += 1) { - if (matchHostname(hostname, hostnames[i])) { - return { hostname: hostnames[i] }; + const filterHostname = hostnames[i]; + if (filterHostname[0] === '~' && matchHostname(hostname, filterHostname.slice(1))) { + return false; + } + } + + // Check for positive matches + for (let i = 0; i < hostnames.length; i += 1) { + const filterHostname = hostnames[i]; + if (filterHostname[0] !== '~' && matchHostname(hostname, filterHostname)) { + return true; } } } - // No hostname match - return null; + return false; } - return { hostname: '' }; + return true; } diff --git a/src/parsing/cosmetic-filter.ts b/src/parsing/cosmetic-filter.ts index ead9b2a3fc..6ff51c0244 100644 --- a/src/parsing/cosmetic-filter.ts +++ b/src/parsing/cosmetic-filter.ts @@ -1,5 +1,5 @@ import * as punycode from 'punycode'; -import { fastStartsWithFrom, getBit, hasUnicode, setBit, tokenizeCSS } from '../utils'; +import { fastStartsWithFrom, getBit, hasUnicode, setBit, tokenizeHostnames } from '../utils'; import IFilter from './interface'; /** @@ -33,8 +33,6 @@ function computeFilterId( return hash >>> 0; } -const TOKENS_BUFFER = new Uint32Array(200); - /*************************************************************************** * Cosmetic filters parsing * ************************************************************************ */ @@ -55,7 +53,7 @@ const TOKENS_BUFFER = new Uint32Array(200); */ export class CosmeticFilter implements IFilter { public readonly mask: number; - public selector?: string; // TODO - set to read-only + public readonly selector?: string; public readonly hostnames?: string; public id?: number; @@ -118,70 +116,39 @@ export class CosmeticFilter implements IFilter { return filter; } - public getId(): number { - if (this.id === undefined) { - this.id = computeFilterId(this.mask, this.selector, this.hostnames); - } - return this.id; - } - public getTokens(): Uint32Array[] { - return [this.getTokensSelector()]; + if (this.hostnames !== undefined) { + return this.hostnames.split(',').map(tokenizeHostnames); + } + return []; } - public getTokensSelector(): Uint32Array { - // These filters are only matched based on their domains, not selectors - if (this.isScriptInject() || this.isScriptBlock()) { - return new Uint32Array([]); + public getScript(js: Map): string | undefined { + let scriptName = this.getSelector(); + let scriptArguments: string[] = []; + if (scriptName.indexOf(',') !== -1) { + const parts = scriptName.split(','); + scriptName = parts[0]; + scriptArguments = parts.slice(1).map((s) => s.trim()); } - const selector = this.selector || ''; - - // Only keep the part after the last combinator: '>', '+', '~' - let sepIndex = 0; - for (let i = selector.length - 1; i >= 0; i -= 1) { - const code = selector.charCodeAt(i); - if ( - code === 43 || // '+' - code === 62 || // '>' - code === 126 // '~' - ) { - sepIndex = i; - break; + let script = js.get(scriptName); + if (script !== undefined) { + for (let i = 0; i < scriptArguments.length; i += 1) { + script = script.replace(`{{${i + 1}}}`, scriptArguments[i]); } - } - // We do not want to take styles contained in brackets () into account while - // extracting the tokens, so we loop over the selector and ignore these - // parts. - let inside = 0; // number of brackets openings seen, allows to handle multiple levels of depth - let start = sepIndex; - let tokensBufferIndex = 0; - - for (let i = sepIndex, len = selector.length; i < len; i += 1) { - const code = selector.charCodeAt(i); - if (code === 91) { - // '[' - if (inside === 0 && start < i) { - const tokens = tokenizeCSS(selector.slice(start, i)); - TOKENS_BUFFER.set(tokens, tokensBufferIndex); - tokensBufferIndex += tokens.length; - } - inside += 1; - } else if (code === 93) { - // ']' - inside -= 1; - start = i + 1; - } - } + return script; + } // TODO - else throw an exception? - if (inside === 0 && start < selector.length) { - const tokens = tokenizeCSS(selector.slice(start, selector.length)); - TOKENS_BUFFER.set(tokens, tokensBufferIndex); - tokensBufferIndex += tokens.length; - } + return undefined; + } - return TOKENS_BUFFER.slice(0, tokensBufferIndex); + public getId(): number { + if (this.id === undefined) { + this.id = computeFilterId(this.mask, this.selector, this.hostnames); + } + return this.id; } public getSelector(): string { diff --git a/src/serialization.ts b/src/serialization.ts index 2579adebcd..678ad34c8a 100644 --- a/src/serialization.ts +++ b/src/serialization.ts @@ -11,6 +11,8 @@ import { CosmeticFilter } from './parsing/cosmetic-filter'; import IFilter from './parsing/interface'; import { NetworkFilter } from './parsing/network-filter'; +export const ENGINE_VERSION = 15; + /** * To allow for a more compact representation of network filters, the * representation is composed of a mandatory header, and some optional @@ -277,26 +279,21 @@ function deserializeLists( }; } -function serializeBucket(token: number, filters: T[], buffer: StaticDataView) { +function serializeListOfFilter(filters: T[], buffer: StaticDataView) { buffer.pushUint16(filters.length); - buffer.pushUint32(token); - for (let i = 0; i < filters.length; i += 1) { buffer.pushUint32(filters[i].getId()); } } -function deserializeBucket( - buffer: StaticDataView, - filters: Map, -): { - token: number; - bucket: IBucket; -} { - const bucket: T[] = []; +function serializeBucket(token: number, filters: T[], buffer: StaticDataView) { + buffer.pushUint32(token); + serializeListOfFilter(filters, buffer); +} +function deserializeListOfFilters(buffer: StaticDataView, filters: Map): T[] { + const bucket: T[] = []; const length = buffer.getUint16(); - const token = buffer.getUint32(); for (let i = 0; i < length; i += 1) { const filter = filters.get(buffer.getUint32()); @@ -305,8 +302,20 @@ function deserializeBucket( } } + return bucket; +} + +function deserializeBucket( + buffer: StaticDataView, + filters: Map, +): { + token: number; + bucket: IBucket; +} { + const token = buffer.getUint32(); + return { - bucket: newBucket(bucket), + bucket: newBucket(deserializeListOfFilters(buffer, filters)), token, }; } @@ -403,8 +412,7 @@ function serializeEngine(engine: Engine): Uint8Array { // `StaticDataView` is able to resize itself dynamically if needed. const buffer = new StaticDataView(8000000); - buffer.pushUint8(engine.version); - + buffer.pushUint8(ENGINE_VERSION); buffer.pushUint8(Number(engine.enableOptimizations)); buffer.pushUint8(Number(engine.loadCosmeticFilters)); buffer.pushUint8(Number(engine.loadNetworkFilters)); @@ -422,19 +430,19 @@ function serializeEngine(engine: Engine): Uint8Array { serializeReverseIndex(engine.importants.index, buffer); serializeReverseIndex(engine.redirects.index, buffer); serializeReverseIndex(engine.cosmetics.hostnameIndex, buffer); - serializeReverseIndex(engine.cosmetics.selectorIndex, buffer); + serializeListOfFilter(engine.cosmetics.genericRules, buffer); return buffer.crop(); } -function deserializeEngine(serialized: Uint8Array, version: number): Engine { +function deserializeEngine(serialized: Uint8Array): Engine { const buffer = new StaticDataView(0, serialized); // Before starting deserialization, we make sure that the version of the // serialized engine is the same as the current source code. If not, we start // fresh and create a new engine from the lists. const serializedEngineVersion = buffer.getUint8(); - if (version !== serializedEngineVersion) { + if (ENGINE_VERSION !== serializedEngineVersion) { throw new Error('serialized engine version mismatch'); } @@ -444,7 +452,6 @@ function deserializeEngine(serialized: Uint8Array, version: number): Engine { loadCosmeticFilters: Boolean(buffer.getUint8()), loadNetworkFilters: Boolean(buffer.getUint8()), optimizeAOT: Boolean(buffer.getUint8()), - version: serializedEngineVersion, }; const engine = new Engine(options); @@ -465,8 +472,11 @@ function deserializeEngine(serialized: Uint8Array, version: number): Engine { deserializeReverseIndex(buffer, engine.exceptions.index, networkFilters); deserializeReverseIndex(buffer, engine.importants.index, networkFilters); deserializeReverseIndex(buffer, engine.redirects.index, networkFilters); + deserializeReverseIndex(buffer, engine.cosmetics.hostnameIndex, cosmeticFilters); - deserializeReverseIndex(buffer, engine.cosmetics.selectorIndex, cosmeticFilters); + engine.cosmetics.genericRules = deserializeListOfFilters(buffer, cosmeticFilters); + + engine.cosmetics.size = engine.cosmetics.hostnameIndex.size + engine.cosmetics.genericRules.length; return engine; } diff --git a/src/utils.ts b/src/utils.ts index eb0bbe46c6..8666c7a013 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -95,19 +95,12 @@ function isAlphaExtended(ch: number): boolean { return ch >= 192 && ch <= 450; } -function isAllowed(ch: number): boolean { +function isAllowedFilter(ch: number): boolean { return isDigit(ch) || isAlpha(ch) || isAlphaExtended(ch) || ch === 37 /* '%' */; } -function isAllowedCSS(ch: number): boolean { - return ( - isDigit(ch) || - isAlpha(ch) || - ch === 95 || // '_' (underscore) - ch === 45 || // '-' (dash) - ch === 46 || // '.' (dot) - ch === 35 // '#' (sharp) - ); +function isAllowedHostname(ch: number): boolean { + return isAllowedFilter(ch) || ch === 95 /* '_' */ || ch === 45 /* '-' */; } const TOKENS_BUFFER = new Uint32Array(200); @@ -191,7 +184,7 @@ function fastTokenizer(pattern: string, isAllowedCode: (ch: number) => boolean): } export function tokenize(pattern: string): Uint32Array { - return fastTokenizerNoRegex(pattern, isAllowed, false, false); + return fastTokenizerNoRegex(pattern, isAllowedFilter, false, false); } export function tokenizeFilter( @@ -199,15 +192,15 @@ export function tokenizeFilter( skipFirstToken: boolean, skipLastToken: boolean, ): Uint32Array { - return fastTokenizerNoRegex(pattern, isAllowed, skipFirstToken, skipLastToken); + return fastTokenizerNoRegex(pattern, isAllowedFilter, skipFirstToken, skipLastToken); } -export function tokenizeCSS(pattern: string): Uint32Array { - return fastTokenizer(pattern, isAllowedCSS); +export function tokenizeHostnames(pattern: string): Uint32Array { + return fastTokenizer(pattern, isAllowedHostname); } export function createFuzzySignature(pattern: string): Uint32Array { - return compactTokens(new Uint32Array(fastTokenizer(pattern, isAllowed))); + return compactTokens(new Uint32Array(fastTokenizer(pattern, isAllowedFilter))); } export function binSearch(arr: Uint32Array, elt: number): boolean { diff --git a/test/engine.test.ts b/test/engine.test.ts index 9e2aae997e..076623f1b0 100644 --- a/test/engine.test.ts +++ b/test/engine.test.ts @@ -1,4 +1,5 @@ import Engine from '../src/engine/engine'; +import { CosmeticFilter } from '../src/parsing/cosmetic-filter'; import requests from './data/requests'; function createEngine(filters: string, enableOptimizations: boolean = true) { @@ -7,7 +8,6 @@ function createEngine(filters: string, enableOptimizations: boolean = true) { loadCosmeticFilters: true, loadNetworkFilters: true, optimizeAOT: true, - version: 1, }); newEngine.onUpdateFilters( @@ -135,7 +135,7 @@ $csp=baz,domain=bar.com url, }), ].forEach((optimizedFilter) => { - (optimizedFilter.rawLine || '').split(' <+> ').forEach((f) => { + (optimizedFilter.rawLine || '').split(' <+> ').forEach((f: string) => { matchingFilters.add(f); }); }); @@ -169,9 +169,9 @@ $csp=baz,domain=bar.com const shouldMatch: Set = new Set(testCase.matches); const shouldNotMatch: Set = new Set(testCase.misMatches); - const rules = engine.cosmetics.getDomainRules(testCase.hostname, engine.js); + const rules = engine.cosmetics.getCosmeticsFilters(testCase.hostname); expect(rules.length).toEqual(shouldMatch.size); - rules.forEach((rule) => { + rules.forEach((rule: CosmeticFilter) => { expect(rule.rawLine).not.toBeNull(); if (rule.rawLine !== undefined && !shouldMatch.has(rule.rawLine)) { throw new Error(`Expected node ${testCase.hostname} ` + ` to match ${rule.rawLine}`); @@ -261,7 +261,7 @@ $csp=baz,domain=bar.com const shouldMatch: Set = new Set(testCase.matches); const shouldNotMatch: Set = new Set(testCase.misMatches); - const rules = engine.cosmetics.getMatchingRules(testCase.hostname, [testCase.node]); + const rules = engine.cosmetics.getCosmeticsFilters(testCase.hostname); expect(rules.length).toEqual(shouldMatch.size); rules.forEach((rule) => { expect(rule.rawLine).not.toBeNull(); diff --git a/test/parsing.test.ts b/test/parsing.test.ts index e9333eb607..1626b468cb 100644 --- a/test/parsing.test.ts +++ b/test/parsing.test.ts @@ -773,47 +773,6 @@ describe('Cosmetic filters', () => { selector: 'script.js, arg1, arg2, arg3', }); }); - - describe('tokenizes filter', () => { - [ - // Plain selectors - { selector: '.c', tokens: ['.c'] }, - { selector: '.c.d', tokens: ['.c.d'] }, - { selector: '.c .d', tokens: ['.c', '.d'] }, - - // With styles included (brackets) - { selector: '.c[foo]', tokens: ['.c'] }, - { selector: '[foo].c', tokens: ['.c'] }, - { selector: '[foo].c[foo]', tokens: ['.c'] }, - { selector: '[foo[bar]].c[foo]', tokens: ['.c'] }, - { selector: '[foo[bar]].c[foo].d', tokens: ['.c', '.d'] }, - { selector: '[foo[bar]].c[foo[baz]].d', tokens: ['.c', '.d'] }, - { selector: '.c[foo[bar]].d[foo[baz]].e', tokens: ['.c', '.d', '.e'] }, - - // With combinators - { selector: '.b > .c', tokens: ['.c'] }, - { selector: '.a ~ .b > .c', tokens: ['.c'] }, - { selector: '.a ~ .b ~ .c', tokens: ['.c'] }, - { selector: '.a + .b ~ .c', tokens: ['.c'] }, - { selector: '.a + .b + .c', tokens: ['.c'] }, - - // With combinators + styles - { selector: '.c[foo[bar]].d[foo[baz]].e > .c', tokens: ['.c'] }, - { selector: '.a > .c[foo[bar]].d[foo[baz]].e ~ .c', tokens: ['.c'] }, - { selector: '.a > .c[foo[bar]].d[foo[baz]].e ~ .c[foo]', tokens: ['.c'] }, - { selector: '.a > .c[foo[bar]].d[foo[baz]].e ~ .c[foo[bar]].d', tokens: ['.c', '.d'] }, - ].forEach((testCase) => { - it(testCase.selector, () => { - const parsed = parseCosmeticFilter(`##${testCase.selector}`); - expect(parsed).not.toBeNull(); - if (parsed !== null) { - expect(parsed.getTokensSelector()).toEqual( - new Uint32Array(testCase.tokens.map(fastHash)), - ); - } - }); - }); - }); }); describe('Filters list', () => { diff --git a/test/serialization.test.ts b/test/serialization.test.ts index ff5dae7f7e..f86ed3667a 100644 --- a/test/serialization.test.ts +++ b/test/serialization.test.ts @@ -77,7 +77,6 @@ describe('Serialization', () => { loadCosmeticFilters: true, loadNetworkFilters: true, optimizeAOT: false, - version: 42, }); engine.onUpdateFilters([{ filters, asset: 'list1', checksum: 'checksum' }]); @@ -85,14 +84,17 @@ describe('Serialization', () => { engine.onUpdateResource([{ checksum: 'resources1', filters: resources }]); const serialized = serializeEngine(engine); + + const version = serialized[0]; + serialized[0] = 1; // override version expect(() => { - deserializeEngine(serialized, 41); + deserializeEngine(serialized); }).toThrow('serialized engine version mismatch'); + serialized[0] = version; - const deserialized = deserializeEngine(serialized, 42); + const deserialized = deserializeEngine(serialized); expect(deserialized).not.toBe(null); if (deserialized !== null) { - expect(deserialized.version).toEqual(engine.version); expect(deserialized.lists).toEqual(engine.lists); // NOTE: Here we only compare the index itself, and not the other @@ -110,8 +112,8 @@ describe('Serialization', () => { expect(deserialized.cosmetics.hostnameIndex.index).toEqual( engine.cosmetics.hostnameIndex.index, ); - expect(deserialized.cosmetics.selectorIndex.index).toEqual( - engine.cosmetics.selectorIndex.index, + expect(deserialized.cosmetics.genericRules).toEqual( + engine.cosmetics.genericRules, ); // Resources diff --git a/test/utils.test.ts b/test/utils.test.ts index b6bf9439f3..618db55525 100644 --- a/test/utils.test.ts +++ b/test/utils.test.ts @@ -1,5 +1,5 @@ import { parseList } from '../src/parsing/list'; -import { fastHash, tokenize, tokenizeCSS } from '../src/utils'; +import { fastHash, tokenize } from '../src/utils'; import requests from './data/requests'; import { loadAllLists } from './utils'; @@ -63,10 +63,4 @@ describe('Utils', () => { expect(tokenize('foo-bar')).toEqual(t(['foo', 'bar'])); expect(tokenize('foo.bar')).toEqual(t(['foo', 'bar'])); }); - - it('#tokenizeCSS', () => { - expect(tokenizeCSS('')).toEqual(t([])); - expect(tokenizeCSS('.selector')).toEqual(t(['.selector'])); - expect(tokenizeCSS('.selector-foo')).toEqual(t(['.selector-foo'])); - }); });