Skip to content

Commit

Permalink
provide helpers to download and build engines from lists
Browse files Browse the repository at this point in the history
  • Loading branch information
remusao committed Aug 16, 2019
1 parent 092d99f commit 192836b
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 168 deletions.
53 changes: 2 additions & 51 deletions packages/adblocker-electron-example/index.ts
Original file line number Diff line number Diff line change
@@ -1,56 +1,7 @@
import { app, BrowserWindow, session } from 'electron';
import fetch from 'node-fetch';

import { ElectronBlocker, fetchLists, fetchResources, Request } from '@cliqz/adblocker-electron';

// Polyfill the fetch API for the Node.js environment by exposing the
// implementation imported from 'node-fetch' as the global `fetch`.
// The directive below suppresses any type error reported for the assignment.
// @ts-ignore
global.fetch = fetch;

/**
 * Build the adblocker engine from freshly downloaded filter lists and
 * resources.
 *
 * Lists and resources are requested concurrently. The lines of all lists are
 * de-duplicated before being parsed, resources are loaded into the engine, and
 * the engine then goes through a serialize/deserialize round-trip. The duration
 * of each step (in milliseconds) and the serialized size are logged.
 *
 * @returns the engine produced by deserializing the freshly built one.
 */
async function loadAdblocker(): Promise<ElectronBlocker> {
  // Run `step`, log the elapsed milliseconds under `label`, return its result.
  const timed = <R>(label: string, step: () => R): R => {
    const startedAt = Date.now();
    const result = step();
    console.log(label, Date.now() - startedAt);
    return result;
  };

  console.log('Fetching resources...');
  const [responses, resources] = await Promise.all([fetchLists(), fetchResources()]);

  console.log('Initialize adblocker...');
  // A Set keeps one copy of each line, in order of first appearance.
  const uniqueLines = new Set();
  for (const response of responses) {
    for (const line of response.split(/\n/g)) {
      uniqueLines.add(line);
    }
  }
  const filters = Array.from(uniqueLines).join('\n');

  const engine = timed('parsing filters', () =>
    ElectronBlocker.parse(filters, {
      enableCompression: true,
    }),
  );

  timed('parsing resources', () => engine.updateResources(resources, '' + resources.length));

  const serialized = timed('serialization', () => engine.serialize());
  console.log('size', serialized.byteLength);

  const restored = timed('deserialization', () => ElectronBlocker.deserialize(serialized));

  return restored as ElectronBlocker;
}
import { ElectronBlocker, fullLists, Request } from '@cliqz/adblocker-electron';

function getUrlToLoad(): string {
let url = 'https://www.mangareader.net/';
Expand All @@ -73,7 +24,7 @@ async function createWindow() {
throw new Error('defaultSession is undefined');
}

const engine = await loadAdblocker();
const engine = await ElectronBlocker.fromLists(fetch, fullLists);
engine.enableBlockingInSession(session.defaultSession);

engine.on('request-blocked', (request: Request) => {
Expand Down
53 changes: 2 additions & 51 deletions packages/adblocker-puppeteer-example/index.ts
Original file line number Diff line number Diff line change
@@ -1,58 +1,9 @@
import { fetchLists, fetchResources, PuppeteerBlocker, Request } from '@cliqz/adblocker-puppeteer';
import { fullLists, PuppeteerBlocker, Request } from '@cliqz/adblocker-puppeteer';
import fetch from 'node-fetch';
import puppeteer from 'puppeteer';

// Polyfill the fetch API for the Node.js environment by exposing the
// implementation imported from 'node-fetch' as the global `fetch`.
// The directive below suppresses any type error reported for the assignment.
// @ts-ignore
global.fetch = fetch;

/**
 * Download filter lists and resources, then turn them into a ready-to-use
 * `PuppeteerBlocker`.
 *
 * Both downloads run concurrently. Duplicate lines across lists are dropped
 * before parsing, resources are loaded into the engine, and the engine is
 * serialized and deserialized once. Each step logs how long it took (in
 * milliseconds), and the serialized size is logged as well.
 *
 * @returns the engine obtained from the final deserialization.
 */
async function loadAdblocker(): Promise<PuppeteerBlocker> {
  // Measure `step`, print its duration next to `label`, and pass its result through.
  const measure = <R>(label: string, step: () => R): R => {
    const begin = Date.now();
    const outcome = step();
    console.log(label, Date.now() - begin);
    return outcome;
  };

  console.log('Fetching resources...');
  const [responses, resources] = await Promise.all([fetchLists(), fetchResources()]);

  console.log('Initialize adblocker...');
  // Collect every line once; insertion order of a Set is preserved.
  const distinctLines = new Set();
  for (const listContent of responses) {
    for (const line of listContent.split(/\n/g)) {
      distinctLines.add(line);
    }
  }
  const mergedFilters = Array.from(distinctLines).join('\n');

  const engine = measure('parsing filters', () =>
    PuppeteerBlocker.parse(mergedFilters, {
      enableCompression: true,
    }),
  );

  measure('parsing resources', () => engine.updateResources(resources, '' + resources.length));

  const bytes = measure('serialization', () => engine.serialize());
  console.log('size', bytes.byteLength);

  const reloaded = measure('deserialization', () => PuppeteerBlocker.deserialize(bytes));

  return reloaded as PuppeteerBlocker;
}

(async () => {
const engine = await loadAdblocker();
const engine = await PuppeteerBlocker.fromLists(fetch, fullLists);
const browser = await puppeteer.launch({
defaultViewport: null,
headless: false,
Expand Down
61 changes: 14 additions & 47 deletions packages/adblocker-webextension-example/background.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,52 +6,7 @@
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/

import { BlockingResponse, fetchLists, fetchResources, Request, WebExtensionBlocker } from '@cliqz/adblocker-webextension';

/**
 * Fetch filter lists and resources and build a `WebExtensionBlocker` from them.
 *
 * The returned promise resolves once the lists have been de-duplicated line by
 * line and parsed, the resources loaded, and the engine serialized and
 * deserialized once. Timings for each step (milliseconds) and the serialized
 * size are written to the console.
 *
 * @returns a promise of the engine produced by the final deserialization.
 */
function loadAdblocker() {
  // Execute `step`, log its duration under `label`, and return what it produced.
  const timed = <R>(label: string, step: () => R): R => {
    const startedAt = Date.now();
    const produced = step();
    console.log(label, Date.now() - startedAt);
    return produced;
  };

  console.log('Fetching resources...');
  const downloads = Promise.all([fetchLists(), fetchResources()]);

  return downloads.then(([responses, resources]) => {
    console.log('Initialize adblocker...');

    // Each distinct line is stored once, in order of first appearance.
    const uniqueLines = new Set();
    for (const response of responses) {
      for (const line of response.split(/\n/g)) {
        uniqueLines.add(line);
      }
    }
    const filters = Array.from(uniqueLines).join('\n');

    const engine = timed('parsing filters', () =>
      WebExtensionBlocker.parse(filters, {
        enableCompression: true,
      }),
    );

    timed('parsing resources', () => engine.updateResources(resources, '' + resources.length));

    const serialized = timed('serialization', () => engine.serialize());
    console.log('size', serialized.byteLength);

    const restored = timed('deserialization', () => WebExtensionBlocker.deserialize(serialized));

    return restored as WebExtensionBlocker;
  });
}
import { BlockingResponse, fullLists, Request, WebExtensionBlocker } from '@cliqz/adblocker-webextension';

/**
* Keep track of number of network requests altered for each tab
Expand Down Expand Up @@ -82,10 +37,22 @@ chrome.tabs.onActivated.addListener(({ tabId }: chrome.tabs.TabActiveInfo) =>
updateBlockedCounter(tabId),
);

loadAdblocker().then((engine) => {
// Build the engine from `fullLists`, then attach the listeners below once it is ready.
// NOTE(review): `fetch` is not imported in the visible part of this file —
// presumably the browser's global implementation; confirm.
WebExtensionBlocker.fromLists(fetch, fullLists).then((engine: WebExtensionBlocker) => {
// NOTE(review): presumably hooks the engine into the browser's request handling — confirm in WebExtensionBlocker.
engine.enableBlockingInBrowser();
// Blocked and redirected requests are both reported to the same counter callback.
engine.on('request-blocked', incrementBlockedCounter);
engine.on('request-redirected', incrementBlockedCounter);

// The remaining listeners only log: the request URL for CSP injections, and
// the payload length plus URL for injected scripts and styles.
engine.on('csp-injected', (request: Request) => {
console.log('csp', request.url);
});

engine.on('script-injected', (script: string, url: string) => {
console.log('script', script.length, url);
});

engine.on('style-injected', (style: string, url: string) => {
console.log('style', style.length, url);
});

console.log('Ready to roll!');
});
2 changes: 1 addition & 1 deletion packages/adblocker/adblocker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ export {
getLinesWithFilters,
} from './src/lists';
export { compactTokens, hasEmptyIntersection, mergeCompactSets } from './src/compact-set';
export { fetchLists, fetchResources } from './src/fetch';
export * from './src/fetch';
export { tokenize } from './src/utils';
export { default as Config } from './src/config';
83 changes: 72 additions & 11 deletions packages/adblocker/src/engine/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@
import Config from '../config';
import StaticDataView from '../data-view';
import { EventEmitter } from '../events';
import {
adsAndTrackingLists,
adsLists,
Fetch,
fetchLists,
fetchPrebuilt,
fetchResources,
} from '../fetch';
import CosmeticFilter from '../filters/cosmetic';
import NetworkFilter from '../filters/network';
import { IListDiff, IRawDiff, parseFilters } from '../lists';
Expand Down Expand Up @@ -50,12 +58,71 @@ export default class FilterEngine extends EventEmitter<
| 'script-injected'
| 'style-injected'
> {
public static parse(filters: string, options: Partial<Config> = {}): FilterEngine {
/**
 * Download filter lists and resources, then build an engine from them.
 *
 * Both downloads are started before either is awaited. The downloaded lists
 * are joined with newlines and handed to `parse`; resources are loaded into
 * the engine only when `fetchResources` resolved with something other than
 * `undefined`.
 *
 * @param fetch - fetch implementation forwarded to `fetchLists` and `fetchResources`.
 * @param urls - URLs of the filter lists to download.
 * @param resourcesUrl - optional URL forwarded to `fetchResources`.
 * @param config - partial configuration forwarded to `parse`.
 * @returns a promise of an instance of the class this method was called on.
 */
public static fromLists<T extends typeof FilterEngine>(
  this: T,
  fetch: Fetch,
  urls: string[],
  resourcesUrl?: string | undefined,
  config: Partial<Config> = {},
): Promise<InstanceType<T>> {
  return Promise.all([
    fetchLists(fetch, urls),
    fetchResources(fetch, resourcesUrl),
  ]).then(([lists, resources]) => {
    const filters = lists.join('\n');
    const engine = this.parse(filters, config);

    if (resources !== undefined) {
      // The stringified length of the resources is passed as the second argument.
      engine.updateResources(resources, '' + resources.length);
    }

    return engine as InstanceType<T>;
  });
}

/**
 * Create an ads-only engine from a pre-built payload, falling back to building
 * it from `adsLists` when that fails.
 *
 * The fallback runs when either `fetchPrebuilt` rejects or `deserialize`
 * throws, because the `catch` is chained after both steps.
 * NOTE(review): `fetchPrebuilt` is defined elsewhere — presumably it resolves
 * with the serialized engine matching `ENGINE_VERSION`; confirm.
 *
 * @param fetchImpl - fetch implementation to use; defaults to the `fetch` in scope.
 * @returns a promise of an instance of the class this method was called on.
 */
public static fromPrebuiltAdsOnly<T extends typeof FilterEngine>(
  this: T,
  fetchImpl: Fetch = fetch,
): Promise<InstanceType<T>> {
  const allowedListsUrl = 'https://cdn.cliqz.com/adblocker/configs/desktop-ads/allowed-lists.json';

  // Fallback path: rebuild the engine from the raw lists.
  const buildFromLists = (): Promise<InstanceType<T>> => {
    console.log('failed downloading pre-built, fallback to fetching lists');
    return this.fromLists(fetchImpl, adsLists) as Promise<InstanceType<T>>;
  };

  return fetchPrebuilt(fetchImpl, allowedListsUrl, ENGINE_VERSION)
    .then((serialized) => this.deserialize(serialized) as InstanceType<T>)
    .catch(buildFromLists);
}

/**
 * Create an ads-and-tracking engine from a pre-built payload, falling back to
 * building it from `adsAndTrackingLists` when that fails.
 *
 * The fallback runs when either `fetchPrebuilt` rejects or `deserialize`
 * throws, because the `catch` is chained after both steps.
 * NOTE(review): `fetchPrebuilt` is defined elsewhere — presumably it resolves
 * with the serialized engine matching `ENGINE_VERSION`; confirm.
 *
 * @param fetchImpl - fetch implementation to use; defaults to the `fetch` in scope.
 * @returns a promise of an instance of the class this method was called on.
 */
public static fromPrebuiltAdsAndTracking<T extends typeof FilterEngine>(
  this: T,
  fetchImpl: Fetch = fetch,
): Promise<InstanceType<T>> {
  const allowedListsUrl =
    'https://cdn.cliqz.com/adblocker/configs/desktop-ads-trackers/allowed-lists.json';

  // Fallback path: rebuild the engine from the raw lists.
  const buildFromLists = (): Promise<InstanceType<T>> => {
    console.log('failed downloading pre-built, fallback to fetching lists');
    return this.fromLists(fetchImpl, adsAndTrackingLists) as Promise<InstanceType<T>>;
  };

  return fetchPrebuilt(fetchImpl, allowedListsUrl, ENGINE_VERSION)
    .then((serialized) => this.deserialize(serialized) as InstanceType<T>)
    .catch(buildFromLists);
}

/**
 * Create an engine from raw filters.
 *
 * The explicit `this` type means the returned value has the type of the class
 * `parse` is called on, so subclasses get their own type back.
 *
 * @param filters - filters text handed to `parseFilters`.
 * @param options - partial configuration, wrapped in a `Config` instance.
 * @returns a new instance constructed from the parsed filters and the config.
 */
public static parse<T extends FilterEngine>(
  this: new (...args: any[]) => T,
  filters: string,
  options: Partial<Config> = {},
): T {
  const config = new Config(options);
  const parsed = parseFilters(filters, config);

  // `config` comes last so it is the value stored under the `config` key.
  return new this({ ...parsed, config });
}

public static deserialize(serialized: Uint8Array): FilterEngine {
public static deserialize<T extends FilterEngine>(
this: new (...args: any[]) => T,
serialized: Uint8Array,
): T {
const buffer = StaticDataView.fromUint8Array(serialized, {
enableCompression: false,
});
Expand Down Expand Up @@ -192,28 +259,22 @@ export default class FilterEngine extends EventEmitter<
* ~20 bytes is to be expected).
*/
public getSerializedSize(): number {
let estimatedSize: number = (
let estimatedSize: number =
StaticDataView.sizeOfByte() + // engine version
this.config.getSerializedSize() +
this.resources.getSerializedSize() +

this.filters.getSerializedSize() +
this.exceptions.getSerializedSize() +
this.importants.getSerializedSize() +
this.redirects.getSerializedSize() +
this.csp.getSerializedSize() +
this.genericHides.getSerializedSize() +
this.cosmetics.getSerializedSize() +

4 // checksum
);
4; // checksum

// Estimate size of `this.lists` which stores information of checksum for each list.
for (const [name, checksum] of this.lists) {
estimatedSize += (
StaticDataView.sizeOfASCII(name) +
StaticDataView.sizeOfASCII(checksum)
);
estimatedSize += StaticDataView.sizeOfASCII(name) + StaticDataView.sizeOfASCII(checksum);
}

return estimatedSize;
Expand Down
Loading

0 comments on commit 192836b

Please sign in to comment.