Skip to content

Commit

Permalink
Implement stress-test for FiltersEngine updates
Browse files Browse the repository at this point in the history
  • Loading branch information
remusao committed Jun 14, 2019
1 parent eba8190 commit 1881e6e
Show file tree
Hide file tree
Showing 11 changed files with 237 additions and 164 deletions.
1 change: 1 addition & 0 deletions adblocker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export { default as CosmeticFilter } from './src/filters/cosmetic';
export { default as NetworkFilter } from './src/filters/network';
export {
f,
parseFilter,
parseFilters,
IRawDiff,
IListDiff,
Expand Down
19 changes: 19 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
},
"dependencies": {
"@types/puppeteer": "^1.12.4",
"sandboxed-module": "^2.0.3",
"tldts": "^5.0.3",
"tslib": "^1.9.3",
"tsmaz": "^1.2.1"
Expand Down
8 changes: 8 additions & 0 deletions src/engine/bucket/filters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,14 @@ export default class FiltersContainer<T extends IFilter> {
// Store filters in their compact form
const buffer = StaticDataView.allocate(bufferSizeEstimation, this.config);
buffer.pushUint32(selected.length);

// When running in `debug` mode, updates of the internal data structures are
// made fully deterministic. To that end, all filters are sorted by ID before
// insertion.
if (this.config.debug === true) {
selected.sort((f1: T, f2: T): number => f1.getId() - f2.getId());
}

for (let i = 0; i < selected.length; i += 1) {
selected[i].serialize(buffer);
}
Expand Down
46 changes: 27 additions & 19 deletions src/engine/reverse-index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -351,25 +351,33 @@ export default class ReverseIndex<T extends IFilter> {

// Compute tokens for all filters (the ones already contained in the index
// *plus* the new ones *minus* the ones removed).
const filtersArrays = [this.getFilters(), newFilters];
for (let h = 0; h < filtersArrays.length; h += 1) {
const filters = filtersArrays[h];
for (let i = 0; i < filters.length; i += 1) {
const filter = filters[i];
if (removedFilters === undefined || removedFilters.has(filter.getId()) === false) {
const multiTokens = filter.getTokens();
filtersTokens.push({
filter,
multiTokens,
});

for (let j = 0; j < multiTokens.length; j += 1) {
const tokens = multiTokens[j];
totalNumberOfIndexedFilters += 1;
for (let k = 0; k < tokens.length; k += 1) {
totalNumberOfTokens += 1;
histogram.incr(tokens[k]);
}
const filters = this.getFilters();
for (let i = 0; i < newFilters.length; i += 1) {
filters.push(newFilters[i]);
}

// When running in `debug` mode, updates of the internal data structures are
// made fully deterministic. To that end, all filters are sorted by ID before
// insertion.
if (this.config.debug === true) {
filters.sort((f1: T, f2: T): number => f1.getId() - f2.getId());
}

for (let i = 0; i < filters.length; i += 1) {
const filter = filters[i];
if (removedFilters === undefined || removedFilters.has(filter.getId()) === false) {
const multiTokens = filter.getTokens();
filtersTokens.push({
filter,
multiTokens,
});

for (let j = 0; j < multiTokens.length; j += 1) {
const tokens = multiTokens[j];
totalNumberOfIndexedFilters += 1;
for (let k = 0; k < tokens.length; k += 1) {
totalNumberOfTokens += 1;
histogram.incr(tokens[k]);
}
}
}
Expand Down
8 changes: 8 additions & 0 deletions src/filters/cosmetic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ function computeFilterId(
entities: Uint32Array | undefined,
notHostnames: Uint32Array | undefined,
notEntities: Uint32Array | undefined,
style: string | undefined,
): number {
let hash = (5408 * 33) ^ mask;

Expand Down Expand Up @@ -223,6 +224,12 @@ function computeFilterId(
}
}

if (style !== undefined) {
for (let i = 0; i < style.length; i += 1) {
hash = (hash * 33) ^ style.charCodeAt(i);
}
}

return hash >>> 0;
}

Expand Down Expand Up @@ -894,6 +901,7 @@ export default class CosmeticFilter implements IFilter {
this.entities,
this.notHostnames,
this.notEntities,
this.style,
);
}
return this.id;
Expand Down
7 changes: 7 additions & 0 deletions src/filters/network.ts
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,13 @@ export default class NetworkFilter implements IFilter {

switch (option) {
case 'domain': {
// domain list starting or ending with '|' is invalid
if (
optionValue.charCodeAt(0) === 124 /* '|' */ ||
optionValue.charCodeAt(optionValue.length - 1) === 124 /* '|' */
) {
return null;
}
const optionValues: string[] = optionValue.split('|');
const optDomainsArray: number[] = [];
const optNotDomainsArray: number[] = [];
Expand Down
77 changes: 44 additions & 33 deletions src/lists.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,19 +114,22 @@ function detectFilterType(line: string): FilterType {
return FilterType.NETWORK;
}

export function f(strings: TemplateStringsArray): NetworkFilter | CosmeticFilter | null {
const rawFilter = strings[0];
const filterType = detectFilterType(rawFilter);
export function parseFilter(filter: string): NetworkFilter | CosmeticFilter | null {
const filterType = detectFilterType(filter);

if (filterType === FilterType.NETWORK) {
return NetworkFilter.parse(rawFilter, true);
return NetworkFilter.parse(filter, true);
} else if (filterType === FilterType.COSMETIC) {
return CosmeticFilter.parse(rawFilter, true);
return CosmeticFilter.parse(filter, true);
}

return null;
}

/**
 * Tagged-template shorthand around `parseFilter`.
 *
 * Only the first literal chunk of the template (`strings[0]`) is read, so any
 * text following a `${...}` substitution is not part of the parsed filter.
 *
 * @param strings - literal chunks of the tagged template.
 * @returns whatever `parseFilter` returns for the first chunk.
 */
export function f(strings: TemplateStringsArray): NetworkFilter | CosmeticFilter | null {
  const rawFilter = strings[0];
  return parseFilter(rawFilter);
}

export function parseFilters(
list: string,
config: Partial<Config> = new Config(),
Expand Down Expand Up @@ -168,6 +171,15 @@ export function parseFilters(
return { networkFilters, cosmeticFilters };
}

/**
 * Parse `list` with `parseFilters` and flatten the result into one array.
 *
 * @param list - raw content of a filters list.
 * @param config - optional configuration, forwarded as-is to `parseFilters`.
 * @returns a new array holding the network filters first, followed by the
 * cosmetic filters.
 */
function getFilters(
  list: string,
  config?: Partial<Config>,
): Array<NetworkFilter | CosmeticFilter> {
  const parsed = parseFilters(list, config);
  const merged: Array<NetworkFilter | CosmeticFilter> = [
    ...parsed.networkFilters,
    ...parsed.cosmeticFilters,
  ];
  return merged;
}

export interface IListDiff {
newNetworkFilters: NetworkFilter[];
newCosmeticFilters: CosmeticFilter[];
Expand All @@ -186,54 +198,53 @@ export interface IRawDiff {
* un-supported filters are dropped).
*/
export function getLinesWithFilters(
raw: string,
list: string,
config: Partial<Config> = new Config(),
): Set<string> {
config = new Config(Object.assign({}, config, { debug: true }));

const {
networkFilters,
cosmeticFilters,
}: {
networkFilters: NetworkFilter[];
cosmeticFilters: CosmeticFilter[];
} = parseFilters(raw, config);

// Set config to `debug` so that we keep track of raw lines for each filter
return new Set(
networkFilters
.map((filter) => filter.rawLine as string)
.concat(cosmeticFilters.map((filter) => filter.rawLine as string)),
getFilters(list, new Config(Object.assign({}, config, { debug: true }))).map(
({ rawLine }) => rawLine as string,
),
);
}

/**
* Given two versions of the same subscription (e.g.: EasyList) as a string,
* generate a raw diff (i.e.: a list of lines added and lines removed).
* generate a raw diff (i.e.: a list of filters added and filters removed, in
* their raw string form).
*/
export function generateDiff(
prevRevision: string,
newRevision: string,
config: Partial<Config> = new Config(),
): IRawDiff {
const prevRevisionLines: Set<string> = getLinesWithFilters(prevRevision, config);
const newRevisionLines: Set<string> = getLinesWithFilters(newRevision, config);
// Set config to `debug` so that we keep track of raw lines for each filter
const debugConfig = new Config(Object.assign({}, config, { debug: true }));

const prevRevisionFilters = getFilters(prevRevision, debugConfig);
const prevRevisionIds = new Set(prevRevisionFilters.map((filter) => filter.getId()));

const added: string[] = [];
const removed: string[] = [];
const newRevisionFilters = getFilters(newRevision, debugConfig);
const newRevisionIds = new Set(newRevisionFilters.map((filter) => filter.getId()));

newRevisionLines.forEach((line) => {
if (!prevRevisionLines.has(line)) {
added.push(line);
// Check which filters were added, based on ID
const added: Set<string> = new Set();
newRevisionFilters.forEach((filter) => {
if (!prevRevisionIds.has(filter.getId())) {
added.add(filter.rawLine as string);
}
});

prevRevisionLines.forEach((line) => {
if (!newRevisionLines.has(line)) {
removed.push(line);
// Check which filters were removed, based on ID
const removed: Set<string> = new Set();
prevRevisionFilters.forEach((filter) => {
if (!newRevisionIds.has(filter.getId())) {
removed.add(filter.rawLine as string);
}
});

return { added, removed };
return { added: Array.from(added), removed: Array.from(removed) };
}

/**
Expand Down Expand Up @@ -269,7 +280,7 @@ export function mergeDiffs(diffs: Array<Partial<IRawDiff>>): IRawDiff {
}

return {
added: Array.from(addedCumul).sort(),
removed: Array.from(removedCumul).sort(),
added: Array.from(addedCumul),
removed: Array.from(removedCumul),
};
}
Loading

0 comments on commit 1881e6e

Please sign in to comment.