Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement cosmetic filtering #47

Merged
merged 36 commits into from
Nov 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
90824c1
add module for cosmetic filters
antonok-edm Jul 2, 2019
462534c
cosmetic filter rule parsing implementation
antonok-edm Jul 2, 2019
73afe91
create errors specific to cosmetic filtering
antonok-edm Jul 3, 2019
85c2b03
break entity/hostname parsing into separate function
antonok-edm Jul 8, 2019
77c382a
add framework for CSS parsing error checking
antonok-edm Jul 3, 2019
e6160aa
add benchmark with cosmetic filtering enabled
antonok-edm Jul 4, 2019
91a0716
add dependencies on cssparser and selectors from servo
antonok-edm Jul 5, 2019
00b74cf
implement CSS selector validation
antonok-edm Jul 5, 2019
47129a1
implement cosmetic filter matching
antonok-edm Jul 8, 2019
f6b8fd0
rework id and class flags to be more similar to uBlock Origin
antonok-edm Jul 12, 2019
df56646
implement escape sequence parsing
antonok-edm Jul 12, 2019
af4932e
add cosmetic filter cache
antonok-edm Jul 16, 2019
7f513d5
add initial benchmarks for cosmetic filter bulk matching
antonok-edm Jul 16, 2019
19d92ad
add CosmeticFilterCache to the Engine
antonok-edm Jul 18, 2019
0bbcf32
remove cosmetic/network filter enable flags from Blocker
antonok-edm Jul 18, 2019
8ddcc70
expose cosmetic filter methods in engine
antonok-edm Jul 18, 2019
018eec5
refactor list parsing to expose individual rule parsing method
antonok-edm Jul 19, 2019
87d80bb
allow efficiently adding single rules with a lazy-evaluated base styl…
antonok-edm Jul 19, 2019
b4f0f39
support adding individual cosmetic filter rules
antonok-edm Jul 19, 2019
c996a9f
add public suffix list dependency
antonok-edm Jul 23, 2019
3db6ce5
use PSL to remove domain argument
antonok-edm Jul 23, 2019
1b973f6
expose method to parse and add a rule list to the engine
antonok-edm Jul 24, 2019
6435fe1
initial implementation of hostname rule db
antonok-edm Jul 26, 2019
a4b0ac7
rework cosmetic filter cache to support exceptions, styles, and scrip…
antonok-edm Jul 26, 2019
21ffb01
finish exception storage implementation
antonok-edm Aug 2, 2019
301a8be
take exceptions into account for class and id stylesheets
antonok-edm Jul 27, 2019
08a5498
expose all hostname resources to external api
antonok-edm Jul 27, 2019
f79b3a6
no need to expose any hostname-specific exceptions
antonok-edm Jul 29, 2019
831bcd0
support backwards compatible serialization/deserialization
antonok-edm Aug 2, 2019
8431ad3
remove base_stylesheet from public interface
antonok-edm Aug 2, 2019
d306fcd
use a hashSet for misc generic selectors
antonok-edm Oct 29, 2019
1946fad
add once_cell dependency
antonok-edm Oct 29, 2019
9ced4b8
add scriptlet resource storage
antonok-edm Oct 29, 2019
ce3448e
add scriptlet storage to cosmetic filter cache
antonok-edm Oct 29, 2019
809a611
allow updating cosmetic resources through the engine API
antonok-edm Oct 29, 2019
a2045a6
return primitive types, delegate stylesheet creation past FFI
antonok-edm Oct 29, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 150 additions & 0 deletions Cargo.lock

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ url = "2.1"
percent-encoding = "2.1"
matches = "0.1"
lazy_static = "1.4"
once_cell = "1.2"
regex = "1"
bitflags = "1.2"
itertools = "0.8"
Expand All @@ -36,6 +37,9 @@ base64 = "0.10"
rmp-serde = "0.13.7" # rmp-serde 0.14.0 breaks deserialization by changing how enums are deserialized
hashbrown = { version = "0.6", features = ["serde"] }
lifeguard = { version = "0.6", optional = true }
cssparser = "0.25"
selectors = "0.21"
psl = "0.4.1"

[dev-dependencies]
criterion = "0.2"
Expand Down Expand Up @@ -67,6 +71,10 @@ harness = false
name = "bench_redirect_performance"
harness = false

[[bench]]
name = "bench_cosmetic_matching"
harness = false

[features]
default = ["full-regex-handling", "object-pooling"]
full-domain-matching = [] # feature has no explicit dependencies
Expand Down
119 changes: 119 additions & 0 deletions benches/bench_cosmetic_matching.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
extern crate criterion;

use criterion::*;

use adblock::utils::rules_from_lists;
use adblock::lists::parse_filters;
use adblock::cosmetic_filter_cache::CosmeticFilterCache;

fn by_hostname(c: &mut Criterion) {
c.bench(
"cosmetic hostname match",
Benchmark::new("easylist", move |b| {
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
]);
let (_, cosmetic_filters) = parse_filters(&rules, false, true, false);
let cfcache = CosmeticFilterCache::new(cosmetic_filters);
b.iter(|| cfcache.hostname_cosmetic_resources("google.com"))
}).with_function("many lists", move |b| {
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
"data/easylist.to/easylistgermany/easylistgermany.txt".to_owned(),
"data/uBlockOrigin/filters.txt".to_owned(),
"data/uBlockOrigin/unbreak.txt".to_owned(),
]);
let (_, cosmetic_filters) = parse_filters(&rules, false, true, false);
let cfcache = CosmeticFilterCache::new(cosmetic_filters);
b.iter(|| cfcache.hostname_cosmetic_resources("google.com"))
}).with_function("complex_hostname", move |b| {
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
"data/easylist.to/easylistgermany/easylistgermany.txt".to_owned(),
"data/uBlockOrigin/filters.txt".to_owned(),
"data/uBlockOrigin/unbreak.txt".to_owned(),
]);
let (_, cosmetic_filters) = parse_filters(&rules, false, true, false);
let cfcache = CosmeticFilterCache::new(cosmetic_filters);
b.iter(|| cfcache.hostname_cosmetic_resources("ads.serve.1.domain.google.com"))
})
.throughput(Throughput::Elements(1))
.sample_size(20)
);
}

fn by_classes_ids(c: &mut Criterion) {
c.bench(
"cosmetic class, id match",
Benchmark::new("easylist", move |b| {
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
]);
let (_, cosmetic_filters) = parse_filters(&rules, false, true, false);
let cfcache = CosmeticFilterCache::new(cosmetic_filters);
let exceptions = Default::default();
b.iter(|| cfcache.class_id_stylesheet(&vec!["ad".to_owned()][..], &vec!["ad".to_owned()][..], &exceptions))
}).with_function("many lists", move |b| {
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
"data/easylist.to/easylistgermany/easylistgermany.txt".to_owned(),
"data/uBlockOrigin/filters.txt".to_owned(),
"data/uBlockOrigin/unbreak.txt".to_owned(),
]);
let (_, cosmetic_filters) = parse_filters(&rules, false, true, false);
let cfcache = CosmeticFilterCache::new(cosmetic_filters);
let exceptions = Default::default();
b.iter(|| cfcache.class_id_stylesheet(&vec!["ad".to_owned()][..], &vec!["ad".to_owned()][..], &exceptions))
}).with_function("many matching classes and ids", move |b| {
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
"data/easylist.to/easylistgermany/easylistgermany.txt".to_owned(),
"data/uBlockOrigin/filters.txt".to_owned(),
"data/uBlockOrigin/unbreak.txt".to_owned(),
]);
let (_, cosmetic_filters) = parse_filters(&rules, false, true, false);
let cfcache = CosmeticFilterCache::new(cosmetic_filters);
let exceptions = Default::default();
let class_list = vec![
"block-bg-advertisement-region-1".to_owned(),
"photobox-adbox".to_owned(),
"headerad-720".to_owned(),
"rscontainer".to_owned(),
"rail-article-sponsored".to_owned(),
"fbPhotoSnowboxAds".to_owned(),
"sidebar_ad_module".to_owned(),
"ad-728x90_forum".to_owned(),
"commercial-unit-desktop-rhs".to_owned(),
"sponsored-editorial".to_owned(),
"rr-300x600-ad".to_owned(),
"adfoot".to_owned(),
"lads".to_owned(),
];
let id_list = vec![
"footer-adspace".to_owned(),
"adsponsored_links_box".to_owned(),
"lsadvert-top".to_owned(),
"mn".to_owned(),
"col-right-ad".to_owned(),
"view_ads_bottom_bg_middle".to_owned(),
"ad_468x60".to_owned(),
"rightAdColumn".to_owned(),
"content".to_owned(),
"rhs_block".to_owned(),
"center_col".to_owned(),
"header".to_owned(),
"advertisingModule160x600".to_owned(),
];
b.iter(|| cfcache.class_id_stylesheet(&class_list[..], &id_list[..], &exceptions))
})
.throughput(Throughput::Elements(1))
.sample_size(20)
);
}

// Groups the cosmetic filtering benchmarks defined in this file under the
// name `cosmetic_benches`.
criterion_group!(
cosmetic_benches,
by_hostname,
by_classes_ids,
);
// Generates the benchmark binary's entry point for the group above; this is
// why the bench target is declared with `harness = false` in Cargo.toml.
criterion_main!(cosmetic_benches);
61 changes: 25 additions & 36 deletions benches/bench_matching.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,14 @@ fn load_requests() -> Vec<TestRequest> {
}

fn get_blocker(rules: &Vec<String>) -> Blocker {
let (network_filters, _) = adblock::lists::parse_filters(rules, true, false, false);

let blocker_options = BlockerOptions {
debug: false,
enable_optimizations: true,
load_cosmetic_filters: false,
load_network_filters: true
};
let (network_filters, _) = adblock::lists::parse_filters(rules, true, false, false);

let blocker_options = BlockerOptions {
debug: false,
enable_optimizations: true,
};

Blocker::new(network_filters, &blocker_options)
Blocker::new(network_filters, &blocker_options)
}

fn bench_rule_matching(engine: &Engine, requests: &Vec<TestRequest>) -> (u32, u32) {
Expand Down Expand Up @@ -320,42 +318,33 @@ fn rule_match_browserlike_comparable(c: &mut Criterion) {
let el_req = elep_req.clone();
let slim = elep_req.clone();

c.bench(
c.bench(
"rule-match-browserlike",
Benchmark::new("el+ep", move |b| {
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
"data/easylist.to/easylist/easyprivacy.txt".to_owned()
]);
let blocker = get_blocker(&rules);
let engine = Engine {
blocker
};
b.iter(|| bench_rule_matching_browserlike(&engine, &elep_req))
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
"data/easylist.to/easylist/easyprivacy.txt".to_owned()
]);
let engine = Engine::from_rules_parametrised(&rules, true, false, false, true);
b.iter(|| bench_rule_matching_browserlike(&engine, &elep_req))
},)
.with_function("el", move |b| {
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
]);
let blocker = get_blocker(&rules);
let engine = Engine {
blocker
};
b.iter(|| bench_rule_matching_browserlike(&engine, &el_req))
let rules = rules_from_lists(&vec![
"data/easylist.to/easylist/easylist.txt".to_owned(),
]);
let engine = Engine::from_rules_parametrised(&rules, true, false, false, true);
b.iter(|| bench_rule_matching_browserlike(&engine, &el_req))
},)
.with_function("slimlist", move |b| {
let rules = rules_from_lists(&vec![
"data/slim-list.txt".to_owned()
]);
let blocker = get_blocker(&rules);
let engine = Engine {
blocker
};
b.iter(|| bench_rule_matching_browserlike(&engine, &slim))
let rules = rules_from_lists(&vec![
"data/slim-list.txt".to_owned()
]);
let engine = Engine::from_rules_parametrised(&rules, true, false, false, true);
b.iter(|| bench_rule_matching_browserlike(&engine, &slim))
},)
.throughput(Throughput::Elements(requests_len))
.sample_size(20)
);
);
}

criterion_group!(
Expand Down
2 changes: 0 additions & 2 deletions benches/bench_redirect_performance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ fn get_preloaded_blocker(rules: Vec<NetworkFilter>) -> Blocker {
let blocker_options = BlockerOptions {
debug: false,
enable_optimizations: true,
load_cosmetic_filters: false,
load_network_filters: true
};

let mut blocker = Blocker::new(rules, &blocker_options);
Expand Down
49 changes: 23 additions & 26 deletions benches/bench_rules.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
extern crate criterion;

use criterion::*;
use lazy_static::lazy_static;

use adblock;
use adblock::utils::{read_file_lines, rules_from_lists};
use adblock::blocker::{Blocker, BlockerOptions};


fn default_lists() -> Vec<String> {
rules_from_lists(&vec![
String::from("data/easylist.to/easylist/easylist.txt"),
])
}

fn default_rules_lists() -> Vec<Vec<String>> {
vec![
read_file_lines("data/easylist.to/easylist/easylist.txt"),
]
lazy_static! {
static ref DEFAULT_LISTS: Vec<String> = rules_from_lists(&vec![
String::from("data/easylist.to/easylist/easylist.txt"),
]);
static ref DEFAULT_RULES_LISTS: Vec<Vec<String>> = vec![
read_file_lines("data/easylist.to/easylist/easylist.txt"),
];
}


Expand All @@ -38,23 +36,21 @@ fn bench_string_tokenize(filters: &Vec<String>) -> usize {


fn string_hashing(c: &mut Criterion) {
let rules = default_lists();
c.bench(
"string-hashing",
Benchmark::new(
"hash",
move |b| b.iter(|| bench_string_hashing(&rules)),
move |b| b.iter(|| bench_string_hashing(&DEFAULT_LISTS)),
).throughput(Throughput::Elements(1)),
);
}

fn string_tokenize(c: &mut Criterion) {
let rules = default_lists();
c.bench(
"string-tokenize",
Benchmark::new(
"tokenize",
move |b| b.iter(|| bench_string_tokenize(&rules)),
move |b| b.iter(|| bench_string_tokenize(&DEFAULT_LISTS)),
).throughput(Throughput::Elements(1)),
);
}
Expand All @@ -71,31 +67,32 @@ fn bench_parsing_impl(lists: &Vec<Vec<String>>, load_network_filters: bool, load
}

fn list_parse(c: &mut Criterion) {
let rules_lists = default_rules_lists();
c.bench(
"parse-filters",
Benchmark::new(
"network filters",
move |b| b.iter(|| bench_parsing_impl(&rules_lists, true, false)),
).throughput(Throughput::Elements(1))
|b| b.iter(|| bench_parsing_impl(&DEFAULT_RULES_LISTS, true, false)),
).with_function(
"all filters",
|b| b.iter(|| bench_parsing_impl(&DEFAULT_RULES_LISTS, true, true)),
)
.throughput(Throughput::Elements(1))
.sample_size(10)
);
}


fn get_blocker(rules: &Vec<String>) -> Blocker {
let (network_filters, _) = adblock::lists::parse_filters(rules, true, false, false);
let (network_filters, _) = adblock::lists::parse_filters(rules, true, false, false);

println!("Got {} network filters", network_filters.len());
println!("Got {} network filters", network_filters.len());

let blocker_options = BlockerOptions {
debug: false,
enable_optimizations: true,
load_cosmetic_filters: false,
load_network_filters: true
};
let blocker_options = BlockerOptions {
debug: false,
enable_optimizations: true,
};

Blocker::new(network_filters, &blocker_options)
Blocker::new(network_filters, &blocker_options)
}


Expand Down
Loading