Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implements uBO style polyfills for requests redirects #29

Merged
merged 9 commits into from
Jun 18, 2019
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ bincode = "1.1"
flate2 = "1"
seahash = "3"
twoway = "0.2"
base64 = "0.10"

# [target.'cfg(any(unix, windows))'.dependencies]
# rayon = "1.0"
Expand Down
38 changes: 34 additions & 4 deletions src/blocker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use crate::filters::network::{NetworkFilter, NetworkMatchable};
use crate::request::Request;
use crate::utils::{fast_hash, Hash};
use crate::optimizer;
use crate::resources::{Resources};
use base64;

pub struct BlockerOptions {
pub debug: bool,
Expand All @@ -16,6 +18,7 @@ pub struct BlockerOptions {
pub load_network_filters: bool,
}

#[derive(Debug)]
pub struct BlockerResult {
pub matched: bool,
pub explicit_cancel: bool,
Expand Down Expand Up @@ -49,6 +52,8 @@ pub struct Blocker {
enable_optimizations: bool,
load_cosmetic_filters: bool,
load_network_filters: bool,

resources: Option<Resources>
}

impl Blocker {
Expand Down Expand Up @@ -95,11 +100,28 @@ impl Blocker {
}
});

// If there is a match
// only match redirects if we have them set up
let redirect: Option<String> = filter.as_ref().and_then(|f| {
if f.is_redirect() {
// TODO: build up redirect URL from matching resource
unimplemented!()
// If there is a match
if let Some(blocker_redirects) = self.resources.as_ref() {
// Filter redirect option is set
if let Some(redirect) = f.redirect.as_ref() {
// And we have a matching redirect resource
if let Some(resource) = blocker_redirects.get_resource(redirect) {
let mut data_url: String;
if resource.content_type.contains(';') {
data_url = format!("data:{},{}", resource.content_type, resource.data);
} else {
data_url = format!("data:{};base64,{}", resource.content_type, base64::encode(&resource.data));
}
Some(data_url.trim().to_owned())
} else {
// TODO: handle error - throw?
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this be logged somehow, even if it's just mirroring the "didn't understand filter: X" stuff the current lib does? Would be a nice, noisy reminder if there is some new filter format we don't support, something like that.

None
}
} else {
None
}
} else {
None
}
Expand Down Expand Up @@ -195,6 +217,8 @@ impl Blocker {
enable_optimizations: options.enable_optimizations,
load_cosmetic_filters: options.load_cosmetic_filters,
load_network_filters: options.load_network_filters,

resources: None
}
}

Expand Down Expand Up @@ -232,6 +256,12 @@ impl Blocker {
/// Returns an owned copy of every currently enabled tag.
///
/// The tags are cloned out of `self.tags_enabled`; the order of the returned
/// vector is whatever order that collection iterates in.
pub fn tags_enabled(&self) -> Vec<String> {
    let mut enabled = Vec::new();
    enabled.extend(self.tags_enabled.iter().cloned());
    enabled
}

/// Parses `resources` and installs the result as this blocker's redirect resources.
///
/// Any previously installed resources are replaced. The text is parsed with
/// `Resources::parse`, which produces owned data, so `resources` is only borrowed
/// for the duration of this call and does not need to outlive the returned
/// reference (the signature no longer ties the two lifetimes together).
///
/// Returns `self` so calls can be chained.
pub fn with_resources(&mut self, resources: &str) -> &mut Blocker {
    self.resources = Some(Resources::parse(resources));
    self
}
}

#[derive(Serialize, Deserialize)]
Expand Down
21 changes: 19 additions & 2 deletions src/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,20 @@ impl Engine {
}

pub fn check_network_urls(&self, url: &str, source_url: &str, request_type: &str) -> BlockerResult {
let request = Request::from_urls(&url, &source_url, &request_type).unwrap();
self.blocker.check(&request)
Request::from_urls(&url, &source_url, &request_type)
.map(|request| {
self.blocker.check(&request)
})
.unwrap_or_else(|_e| {
BlockerResult {
matched: false,
explicit_cancel: false,
redirect: None,
exception: None,
filter: None,
}
})

}

pub fn check_network_urls_with_hostnames(&self, url: &str, hostname: &str, source_hostname: &str, request_type: &str, third_party_request: Option<bool>) -> BlockerResult {
Expand All @@ -81,6 +93,11 @@ impl Engine {
/// Disables the given tags by forwarding them to the underlying blocker.
///
/// The redundant named lifetime and explicit `-> ()` of the previous signature
/// were dropped; the function type is unchanged.
pub fn tags_disable(&mut self, tags: &[&str]) {
    self.blocker.tags_disable(tags);
}

/// Parses `resources` and hands the result to the underlying blocker for
/// request redirects.
///
/// `resources` is only borrowed for the duration of this call; it is not tied
/// to the lifetime of the returned reference.
///
/// Returns `self` so calls can be chained.
pub fn with_resources(&mut self, resources: &str) -> &mut Engine {
    self.blocker.with_resources(resources);
    self
}
}


Expand Down
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ extern crate bincode; // binary serialization/deserialization
extern crate flate2;
extern crate regex;
extern crate idna; // utf domain handling
extern crate base64;

#[cfg(test)]
extern crate csv; // csv handling library used for processing test data
Expand All @@ -28,3 +29,4 @@ pub mod optimizer;
pub mod url_parser;
pub mod engine;
pub mod filter_lists;
pub mod resources;
195 changes: 195 additions & 0 deletions src/resources.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
use std::collections::HashMap;
use regex::Regex;
use serde::{Deserialize, Serialize};

/// A single named resource as produced by `Resources::parse`.
#[derive(Serialize, Deserialize, Debug, PartialEq)]
pub struct Resource {
/// Second whitespace-separated token of the resource's header line,
/// e.g. `application/javascript` or `image/png;base64`.
pub content_type: String,
/// Everything after the header line, with surrounding whitespace trimmed.
/// May be empty for header-only resources.
pub data: String
}

/// Collection of parsed resources, looked up by resource name.
#[derive(Serialize, Deserialize, Debug, PartialEq)]
pub struct Resources {
/// Maps a resource name (first token of its header line) to its content type and data.
pub resources: HashMap<String, Resource>
}

impl Resources {
    /// Parses a resources file into a name -> `Resource` map.
    ///
    /// The input is split into chunks on blank lines (`"\n\n"`). For each chunk:
    ///
    /// * comment lines (optional whitespace followed by `#`) are removed and the
    ///   remainder is trimmed; empty chunks are skipped;
    /// * the first line must hold at least two whitespace-separated tokens,
    ///   `<name> <content-type>`; chunks without both are skipped;
    /// * everything after the first line, trimmed, becomes the resource data;
    /// * a chunk consisting of a header line only is accepted with empty data
    ///   when it looks like `<name> <mime/type>` and is not marked `;base64`
    ///   (base64 resources without content are skipped).
    ///
    /// If the same name is defined more than once, the last definition wins.
    /// Entries are inserted in input order so the result is deterministic even
    /// when the duplicate definitions use different content types.
    pub fn parse(data: &str) -> Resources {
        lazy_static! {
            static ref COMMENTS_RE: Regex = Regex::new(r"(?m:^\s*#.*$)").unwrap();
        }

        let mut resources: HashMap<String, Resource> = HashMap::new();

        for chunk in data.split("\n\n") {
            let without_comments = COMMENTS_RE.replace_all(chunk, "");
            let resource = without_comments.trim();
            if resource.is_empty() {
                continue;
            }

            // Locate the end of the header line.
            let header_end = match resource.find('\n') {
                Some(pos) => pos,
                // No new line, but the chunk appears to encode a mime type and the
                // content is not base64, so the content is allowed to be empty.
                None if resource.contains(' ')
                    && resource.contains('/')
                    && !resource.contains(";base64") =>
                {
                    resource.len()
                }
                // Header-only chunk that cannot stand on its own: ignore it.
                None => continue,
            };
            let (first_line, body) = resource.split_at(header_end);

            // Header format: `<name> <content-type>`; any further tokens are ignored.
            let mut header_items = first_line.split_ascii_whitespace();
            let (name, content_type) = match (header_items.next(), header_items.next()) {
                (Some(name), Some(content_type)) => (name, content_type),
                _ => continue,
            };

            // Later definitions of the same name replace earlier ones.
            resources.insert(
                name.to_owned(),
                Resource {
                    content_type: content_type.to_owned(),
                    data: body.trim().to_owned(),
                },
            );
        }

        Resources { resources }
    }

    /// Looks up a parsed resource by name.
    ///
    /// Returns `None` when no resource with that name was parsed.
    pub fn get_resource(&self, name: &str) -> Option<&Resource> {
        self.resources.get(name)
    }
}

#[cfg(test)]
mod tests {

    use super::*;
    use crate::utils;

    /// Builds the expected name -> resource map from `(name, content_type, data)` triples.
    fn expected_resources(entries: &[(&str, &str, &str)]) -> HashMap<String, Resource> {
        let mut expected = HashMap::new();
        for (name, content_type, data) in entries {
            expected.insert(
                (*name).to_owned(),
                Resource {
                    content_type: (*content_type).to_owned(),
                    data: (*data).to_owned(),
                },
            );
        }
        expected
    }

    // Empty input yields no resources at all.
    #[test]
    fn parses_empty_resources() {
        let parsed = Resources::parse("");
        assert!(parsed.resources.is_empty());
    }

    // A single header line followed by a body line yields one resource.
    #[test]
    fn parses_one_resource() {
        let input = "foo application/javascript\ncontent";
        let parsed = Resources::parse(input);
        assert!(!parsed.resources.is_empty());
        let expected = expected_resources(&[("foo", "application/javascript", "content")]);
        assert_eq!(parsed.resources, expected);
    }

    // Two blank-line separated chunks yield two resources.
    #[test]
    fn parses_two_resources() {
        let input = r###"
foo application/javascript
content1

pixel.png image/png;base64
content2"###;
        let parsed = Resources::parse(input);
        assert!(!parsed.resources.is_empty());
        let expected = expected_resources(&[
            ("foo", "application/javascript", "content1"),
            ("pixel.png", "image/png;base64", "content2"),
        ]);
        assert_eq!(parsed.resources, expected);
    }

    // Comments, a missing type and missing base64 content are all tolerated;
    // only the well-formed chunks survive.
    #[test]
    fn robust_to_weird_format() {
        let input = r###"
# Comment
# Comment 2
foo application/javascript
content1
# Comment 3

# Type missing
pixel.png
content

# Content missing
pixel.png image/png;base64

# This one is good!
pixel.png image/png;base64
content2
"###;

        let parsed = Resources::parse(input);
        assert!(!parsed.resources.is_empty());
        let expected = expected_resources(&[
            ("foo", "application/javascript", "content1"),
            ("pixel.png", "image/png;base64", "content2"),
        ]);
        assert_eq!(parsed.resources, expected);
    }

    // Header-only, non-base64 resources are kept with empty data.
    #[test]
    fn parses_noop_resources() {
        let input = r###"
nooptext text/plain


noopcss text/css


"###;
        let parsed = Resources::parse(input);
        assert!(!parsed.resources.is_empty());
        let expected = expected_resources(&[
            ("nooptext", "text/plain", ""),
            ("noopcss", "text/css", ""),
        ]);
        assert_eq!(parsed.resources, expected);
    }

    // The bundled uBlock Origin resources file parses into the expected number of entries.
    #[test]
    fn handles_ubo_resources() {
        let raw = utils::read_file_lines("data/uBlockOrigin/resources.txt").join("\n");
        assert!(!raw.is_empty());
        let parsed = Resources::parse(&raw);
        assert!(!parsed.resources.is_empty());
        assert_eq!(parsed.resources.len(), 110);
    }
}
4 changes: 2 additions & 2 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ pub fn has_unicode(pattern: &str) -> bool {

const EXPECTED_RULES: usize = 75000;
#[cfg(not(target_arch = "wasm32"))]
pub fn read_rules(filename: &str) -> Vec<String> {
pub fn read_file_lines(filename: &str) -> Vec<String> {
let f = File::open(filename).unwrap_or_else(|_| panic!("File {} not found", filename));
let reader = BufReader::new(f);
let mut rules: Vec<String> = Vec::with_capacity(EXPECTED_RULES);
Expand All @@ -192,7 +192,7 @@ pub fn read_rules(filename: &str) -> Vec<String> {
pub fn rules_from_lists(lists: &[String]) -> Vec<String> {
let mut rules: Vec<String> = Vec::with_capacity(EXPECTED_RULES);
for filename in lists {
let mut list_rules = read_rules(filename);
let mut list_rules = read_file_lines(filename);
rules.append(&mut list_rules);
}
rules.shrink_to_fit();
Expand Down
Loading