diff --git a/Client/Frontend/Browser/BrowserViewController/BrowserViewController+WKNavigationDelegate.swift b/Client/Frontend/Browser/BrowserViewController/BrowserViewController+WKNavigationDelegate.swift index e77077f950a..93309032b67 100644 --- a/Client/Frontend/Browser/BrowserViewController/BrowserViewController+WKNavigationDelegate.swift +++ b/Client/Frontend/Browser/BrowserViewController/BrowserViewController+WKNavigationDelegate.swift @@ -201,20 +201,37 @@ extension BrowserViewController: WKNavigationDelegate { let currentURL = tab?.webView?.url, currentURL.baseDomain != url.baseDomain, domainForRequestURL.isShieldExpected(.AdblockAndTp, considerAllShieldsOption: true), - navigationAction.targetFrame?.isMainFrame == true, - let redirectURL = DebouncingResourceDownloader.shared.redirectURL(for: url) { - // Cancel the original request. We don't want it to load as it's tracking us - decisionHandler(.cancel, preferences) + navigationAction.targetFrame?.isMainFrame == true { + + // Lets get the redirect chain. + // Then we simply get all elements up until the user allows us to redirect + // (i.e. appropriate settings are enabled for that redirect rule) + let redirectChain = DebouncingResourceDownloader.shared + .redirectChain(for: url) + .contiguiousUntil { _, rule in + return rule.preferences.allSatisfy { pref in + switch pref { + case .deAmpEnabled: + return Preferences.Shields.autoRedirectAMPPages.value + } + } + } + + // Once we check the redirect chain only need the last (final) url from our redirect chain + if let redirectURL = redirectChain.last?.url { + // Cancel the original request. We don't want it to load as it's tracking us + decisionHandler(.cancel, preferences) - // For now we only allow the `Referer`. The browser will other headers during navigation. - var modifiedRequest = URLRequest(url: redirectURL) + // For now we only allow the `Referer`. The browser will other headers during navigation. + var modifiedRequest = URLRequest(url: redirectURL) - for (headerKey, headerValue) in navigationAction.request.allHTTPHeaderFields ?? [:] { - guard headerKey == "Referer" else { continue } - modifiedRequest.setValue(headerValue, forHTTPHeaderField: headerKey) - } + for (headerKey, headerValue) in navigationAction.request.allHTTPHeaderFields ?? [:] { + guard headerKey == "Referer" else { continue } + modifiedRequest.setValue(headerValue, forHTTPHeaderField: headerKey) + } - tab?.loadRequest(modifiedRequest) + tab?.loadRequest(modifiedRequest) + } } // Add de-amp script diff --git a/Client/WebFilters/DebouncingResourceDownloader.swift b/Client/WebFilters/DebouncingResourceDownloader.swift index 18822e640e8..d8c8634be54 100644 --- a/Client/WebFilters/DebouncingResourceDownloader.swift +++ b/Client/WebFilters/DebouncingResourceDownloader.swift @@ -12,24 +12,40 @@ private let log = Logger.browserLogger /// An helper class that manages the debouncing list and offers convenience methods public class DebouncingResourceDownloader { + /// An object representing errors returned from `MatcherRule` + enum RuleError: Error { + /// An error returned during parsing when an action is not supported by iOS. + /// This could be a valid scenario if we decide to add new actions in the future + case unsupportedAction(String) + /// An error returned during parsing when an action is not supported by iOS + /// This could be a valid scenario if we decide to add new prefs in the future + case unsupportedPref(String) + } + /// Object representing an item in the debouncing.json file found here: /// https://github.com/brave/adblock-lists/blob/master/brave-lists/debounce.json struct MatcherRule: Decodable { private enum CodingKeys: String, CodingKey { - case include - case exclude - case action - case param + case include, exclude, action, param, pref, prependScheme } + /// Defines actions that should be performed on the matching url + /// - Note: Some rules can be paired while others cannot. enum Action: String { + /// Requres the extracted query param to be base64 decoded + /// The extracted url is provided as a query param and the param name is defined in the `param` field case base64 + /// Defines that the url should be redirected to the extracted url + /// The extracted url is provided as a query param and the param name is defined in the `param` field + /// unless `regex-path` is used case redirect + /// Defines that the url should be extracted by a regex pattern. + /// The regex is available in the `param` field. case regexPath = "regex-path" } - enum RuleError: Error { - case unsupportedAction(String) + enum Preferance: String { + case deAmpEnabled = "brave.de_amp.enabled" } /// A set of patterns that are include in this rule @@ -38,22 +54,12 @@ public class DebouncingResourceDownloader { let exclude: Set? /// The query param that contains the redirect `URL` to be extractted from let param: String - /// The actions in a raw format. Need to be broken down to a set of `Action` entries - private let action: String - - /// Actions in a strictly typed format and split up into an array - /// - Note: Unrecognized actions will be filtered out - func makeActions() throws -> Set { - let actionStrings = action.split(separator: ",") - - return Set(try actionStrings.map({ - if let action = Action(rawValue: String($0)) { - return action - } else { - throw RuleError.unsupportedAction(String($0)) - } - })) - } + /// The scheme to prepend if the scheme is not available + let prependScheme: String? + /// The actions to be performed on the URL + let actions: Set + /// A list of preferences to be checked before debouncing + let preferences: Set /// Determines if this url is not excluded in the exclude list func isExcluded(url: URL) -> Bool { @@ -71,6 +77,193 @@ public class DebouncingResourceDownloader { // and is in the include list return !isExcluded(url: url) && isIncluded(url: url) } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + self.include = try container.decode(Set.self, forKey: .include) + self.exclude = try container.decode(Set.self, forKey: .exclude) + self.param = try container.decode(String.self, forKey: .param) + self.prependScheme = try container.decodeIfPresent(String.self, forKey: .prependScheme) + + let pref = try container.decodeIfPresent(String.self, forKey: .pref) + let action = try container.decode(String.self, forKey: .action) + self.actions = try Self.makeActions(fromString: action) + self.preferences = try Self.makePreferences(fromString: pref) + } + + /// Actions in a strictly typed format and split up into an array + /// - Note: Unrecognized actions will be filtered out + private static func makeActions(fromString string: String) throws -> Set { + let actionStrings = string.split(separator: ",") + + return Set(try actionStrings.map({ + if let action = Action(rawValue: String($0)) { + return action + } else { + throw RuleError.unsupportedAction(String($0)) + } + })) + } + + /// Actions in a strictly typed format and split up into an array + /// - Note: Unrecognized actions will be filtered out + private static func makePreferences(fromString string: String?) throws -> Set { + guard let prefStrings = string?.split(separator: ",") else { + return [] + } + + return Set(try prefStrings.map({ + if let action = Preferance(rawValue: String($0)) { + return action + } else { + throw RuleError.unsupportedPref(String($0)) + } + })) + } + } + + /// Defines redirect actions that should be performed on the matching url. + /// These rules contain additional information needed to successfully extract the redirect URL. + /// + /// - Note: Unlike `MatcherRule.Action`, only one action can be applied on a single url + public enum RedirectAction { + /// An action that requires the url to be extracted by applying a regex pattern on the path + case regexParam(regex: String) + /// An action that requires the url to be extracted from a query param given by its name + case queryParam(name: String) + } + + /// An object representing a complete and sanitized redirect rule with valid preferences and actions. + public struct RedirectRule { + let action: RedirectAction + let shouldBase64Decode: Bool + let preferences: Set + let prependScheme: String? + + /// Extracts a redirect url from the given url. This function appled all necessary actions (MatcherRule.Action) + /// and returns a value that is ready to go and can be applied. + /// + /// - Note: You still need to check the preferences however. + public func extractRedirectURL(from url: URL) throws -> URL? { + guard let value = try extractDecodedValue(from: url) else { return nil } + + if let prependScheme = prependScheme { + return makeRedirectURL(string: value, prependScheme: prependScheme) + } else { + return makeRedirectURL(string: value) + } + } + + /// Extracts and decodes a matching value from the given url. + /// + /// - Note: This value is not yet ready to be transformed into a URL as further actions, + /// such as prepending a scheme, must be performed. + private func extractDecodedValue(from url: URL) throws -> String? { + guard let value = try extractRawValue(from: url) else { return nil } + + if shouldBase64Decode { + // We need to base64 decode the url + guard let data = Data(base64Encoded: value) else { + return nil + } + + return String(data: data, encoding: .utf8) + } else { + return value + } + } + + /// Attempt to extract a raw value without any decoding. + /// - Note: This value is not yet ready to be transformed into a URL as the value may still need to be decoded. + private func extractRawValue(from url: URL) throws -> String? { + guard let components = URLComponents(url: url, resolvingAgainstBaseURL: false) else { return nil } + + switch action { + case .regexParam(let pattern): + let path = components.path + let regex = try NSRegularExpression(pattern: pattern) + let range = NSRange(location: 0, length: path.count) + let matches = regex.matches(in: path, range: range) + + // This is a requirement that only 1 match is found + guard matches.count == 1, let match = matches.first else { return nil } + let matchRange = match.range(at: 1) + + if let swiftRange = Range(matchRange, in: path) { + let result = String(path[swiftRange]) + return result + } else { + return nil + } + case .queryParam(let param): + guard let queryItems = components.queryItems else { return nil } + return queryItems.first(where: { $0.name == param })?.value + } + } + + /// Checks if the given string represents a valid URL. + /// + /// For a URL to be valid it must: + /// 1. Be a valid scheme + /// 2. Is a proper eTLD+1 or an IP address + /// 3. IDN check if it's not in the registry and is not an IP address + /// + /// Example: + /// - `https://basicattentiontoken.com` returns `true` + /// - `basicattentiontoken.com` returns `true` + /// - `basicattentiontoken.com/` returns `true` + /// - `https://xyz` returns `false` + /// - `xyz` returns `false` + private func isValidRedirectURL(string: String) -> Bool { + guard let url = URL(string: string), url.schemeIsValid, let host = url.host else { return false } + + // Handle IP addresses + guard !NSURL.isHostIPAddress(host: string) else { + // If we have an IP address we only care about a valid scheme + return true + } + + // Check for 2 host components seperated by a `.` (eTLD+1) + let etld1 = NSURL.domainAndRegistry(host: host) + guard !etld1.isEmpty else { return false } + + // Handle urls in the registry + guard !NSURL.registry(host: string).isEmpty else { + // If this address is in the registry, we skip the idn check + // We only care about the scheme check and path check only + return true + } + + // If this is not in the registry and is not an IP address, + // we do an idn check, along with the scheme and path check + return NSURL(idnString: string) != nil + } + + /// Tries to construct a valid URL string by checking `isValidRedirectURL` + /// and ensuring that there is a valid URL scheme. + func makeRedirectURL(string: String) -> URL? { + guard isValidRedirectURL(string: string) else { return nil } + guard let url = URL(string: string), url.schemeIsValid else { return nil } + return url + } + + /// Attempts to make a redirect url by pre-pending the scheme. + /// + /// - Note: If the raw value alread has a scheme, this method will return nil. + func makeRedirectURL(string: String, prependScheme: String) -> URL? { + guard !isValidRedirectURL(string: string) else { + // When we need to apply the scheme, we should not have a valid url before applying the scheme. + return nil + } + + if let url = makeRedirectURL(string: [prependScheme, string].joined()) { + return url + } else if let url = makeRedirectURL(string: [prependScheme, string].joined(separator: "://")) { + return url + } else { + return nil + } + } } /// A class that, given a set of rules, helps in returning a redirect URL @@ -92,45 +285,63 @@ public class DebouncingResourceDownloader { /// 3. For each rule in bucket A, for each URL pattern, pull the eTLD+1 out of the URL pattern, and add it to the map from 2 (so, the eTLD+1 of the URL pattern points to the containing rule) /// 4. Create a map of query parameter name -> bounce tracking rule /// 5. For each rule in bucket B, for each query parameter, add it to the map from step 4 - init(rules: [MatcherRule]) { + init(rules: [Result]) { var etldToRule: [String: [MatcherRuleEntry]] = [:] // A var queryToRule: [String: MatcherRule] = [:] // B var otherRules: [MatcherRule] = [] // C - for rule in rules { - var etldToRelevantPatterns: [String: [String]] = [:] - - for pattern in rule.include { - let urlPattern = URLPattern(validSchemes: .all) - let parseResult = urlPattern.parse(pattern: pattern) - - switch parseResult { - case .success: - if urlPattern.isMatchingAllURLs { - // We put this query param and rule to our A bucket - // This seems to be an empty list for now - queryToRule[rule.param] = rule - } else if let etld1 = urlPattern.baseDomain { - // We put this etld and rule to our B bucket - var relevantPatterns = etldToRelevantPatterns[etld1] ?? [] - relevantPatterns.append(pattern) - etldToRelevantPatterns[etld1] = relevantPatterns - } else { - // Everything else goes to our C bucket - // For the time being this set only encompases patterns without a host - otherRules.append(rule) + for result in rules { + do { + let rule = try result.get() + var etldToRelevantPatterns: [String: [String]] = [:] + + for pattern in rule.include { + let urlPattern = URLPattern(validSchemes: .all) + let parseResult = urlPattern.parse(pattern: pattern) + let actions = rule.actions + + switch parseResult { + case .success: + if urlPattern.isMatchingAllURLs && !actions.contains(.regexPath) { + // We put this query param and rule to our A bucket + // This seems to be an empty list for now + queryToRule[rule.param] = rule + } else if let etld1 = urlPattern.baseDomain, !etld1.isEmpty { + // We put this etld and rule to our B bucket + var relevantPatterns = etldToRelevantPatterns[etld1] ?? [] + relevantPatterns.append(pattern) + etldToRelevantPatterns[etld1] = relevantPatterns + } else { + // Everything else goes to our C bucket + // For the time being this set only encompases patterns without a host + otherRules.append(rule) + } + default: + log.error("Cannot parse pattern `\(pattern)`. Got status `\(parseResult)`") + continue } - default: - log.error("Cannot parse pattern `\(pattern)`. Got status `\(parseResult)`") - continue } - } - - // Add or append the entry for each etld - for (etld1, relevantPatterns) in etldToRelevantPatterns { - var entries = etldToRule[etld1] ?? [] - entries.append((rule, relevantPatterns)) - etldToRule[etld1] = entries + + // Add or append the entry for each etld + for (etld1, relevantPatterns) in etldToRelevantPatterns { + var entries = etldToRule[etld1] ?? [] + entries.append((rule, relevantPatterns)) + etldToRule[etld1] = entries + } + } catch RuleError.unsupportedAction(let rawRule) { + // We may add new actions in the future. If thats the case, this will throw but we don't want + // an error presented or anything to happen because iOS is not ready to handle this new rule + // But since it's impossible to discriminate a programmer mistake from a new rule added + // we at least log this error. + log.error("Unsupported rule action used for debouncing: `\(rawRule)`") + } catch RuleError.unsupportedPref(let rawRule) { + // We may add new prefs in the future. If thats the case, this will throw but we don't want + // an error presented or anything to happen because iOS is not ready to handle this new rule + // But since it's impossible to discriminate a programmer mistake from a new rule added + // we at least log this error. + log.error("Unsupported rule preference used for debouncing: `\(rawRule)`") + } catch { + log.error(error) } } @@ -138,16 +349,16 @@ public class DebouncingResourceDownloader { self.queryToRule = queryToRule self.otherRules = otherRules } - + /// Get redirect url recursively by continually applying the matcher rules to each url returned until we have no more redirects. /// Only handles `http` and `https` schemes. - func redirectURLAll(from url: URL) -> URL? { + func redirectChain(from url: URL) -> [(url: URL, rule: RedirectRule)] { var redirectingURL = url - var result: URL? + var result: [(url: URL, rule: RedirectRule)] = [] - while let nextURL = redirectURLOnce(from: redirectingURL) { - redirectingURL = nextURL - result = nextURL + while let nextElement = redirectURLOnce(from: redirectingURL) { + redirectingURL = nextElement.url + result.append(nextElement) } return result @@ -165,20 +376,17 @@ public class DebouncingResourceDownloader { /// (As an optimization, we only need to check the pattern the `eTLD+1` was extracted from and not the whole include list) /// 3. If any of the query params (the keys) in the map from #4 above are in URL we might navigate to, apply the corresponding rule /// 4. Apply all the rules from bucket C - func redirectURLOnce(from url: URL) -> URL? { + private func redirectURLOnce(from url: URL) -> (url: URL, rule: RedirectRule)? { + guard let rule = matchingRedirectRule(for: url) else { + return nil + } + do { - guard let pair = try matchingQueryItemAndActions(for: url) else { + if let extractedURL = try rule.extractRedirectURL(from: url), url.host != extractedURL.host { + return (extractedURL, rule) + } else { return nil } - - return redirectURL(queryItem: pair.queryItem, actions: pair.actions) - } catch MatcherRule.RuleError.unsupportedAction(let rawRule) { - // We may add new rules in the future. If thats the case, this will throw but we don't want - // an error presented or anything to happen because ios is not ready to handle this new rule - // But since it's impossible to discriminate a programmer mistake from a new rule added - // we at least log this error. - log.error("Unsupported rule used for debouncing: `\(rawRule)`") - return nil } catch { assertionFailure(error.localizedDescription) return nil @@ -186,84 +394,66 @@ public class DebouncingResourceDownloader { } /// Attempts to find a valid query param and a set of actions for the given URL - /// - Throws: May throw `DebouncingResourceDownloader.MatcherRule.RuleError` - private func matchingQueryItemAndActions(for url: URL) throws -> (queryItem: URLQueryItem, actions: Set)? { - // Extract the redirect URL - guard let components = URLComponents(url: url, resolvingAgainstBaseURL: false) else { return nil } - guard let queryItems = components.queryItems else { return nil } - guard let etld1 = url.baseDomain else { return nil } - - if let entries = etldToRule[etld1] { + /// - Throws: May throw `DebouncingResourceDownloader.RuleError` + private func matchingRedirectRule(for url: URL) -> RedirectRule? { + guard let rule = matchingRule(for: url) else { + return nil + } + + let actions = rule.actions + let preferences = rule.preferences + + if actions.contains(.regexPath) { + return RedirectRule( + action: RedirectAction.regexParam(regex: rule.param), + shouldBase64Decode: actions.contains(.base64), + preferences: preferences, + prependScheme: rule.prependScheme + ) + } else { + return RedirectRule( + action: RedirectAction.queryParam(name: rule.param), + shouldBase64Decode: actions.contains(.base64), + preferences: preferences, + prependScheme: rule.prependScheme + ) + } + } + + /// Attempts to find a valid rule for the given URL + private func matchingRule(for url: URL) -> MatcherRule? { + if let etld1 = url.baseDomain, let entries = etldToRule[etld1] { guard let entry = entries.first(where: { url.matches(any: $0.relevantPatterns) }), - !entry.rule.isExcluded(url: url), - let queryItem = queryItems.first(where: { $0.name == entry.rule.param }) + !entry.rule.isExcluded(url: url) else { return nil } - return (queryItem, try entry.rule.makeActions()) - } else if let pair = try matchingQueryItemsAndActions(for: url, queryItems: queryItems) { - return (pair.queryItem, pair.actions) - } else if let rule = otherRules.first(where: { $0.handles(url: url) }) { - guard let queryItem = queryItems.first(where: { $0.name == rule.param }) else { - return nil - } - - return (queryItem, try rule.makeActions()) + return entry.rule } else { - return nil + return matchingCachedQueryParamRule(for: url) + ?? otherRules.first(where: { $0.handles(url: url) }) } } /// Attempt to extract the query value and actions from the list of queryItems using the `queryToRule` (all urls) map. /// - /// - Parameter queryItems: The query items to extract the value and actions from - /// - Returns: A raw query item and actions to perform on that query item - /// - Throws: May throw `DebouncingResourceDownloader.MatcherRule.RuleError` - private func matchingQueryItemsAndActions( - for url: URL, - queryItems: [URLQueryItem] - ) throws -> (queryItem: URLQueryItem, actions: Set)? { + /// - Parameter url: The url to extract the query params from + /// - Returns: A rule for the given url + private func matchingCachedQueryParamRule(for url: URL) -> MatcherRule? { + // Extract the redirect URL + guard let components = URLComponents(url: url, resolvingAgainstBaseURL: false) else { return nil } + guard let queryItems = components.queryItems else { return nil } + for queryItem in queryItems { - guard - let rule = queryToRule[queryItem.name], - !rule.isExcluded(url: url) - else { - continue + if let rule = queryToRule[queryItem.name], !rule.isExcluded(url: url) { + return rule } - - return (queryItem, try rule.makeActions()) } return nil } - - /// It attempts to extract a redirect `URL` from the given query item using the given actions - /// - /// It returns nil if it cannot find a valid URL within the query item - private func redirectURL(queryItem: URLQueryItem, actions: Set) -> URL? { - guard let queryValue = queryItem.value else { return nil } - - // For now we only support redirecting so it makes sense to check this right away - // No point in trying to decode anything if we don't have this action - guard actions.contains(.redirect) else { - return nil - } - - if actions.contains(.base64) { - // We need to base64 decode the url - guard let data = Data(base64Encoded: queryValue), - let decodedString = String(data: data, encoding: .utf8) - else { - return nil - } - - return URL(string: decodedString) - } else { - return URL(string: queryValue) - } - } } /// The base s3 environment url that hosts the debouncing (and other) files. @@ -303,11 +493,13 @@ public class DebouncingResourceDownloader { } /// Setup this downloader with rule `JSON` data. + /// + /// - Note: Decoded values that have unknown types are filtered out func setup(withRulesJSON ruleData: Data) throws { // Decode the data and store it for later user let jsonDecoder = JSONDecoder() jsonDecoder.keyDecodingStrategy = .convertFromSnakeCase - let rules = try jsonDecoder.decode([MatcherRule].self, from: ruleData) + let rules = try jsonDecoder.decode([Result].self, from: ruleData) matcher = Matcher(rules: rules) } @@ -391,13 +583,13 @@ public class DebouncingResourceDownloader { } } } - + /// Get a possible redirect url for the given URL. Does this /// /// This code uses the downloade `Matcher` to determine if a redirect action is required. /// If it is, a redirect url will be returned provided we can extract it from the url. - func redirectURL(for url: URL) -> URL? { - return matcher?.redirectURLAll(from: url) + func redirectChain(for url: URL) -> [(url: URL, rule: RedirectRule)] { + return matcher?.redirectChain(from: url) ?? [] } /// Load data from disk given by the folderName and fileName @@ -460,3 +652,14 @@ private extension URLPattern { return result.isEmpty ? nil : result } } + +extension Result: Decodable where Success: Decodable, Failure == Error { + public init(from decoder: Decoder) throws { + do { + let decodable = try Success(from: decoder) + self = .success(decodable) + } catch { + self = .failure(error) + } + } +} diff --git a/Sources/Shared/Extensions/ArrayExtensions.swift b/Sources/Shared/Extensions/ArrayExtensions.swift index 558e0c83bdb..3786380cd9e 100644 --- a/Sources/Shared/Extensions/ArrayExtensions.swift +++ b/Sources/Shared/Extensions/ArrayExtensions.swift @@ -79,6 +79,12 @@ public extension Array { Array(self[$0.. Bool) -> ArraySlice { + let index = firstIndex(where: { !condition($0) }) ?? self.count + return self[0.. 0 }), + emptyArray[0...] + ) + } }