From 1dd261cdcdc5c2b1b3bc8ad796f29735b3cc4e06 Mon Sep 17 00:00:00 2001 From: Jacob Sikorski Date: Tue, 26 Jul 2022 11:32:16 -0600 Subject: [PATCH] Fix #5373: Add debounce de-amp rules (#5536) --- ...rViewController+WKNavigationDelegate.swift | 39 +- .../DebouncingResourceDownloader.swift | 467 +++++++++++++----- .../Shared/Extensions/ArrayExtensions.swift | 6 + Tests/ClientTests/Resources/debouncing.json | 61 +++ .../DebouncingResourceDownloaderTests.swift | 367 +++++++++++--- Tests/SharedTests/ArrayExtensionTests.swift | 25 + 6 files changed, 772 insertions(+), 193 deletions(-) diff --git a/Client/Frontend/Browser/BrowserViewController/BrowserViewController+WKNavigationDelegate.swift b/Client/Frontend/Browser/BrowserViewController/BrowserViewController+WKNavigationDelegate.swift index c87f3402351..ebe47fc4651 100644 --- a/Client/Frontend/Browser/BrowserViewController/BrowserViewController+WKNavigationDelegate.swift +++ b/Client/Frontend/Browser/BrowserViewController/BrowserViewController+WKNavigationDelegate.swift @@ -222,20 +222,37 @@ extension BrowserViewController: WKNavigationDelegate { let currentURL = tab?.webView?.url, currentURL.baseDomain != url.baseDomain, domainForRequestURL.isShieldExpected(.AdblockAndTp, considerAllShieldsOption: true), - navigationAction.targetFrame?.isMainFrame == true, - let redirectURL = DebouncingResourceDownloader.shared.redirectURL(for: url) { - // Cancel the original request. We don't want it to load as it's tracking us - decisionHandler(.cancel, preferences) + navigationAction.targetFrame?.isMainFrame == true { + + // Lets get the redirect chain. + // Then we simply get all elements up until the user allows us to redirect + // (i.e. appropriate settings are enabled for that redirect rule) + let redirectChain = DebouncingResourceDownloader.shared + .redirectChain(for: url) + .contiguousUntil { _, rule in + return rule.preferences.allSatisfy { pref in + switch pref { + case .deAmpEnabled: + return Preferences.Shields.autoRedirectAMPPages.value + } + } + } + + // Once we check the redirect chain only need the last (final) url from our redirect chain + if let redirectURL = redirectChain.last?.url { + // Cancel the original request. We don't want it to load as it's tracking us + decisionHandler(.cancel, preferences) - // For now we only allow the `Referer`. The browser will other headers during navigation. - var modifiedRequest = URLRequest(url: redirectURL) + // For now we only allow the `Referer`. The browser will add other headers during navigation. + var modifiedRequest = URLRequest(url: redirectURL) - for (headerKey, headerValue) in navigationAction.request.allHTTPHeaderFields ?? [:] { - guard headerKey == "Referer" else { continue } - modifiedRequest.setValue(headerValue, forHTTPHeaderField: headerKey) - } + for (headerKey, headerValue) in navigationAction.request.allHTTPHeaderFields ?? [:] { + guard headerKey == "Referer" else { continue } + modifiedRequest.setValue(headerValue, forHTTPHeaderField: headerKey) + } - tab?.loadRequest(modifiedRequest) + tab?.loadRequest(modifiedRequest) + } } // Add de-amp script diff --git a/Client/WebFilters/DebouncingResourceDownloader.swift b/Client/WebFilters/DebouncingResourceDownloader.swift index 650f1755a04..d94ffd5826b 100644 --- a/Client/WebFilters/DebouncingResourceDownloader.swift +++ b/Client/WebFilters/DebouncingResourceDownloader.swift @@ -12,19 +12,40 @@ private let log = Logger.browserLogger /// An helper class that manages the debouncing list and offers convenience methods public class DebouncingResourceDownloader { + /// An object representing errors returned from `MatcherRule` + enum RuleError: Error { + /// An error returned during parsing when an action is not supported by iOS. + /// This could be a valid scenario if we decide to add new actions in the future + case unsupportedAction(String) + /// An error returned during parsing when an action is not supported by iOS + /// This could be a valid scenario if we decide to add new prefs in the future + case unsupportedPreference(String) + } + /// Object representing an item in the debouncing.json file found here: /// https://github.com/brave/adblock-lists/blob/master/brave-lists/debounce.json struct MatcherRule: Decodable { private enum CodingKeys: String, CodingKey { - case include - case exclude - case action - case param + case include, exclude, action, param, pref, prependScheme } + /// Defines actions that should be performed on the matching url + /// - Note: Some rules can be paired while others cannot. enum Action: String { + /// Requres the extracted query param to be base64 decoded + /// The extracted url is provided as a query param and the param name is defined in the `param` field case base64 + /// Defines that the url should be redirected to the extracted url + /// The extracted url is provided as a query param and the param name is defined in the `param` field + /// unless `regex-path` is used case redirect + /// Defines that the url should be extracted by a regex pattern. + /// The regex is available in the `param` field. + case regexPath = "regex-path" + } + + enum Preference: String { + case deAmpEnabled = "brave.de_amp.enabled" } /// A set of patterns that are include in this rule @@ -33,15 +54,12 @@ public class DebouncingResourceDownloader { let exclude: Set? /// The query param that contains the redirect `URL` to be extractted from let param: String - /// The actions in a raw format. Need to be broken down to a set of `Action` entries - private let action: String - - /// Actions in a strictly typed format and split up into an array - /// - Note: Unrecognized actions will be filtered out - var actions: Set { - let actionStrings = action.split(separator: ",") - return Set(actionStrings.compactMap({ Action(rawValue: String($0)) })) - } + /// The scheme to prepend if the scheme is not available + let prependScheme: String? + /// The actions to be performed on the URL + let actions: Set + /// A list of preferences to be checked before debouncing + let preferences: Set /// Determines if this url is not excluded in the exclude list func isExcluded(url: URL) -> Bool { @@ -59,6 +77,175 @@ public class DebouncingResourceDownloader { // and is in the include list return !isExcluded(url: url) && isIncluded(url: url) } + + public init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + self.include = try container.decode(Set.self, forKey: .include) + self.exclude = try container.decode(Set.self, forKey: .exclude) + self.param = try container.decode(String.self, forKey: .param) + self.prependScheme = try container.decodeIfPresent(String.self, forKey: .prependScheme) + + let pref = try container.decodeIfPresent(String.self, forKey: .pref) + let action = try container.decode(String.self, forKey: .action) + self.actions = try Self.makeActions(fromString: action) + self.preferences = try Self.makePreferences(fromString: pref) + } + + /// Actions in a strictly typed format and split up into an array + /// - Note: Unrecognized actions will throw an error + private static func makeActions(fromString string: String) throws -> Set { + let actionStrings = string.split(separator: ",") + + return Set(try actionStrings.map({ + if let action = Action(rawValue: String($0)) { + return action + } else { + throw RuleError.unsupportedAction(String($0)) + } + })) + } + + /// Preferences in a strictly typed format and split up into an array + /// - Note: Unrecognized preferences will throw an error + private static func makePreferences(fromString string: String?) throws -> Set { + guard let prefStrings = string?.split(separator: ",") else { + return [] + } + + return Set(try prefStrings.map({ + if let action = Preference(rawValue: String($0)) { + return action + } else { + throw RuleError.unsupportedPreference(String($0)) + } + })) + } + } + + /// Defines redirect actions that should be performed on the matching url. + /// These rules contain additional information needed to successfully extract the redirect URL. + /// + /// - Note: Unlike `MatcherRule.Action`, only one action can be applied on a single url + public enum RedirectAction { + /// An action that requires the url to be extracted by applying a regex pattern on the path + case regexParam(regex: String) + /// An action that requires the url to be extracted from a query param given by its name + case queryParam(name: String) + } + + /// An object representing a complete and sanitized redirect rule with valid preferences and actions. + public struct RedirectRule { + let action: RedirectAction + let shouldBase64Decode: Bool + let preferences: Set + let prependScheme: String? + + /// Extracts a redirect url from the given url. This function appled all necessary actions (MatcherRule.Action) + /// and returns a value that is ready to go and can be applied. + /// + /// - Note: You still need to check the preferences however. + public func extractRedirectURL(from url: URL) throws -> URL? { + guard let value = try extractDecodedValue(from: url) else { return nil } + + if let prependScheme = prependScheme { + return makeRedirectURL(string: value, prependScheme: prependScheme) + } else { + return makeRedirectURL(string: value) + } + } + + /// Extracts and decodes a matching value from the given url. + /// + /// - Note: This value is not yet ready to be transformed into a URL as further actions, + /// such as prepending a scheme, must be performed. + private func extractDecodedValue(from url: URL) throws -> String? { + guard let value = try extractRawValue(from: url) else { return nil } + + if shouldBase64Decode { + // We need to base64 decode the url + guard let data = Data(base64Encoded: value) else { + return nil + } + + return String(data: data, encoding: .utf8) + } else { + return value + } + } + + /// Attempt to extract a raw value without any decoding. + /// - Note: This value is not yet ready to be transformed into a URL as the value may still need to be decoded. + private func extractRawValue(from url: URL) throws -> String? { + guard let components = URLComponents(url: url, resolvingAgainstBaseURL: false) else { return nil } + + switch action { + case .regexParam(let pattern): + // This is a requirement that the pattern is less than 200 characters long + guard pattern.count < 200 else { return nil } + + let path = components.path + let regex = try NSRegularExpression(pattern: pattern) + + // This is a requirement that there is only 1 capture group + guard regex.numberOfCaptureGroups == 1 else { return nil } + + let range = NSRange(location: 0, length: path.count) + let matches = regex.matches(in: path, range: range) + guard matches.count == 1, let match = matches.first else { return nil } + let matchRange = match.range(at: 1) + + if let swiftRange = Range(matchRange, in: path) { + let result = String(path[swiftRange]) + return result + } else { + return nil + } + case .queryParam(let param): + guard let queryItems = components.queryItems else { return nil } + return queryItems.first(where: { $0.name == param })?.value + } + } + + /// Checks if the given string represents a valid URL we can redirect to. + /// + /// For a URL to be valid it must: + /// 1. Have a valid scheme and have a host + /// 2. Is an `IP` address or have a valid `eTLD+1` + /// + /// Example: + /// - `https://basicattentiontoken.com` returns `true` because tthe host is in the registry and has a scheme + /// - `basicattentiontoken.com` returns `false` because the scheme is missing + /// - `https://xyz` returns `false` because it has no eTLD+1 + /// - `xyz` returns `false` because scheme is missing, has no eTLD+1 + private func isValidRedirectURL(string: String) -> Bool { + guard let url = URL(string: string), url.schemeIsValid, let host = url.host else { return false } + + // Check that we have an IP address or 2 host components seperated by a `.` (eTLD+1) + return NSURL.isHostIPAddress(host: host) || !NSURL.domainAndRegistry(host: host).isEmpty + } + + /// Tries to construct a valid URL string by checking `isValidRedirectURL` + /// and ensuring that there is a valid URL scheme. + func makeRedirectURL(string: String) -> URL? { + guard isValidRedirectURL(string: string) else { return nil } + return URL(string: string) + } + + /// Attempts to make a redirect url by pre-pending the scheme. + /// + /// - Note: If the raw value alread has a scheme, this method will return nil. + func makeRedirectURL(string: String, prependScheme: String) -> URL? { + guard !isValidRedirectURL(string: string) else { + // When we need to apply the scheme, we should not have a valid url before applying the scheme. + return nil + } + + if let url = makeRedirectURL(string: [prependScheme, string].joined(separator: "://")) { + return url + } else { + return nil + } + } } /// A class that, given a set of rules, helps in returning a redirect URL @@ -80,45 +267,63 @@ public class DebouncingResourceDownloader { /// 3. For each rule in bucket A, for each URL pattern, pull the eTLD+1 out of the URL pattern, and add it to the map from 2 (so, the eTLD+1 of the URL pattern points to the containing rule) /// 4. Create a map of query parameter name -> bounce tracking rule /// 5. For each rule in bucket B, for each query parameter, add it to the map from step 4 - init(rules: [MatcherRule]) { + init(rules: [Result]) { var etldToRule: [String: [MatcherRuleEntry]] = [:] // A var queryToRule: [String: MatcherRule] = [:] // B var otherRules: [MatcherRule] = [] // C - for rule in rules { - var etldToRelevantPatterns: [String: [String]] = [:] - - for pattern in rule.include { - let urlPattern = URLPattern(validSchemes: .all) - let parseResult = urlPattern.parse(pattern: pattern) - - switch parseResult { - case .success: - if urlPattern.isMatchingAllURLs { - // We put this query param and rule to our A bucket - // This seems to be an empty list for now - queryToRule[rule.param] = rule - } else if let etld1 = urlPattern.baseDomain { - // We put this etld and rule to our B bucket - var relevantPatterns = etldToRelevantPatterns[etld1] ?? [] - relevantPatterns.append(pattern) - etldToRelevantPatterns[etld1] = relevantPatterns - } else { - // Everything else goes to our C bucket - // For the time being this set only encompases patterns without a host - otherRules.append(rule) + for result in rules { + do { + let rule = try result.get() + var etldToRelevantPatterns: [String: [String]] = [:] + + for pattern in rule.include { + let urlPattern = URLPattern(validSchemes: .all) + let parseResult = urlPattern.parse(pattern: pattern) + let actions = rule.actions + + switch parseResult { + case .success: + if urlPattern.isMatchingAllURLs && !actions.contains(.regexPath) { + // We put this query param and rule to our A bucket + // This seems to be an empty list for now + queryToRule[rule.param] = rule + } else if let etld1 = urlPattern.baseDomain, !etld1.isEmpty { + // We put this etld and rule to our B bucket + var relevantPatterns = etldToRelevantPatterns[etld1] ?? [] + relevantPatterns.append(pattern) + etldToRelevantPatterns[etld1] = relevantPatterns + } else { + // Everything else goes to our C bucket + // For the time being this set only encompases patterns without a host + otherRules.append(rule) + } + default: + log.error("Cannot parse pattern `\(pattern)`. Got status `\(parseResult)`") + continue } - default: - log.error("Cannot parse pattern `\(pattern)`. Got status `\(parseResult)`") - continue } - } - - // Add or append the entry for each etld - for (etld1, relevantPatterns) in etldToRelevantPatterns { - var entries = etldToRule[etld1] ?? [] - entries.append((rule, relevantPatterns)) - etldToRule[etld1] = entries + + // Add or append the entry for each etld + for (etld1, relevantPatterns) in etldToRelevantPatterns { + var entries = etldToRule[etld1] ?? [] + entries.append((rule, relevantPatterns)) + etldToRule[etld1] = entries + } + } catch RuleError.unsupportedAction(let rawRule) { + // We may add new actions in the future. If thats the case, this will throw but we don't want + // an error presented or anything to happen because iOS is not ready to handle this new rule + // But since it's impossible to discriminate a programmer mistake from a new rule added + // we at least log this error. + log.error("Unsupported rule action used for debouncing: `\(rawRule)`") + } catch RuleError.unsupportedPreference(let rawRule) { + // We may add new prefs in the future. If thats the case, this will throw but we don't want + // an error presented or anything to happen because iOS is not ready to handle this new rule + // But since it's impossible to discriminate a programmer mistake from a new rule added + // we at least log this error. + log.error("Unsupported rule preference used for debouncing: `\(rawRule)`") + } catch { + log.error(error) } } @@ -126,26 +331,80 @@ public class DebouncingResourceDownloader { self.queryToRule = queryToRule self.otherRules = otherRules } - + /// Get redirect url recursively by continually applying the matcher rules to each url returned until we have no more redirects. - /// Only handles `http` and `https` schemes. - func redirectURLAll(from url: URL) -> URL? { + /// + /// The following conditions must be met to redirect: + /// 1. Must have a `MatcherRule` that satisifes this url + /// 2. Extracted URL not be the same domain as the base URL + /// 3. Extracted URL must be a valid URL (Have valid scheme, have an `eTLD+1` or be an IP address etc.. ) + /// 4. Extracted URL must have a `http` or `https` scheme + func redirectChain(from url: URL) -> [(url: URL, rule: RedirectRule)] { var redirectingURL = url - var result: URL? + var result: [(url: URL, rule: RedirectRule)] = [] - while let nextURL = redirectURLOnce(from: redirectingURL) { - redirectingURL = nextURL - result = nextURL + do { + while let nextElement = try redirectURLOnce(from: redirectingURL) { + redirectingURL = nextElement.url + result.append(nextElement) + } + } catch { + log.error(error) } return result } - /// Get a possible redirect url for the given URL and the given matchers. Will only get the next redirect url. - /// Only handles `http` and `https` schemes. + /// Get a possible redirect URL and rule for the given URL. + /// This only returns the next redirect entry and does not process the returned urls recursively. /// - /// This code uses the patterns in the given matchers to determine if a redirect action is required. - /// If it is, a redirect url will be returned provided we can extract it from the url + /// The following conditions must be met to redirect: + /// 1. Must have a `MatcherRule` that satisifes this url + /// 2. Extracted URL not be the same domain as the base URL + /// 3. Extracted URL must be a valid URL (Have valid scheme, have an `eTLD+1` or be an IP address etc.. ) + /// 4. Extracted URL must have a `http` or `https` scheme + private func redirectURLOnce(from url: URL) throws -> (url: URL, rule: RedirectRule)? { + guard let rule = matchingRedirectRule(for: url) else { + return nil + } + + guard let extractedURL = try rule.extractRedirectURL(from: url), + url.host != extractedURL.host, + extractedURL.scheme == "http" || extractedURL.scheme == "https" else { + return nil + } + + return (extractedURL, rule) + } + + /// Attempts to find a valid query param and a set of actions for the given URL + /// - Throws: May throw `DebouncingResourceDownloader.RuleError` + private func matchingRedirectRule(for url: URL) -> RedirectRule? { + guard let rule = matchingRule(for: url) else { + return nil + } + + let actions = rule.actions + let preferences = rule.preferences + + if actions.contains(.regexPath) { + return RedirectRule( + action: RedirectAction.regexParam(regex: rule.param), + shouldBase64Decode: actions.contains(.base64), + preferences: preferences, + prependScheme: rule.prependScheme + ) + } else { + return RedirectRule( + action: RedirectAction.queryParam(name: rule.param), + shouldBase64Decode: actions.contains(.base64), + preferences: preferences, + prependScheme: rule.prependScheme + ) + } + } + + /// Attempts to find a valid rule for the given URL /// /// **Rules** /// 1. For the given URL we *might* navigate too, pull out the `eTLD+1` and the query items @@ -153,82 +412,39 @@ public class DebouncingResourceDownloader { /// (As an optimization, we only need to check the pattern the `eTLD+1` was extracted from and not the whole include list) /// 3. If any of the query params (the keys) in the map from #4 above are in URL we might navigate to, apply the corresponding rule /// 4. Apply all the rules from bucket C - func redirectURLOnce(from url: URL) -> URL? { - // Extract the redirect URL - guard let components = URLComponents(url: url, resolvingAgainstBaseURL: false) else { return nil } - guard let queryItems = components.queryItems else { return nil } - guard let etld1 = url.baseDomain else { return nil } - - if let entries = etldToRule[etld1] { + private func matchingRule(for url: URL) -> MatcherRule? { + if let etld1 = url.baseDomain, let entries = etldToRule[etld1] { guard let entry = entries.first(where: { url.matches(any: $0.relevantPatterns) }), - !entry.rule.isExcluded(url: url), - let queryItem = queryItems.first(where: { $0.name == entry.rule.param }) + !entry.rule.isExcluded(url: url) else { return nil } - return redirectURL(queryItem: queryItem, actions: entry.rule.actions) - } else if let pair = matchingQueryItemsAndActions(for: url, queryItems: queryItems) { - return redirectURL(queryItem: pair.queryItem, actions: pair.actions) - } else if let rule = otherRules.first(where: { $0.handles(url: url) }) { - guard let queryItem = queryItems.first(where: { $0.name == rule.param }) else { - return nil - } - - return redirectURL(queryItem: queryItem, actions: rule.actions) + return entry.rule } else { - return nil + return matchingCachedQueryParamRule(for: url) + ?? otherRules.first(where: { $0.handles(url: url) }) } } /// Attempt to extract the query value and actions from the list of queryItems using the `queryToRule` (all urls) map. /// - /// - Parameter queryItems: The query items to extract the value and actions from - /// - Returns: A raw query item and actions to perform on that query item - private func matchingQueryItemsAndActions( - for url: URL, - queryItems: [URLQueryItem] - ) -> (queryItem: URLQueryItem, actions: Set)? { + /// - Parameter url: The url to extract the query params from + /// - Returns: A rule for the given url + private func matchingCachedQueryParamRule(for url: URL) -> MatcherRule? { + // Extract the redirect URL + guard let components = URLComponents(url: url, resolvingAgainstBaseURL: false) else { return nil } + guard let queryItems = components.queryItems else { return nil } + for queryItem in queryItems { - guard - let rule = queryToRule[queryItem.name], - !rule.isExcluded(url: url) - else { - continue + if let rule = queryToRule[queryItem.name], !rule.isExcluded(url: url) { + return rule } - - return (queryItem, rule.actions) } return nil } - - /// It attempts to extract a redirect `URL` from the given query item using the given actions - /// - /// It returns nil if it cannot find a valid URL within the query item - private func redirectURL(queryItem: URLQueryItem, actions: Set) -> URL? { - guard let queryValue = queryItem.value else { return nil } - - // For now we only support redirecting so it makes sense to check this right away - // No point in trying to decode anything if we don't have this action - guard actions.contains(.redirect) else { - return nil - } - - if actions.contains(.base64) { - // We need to base64 decode the url - guard let data = Data(base64Encoded: queryValue), - let decodedString = String(data: data, encoding: .utf8) - else { - return nil - } - - return URL(string: decodedString) - } else { - return URL(string: queryValue) - } - } } /// The base s3 environment url that hosts the debouncing (and other) files. @@ -268,11 +484,13 @@ public class DebouncingResourceDownloader { } /// Setup this downloader with rule `JSON` data. + /// + /// - Note: Decoded values that have unknown types are filtered out func setup(withRulesJSON ruleData: Data) throws { // Decode the data and store it for later user let jsonDecoder = JSONDecoder() jsonDecoder.keyDecodingStrategy = .convertFromSnakeCase - let rules = try jsonDecoder.decode([MatcherRule].self, from: ruleData) + let rules = try jsonDecoder.decode([Result].self, from: ruleData) matcher = Matcher(rules: rules) } @@ -356,13 +574,13 @@ public class DebouncingResourceDownloader { } } } - + /// Get a possible redirect url for the given URL. Does this /// /// This code uses the downloade `Matcher` to determine if a redirect action is required. /// If it is, a redirect url will be returned provided we can extract it from the url. - func redirectURL(for url: URL) -> URL? { - return matcher?.redirectURLAll(from: url) + func redirectChain(for url: URL) -> [(url: URL, rule: RedirectRule)] { + return matcher?.redirectChain(from: url) ?? [] } /// Load data from disk given by the folderName and fileName @@ -425,3 +643,14 @@ private extension URLPattern { return result.isEmpty ? nil : result } } + +extension Result: Decodable where Success: Decodable, Failure == Error { + public init(from decoder: Decoder) throws { + do { + let decodable = try Success(from: decoder) + self = .success(decodable) + } catch { + self = .failure(error) + } + } +} diff --git a/Sources/Shared/Extensions/ArrayExtensions.swift b/Sources/Shared/Extensions/ArrayExtensions.swift index 558e0c83bdb..94b1fa81bf4 100644 --- a/Sources/Shared/Extensions/ArrayExtensions.swift +++ b/Sources/Shared/Extensions/ArrayExtensions.swift @@ -79,6 +79,12 @@ public extension Array { Array(self[$0.. Bool) -> ArraySlice { + let index = firstIndex(where: { !condition($0) }) ?? self.count + return self[0.. 0 }), + emptyArray[0...] + ) + } }