diff --git a/vendor/bat-native-ads/src/bat/ads/internal/ads_impl.cc b/vendor/bat-native-ads/src/bat/ads/internal/ads_impl.cc
index 5baa82eb67ed..1be79e8f5dad 100644
--- a/vendor/bat-native-ads/src/bat/ads/internal/ads_impl.cc
+++ b/vendor/bat-native-ads/src/bat/ads/internal/ads_impl.cc
@@ -700,9 +700,17 @@ void AdsImpl::OnPageLoaded(
     return;
   }
 
   if (TestSearchState(url)) {
     BLOG(INFO) << "Site visited " << url << ", URL is a search engine";
-    return;
+
+    // Results pages carry the user's query, so let them fall through to the
+    // processing below; any other search engine page stops here
+    if (!SearchProviders::IsSearchEngineResultsPage(url)) {
+      return;
+    }
+
+    BLOG(INFO) << "Site visited " << url << ", URL is a search engine results "
+        "page";
   }
 
   TestShoppingData(url);
diff --git a/vendor/bat-native-ads/src/bat/ads/internal/search_provider_info.cc b/vendor/bat-native-ads/src/bat/ads/internal/search_provider_info.cc
index 700f49cebc2c..d2ca1449c76b 100644
--- a/vendor/bat-native-ads/src/bat/ads/internal/search_provider_info.cc
+++ b/vendor/bat-native-ads/src/bat/ads/internal/search_provider_info.cc
@@ -11,11 +11,9 @@ SearchProviderInfo::SearchProviderInfo() = default;
 
 SearchProviderInfo::SearchProviderInfo(
     const std::string& name,
-    const std::string& hostname,
     const std::string& search_template,
     bool is_always_classed_as_a_search)
     : name(name),
-      hostname(hostname),
       search_template(search_template),
       is_always_classed_as_a_search(is_always_classed_as_a_search) {}
 
diff --git a/vendor/bat-native-ads/src/bat/ads/internal/search_provider_info.h b/vendor/bat-native-ads/src/bat/ads/internal/search_provider_info.h
index 39a2678537c8..f3d2f3a07d88 100644
--- a/vendor/bat-native-ads/src/bat/ads/internal/search_provider_info.h
+++ b/vendor/bat-native-ads/src/bat/ads/internal/search_provider_info.h
@@ -15,7 +15,6 @@ struct SearchProviderInfo {
   SearchProviderInfo();
   SearchProviderInfo(
       const std::string& name,
-      const std::string& hostname,
       const std::string& search_template,
       bool is_always_classed_as_a_search);
   SearchProviderInfo(
@@ -23,7 +22,6 @@ struct SearchProviderInfo {
   ~SearchProviderInfo();
 
   std::string name;
-  std::string hostname;
   std::string search_template;
   bool is_always_classed_as_a_search = false;
 };
diff --git a/vendor/bat-native-ads/src/bat/ads/internal/search_providers.cc b/vendor/bat-native-ads/src/bat/ads/internal/search_providers.cc
index 373ac17c2439..312190d83f4e 100644
--- a/vendor/bat-native-ads/src/bat/ads/internal/search_providers.cc
+++ b/vendor/bat-native-ads/src/bat/ads/internal/search_providers.cc
@@ -7,5 +7,6 @@
 #include "bat/ads/internal/uri_helper.h"
+#include "net/base/registry_controlled_domains/registry_controlled_domain.h"
 #include "third_party/re2/src/re2/re2.h"
 #include "url/gurl.h"
 
 namespace ads {
@@ -17,74 +18,88 @@ SearchProviders::~SearchProviders() = default;
 
+namespace {
+
+// Returns the search provider whose key equals the host of |url| or one of
+// its parent domains, walking up no further than the registrable domain
+// (eTLD+1). Returns nullptr if |url| is invalid or no provider matches. Keys
+// such as |search.yahoo.com| are more specific than eTLD+1, so a lookup on
+// eTLD+1 alone would never find them
+const SearchProviderInfo* FindSearchProvider(
+    const std::string& url) {
+  const GURL visited_url = GURL(url);
+  if (!visited_url.is_valid()) {
+    return nullptr;
+  }
+
+  const std::string tld_plus_1 = helper::Uri::GetDomainAndRegistry(url);
+  if (tld_plus_1.empty()) {
+    return nullptr;
+  }
+
+  std::string host = visited_url.host();
+  while (host.length() >= tld_plus_1.length()) {
+    const auto iter = _search_providers.find(host);
+    if (iter != _search_providers.end()) {
+      return &iter->second;
+    }
+
+    const size_t dot = host.find('.');
+    if (dot == std::string::npos) {
+      break;
+    }
+
+    host.erase(0, dot + 1);
+  }
+
+  return nullptr;
+}
+
+}  // namespace
+
 bool SearchProviders::IsSearchEngine(
     const std::string& url) {
-  const GURL visited_url = GURL(url);
-  if (!visited_url.is_valid()) {
+  const SearchProviderInfo* search_provider = FindSearchProvider(url);
+  if (!search_provider) {
     return false;
   }
 
-  bool is_a_search = false;
-
-  for (const auto& search_provider : _search_providers) {
-    const GURL search_provider_hostname = GURL(search_provider.hostname);
-    if (!search_provider_hostname.is_valid()) {
-      continue;
-    }
-
-    if (search_provider.is_always_classed_as_a_search &&
-        visited_url.DomainIs(search_provider_hostname.host_piece())) {
-      is_a_search = true;
-      break;
-    }
-
-    size_t index = search_provider.search_template.find('{');
-    std::string substring = search_provider.search_template.substr(0, index);
-    size_t href_index = url.find(substring);
-
-    if (index != std::string::npos && href_index != std::string::npos) {
-      is_a_search = true;
-      break;
-    }
-  }
-
-  return is_a_search;
+  if (search_provider->is_always_classed_as_a_search) {
+    return true;
+  }
+
+  return IsSearchEngineResultsPage(url);
+}
+
+bool SearchProviders::IsSearchEngineResultsPage(
+    const std::string& url) {
+  return !ExtractSearchQueryKeywords(url).empty();
 }
 
 std::string SearchProviders::ExtractSearchQueryKeywords(
     const std::string& url) {
-  std::string search_query_keywords = "";
+  std::string search_query_keywords;
 
-  const GURL visited_url = GURL(url);
-  if (!visited_url.is_valid()) {
+  const SearchProviderInfo* search_provider = FindSearchProvider(url);
+  if (!search_provider) {
     return search_query_keywords;
   }
 
-  for (const auto& search_provider : _search_providers) {
-    GURL search_provider_hostname = GURL(search_provider.hostname);
-    if (!search_provider_hostname.is_valid()) {
-      continue;
-    }
-
-    if (!visited_url.DomainIs(search_provider_hostname.host_piece())) {
-      continue;
-    }
-
-    size_t index = search_provider.search_template.find('{');
-    std::string substring = search_provider.search_template.substr(0, index);
-    size_t href_index = url.find(substring);
-
-    if (index != std::string::npos && href_index != std::string::npos) {
-      // Checking if search template in as defined in |search_providers.h|
-      // is defined, e.g. |https://searx.me/?q={searchTerms}&categories=general|
-      // matches |?q={|
-      std::string key;
-      if (!RE2::PartialMatch(
-          search_provider.search_template, "\\?(.*?)\\={", &key)) {
-        return search_query_keywords;
-      }
-
-      search_query_keywords = helper::Uri::GetValueForKeyInQuery(url, key);
-      break;
-    }
+  const std::string& search_template = search_provider->search_template;
+
+  // The provider only applies if |url| contains the part of the template
+  // that precedes the |{searchTerms}| placeholder
+  const size_t index = search_template.find('{');
+  if (index == std::string::npos ||
+      url.find(search_template.substr(0, index)) == std::string::npos) {
+    return search_query_keywords;
+  }
+
+  // Extract the query key immediately preceding the placeholder, e.g.
+  // |https://searx.me/?q={searchTerms}&categories=general| yields |q|. The
+  // key is anchored on the nearest |?| or |&| so that templates with leading
+  // parameters (e.g. Infogalactic) resolve to |search|, not |title=...|
+  std::string key;
+  if (RE2::PartialMatch(search_template, "[?&]([^=&]+)=\\{", &key)) {
+    search_query_keywords = helper::Uri::GetValueForKeyInQuery(url, key);
   }
 
   return search_query_keywords;
diff --git a/vendor/bat-native-ads/src/bat/ads/internal/search_providers.h b/vendor/bat-native-ads/src/bat/ads/internal/search_providers.h
index 478dd4f132f7..55aa4b57ae1a 100644
--- a/vendor/bat-native-ads/src/bat/ads/internal/search_providers.h
+++ b/vendor/bat-native-ads/src/bat/ads/internal/search_providers.h
@@ -6,125 +6,170 @@
 #ifndef BAT_ADS_INTERNAL_SEARCH_PROVIDERS_H_
 #define BAT_ADS_INTERNAL_SEARCH_PROVIDERS_H_
 
+#include <map>
 #include <string>
 #include <vector>
 
 #include "bat/ads/internal/search_provider_info.h"
 
 namespace ads {
 
-const std::vector<SearchProviderInfo> _search_providers = {
-  SearchProviderInfo(
+// Search providers keyed by host. A key is either a registrable domain
+// (eTLD+1) or a more specific host such as |search.yahoo.com|
+const std::map<std::string, SearchProviderInfo> _search_providers = {
+  {
+    "amazon.com", {
       "Amazon",
-      "https://amazon.com",
-      "https://www.amazon.com/exec/obidos/external-search/?field-keywords={searchTerms}&mode=blended",  // NOLINT
-      false),
-  SearchProviderInfo(
+      "https://www.amazon.com/s?k={searchTerms}",
+      false
+    }
+  },
+  {
+    "bing.com", {
       "Bing",
-      "https://bing.com",
       "https://www.bing.com/search?q={searchTerms}",
-      true),
-  SearchProviderInfo(
+      true
+    }
+  },
+  {
+    "duckduckgo.com", {
       "DuckDuckGo",
-      "https://duckduckgo.com",
       "https://duckduckgo.com/?q={searchTerms}&t=brave",
-      true),
-  SearchProviderInfo(
+      true
+    }
+  },
+  {
+    "fireball.com", {
       "Fireball",
-      "https://fireball.com",
       "https://fireball.com/search?q={searchTerms}",
-      true),
-  SearchProviderInfo(
+      true
+    }
+  },
+  {
+    "github.com", {
       "GitHub",
-      "https://github.com",
       "https://github.com/search?q={searchTerms}",
-      false),
-  SearchProviderInfo(
+      false
+    }
+  },
+  {
+    // TODO(https://github.com/brave/brave-browser/issues/8487): Brave Ads
+    // search providers definition doesn't match all patterns
+    "google.com", {
       "Google",
-      // TODO(https://github.com/brave/brave-browser/issues/8487): Brave Ads
-      // search providers definition doesn't match all patterns
-      "https://google.com",
       "https://www.google.com/search?q={searchTerms}",
-      true),
-  SearchProviderInfo(
+      true
+    }
+  },
+  {
+    "stackoverflow.com", {
       "Stack Overflow",
-      "https://stackoverflow.com",
       "https://stackoverflow.com/search?q={searchTerms}",
-      false),
-  SearchProviderInfo(
+      false
+    }
+  },
+  {
+    "developer.mozilla.org", {
       "MDN Web Docs",
-      "https://developer.mozilla.org",
       "https://developer.mozilla.org/search?q={searchTerms}",
-      false),
-  SearchProviderInfo(
+      false
+    }
+  },
+  {
+    "twitter.com", {
       "Twitter",
-      "https://twitter.com",
-      "https://twitter.com/search?q={searchTerms}&source=desktop-search",
-      false),
-  SearchProviderInfo(
+      "https://twitter.com/search?q={searchTerms}",
+      false
+    }
+  },
+  {
+    "en.wikipedia.org", {
       "Wikipedia",
-      "https://en.wikipedia.org",
       "https://en.wikipedia.org/wiki/Special:Search?search={searchTerms}",
-      false),
-  SearchProviderInfo(
+      false
+    }
+  },
+  {
+    // TODO(https://github.com/brave/brave-browser/issues/8487): Brave Ads
+    // search providers definition doesn't match all patterns
+    "search.yahoo.com", {
       "Yahoo",
-      // TODO(https://github.com/brave/brave-browser/issues/8487): Brave Ads
-      // search providers definition doesn't match all patterns
-      "https://search.yahoo.com",
       "https://search.yahoo.com/search?p={searchTerms}&fr=opensearch",
-      true),
-  SearchProviderInfo(
+      true
+    }
+  },
+  {
+    "youtube.com", {
       "YouTube",
-      "https://youtube.com",
-      "https://www.youtube.com/results?search_type=search_videos&search_query={searchTerms}&search_sort=relevance&search_category=0&page=",  // NOLINT
-      false),
-  SearchProviderInfo(
+      "https://www.youtube.com/results?search_query={searchTerms}",
+      false
+    }
+  },
+  {
+    // TODO(https://github.com/brave/brave-browser/issues/8487): Brave Ads
+    // search providers definition doesn't match all patterns
+    "startpage.com", {
       "StartPage",
-      // TODO(https://github.com/brave/brave-browser/issues/8487): Brave Ads
-      // search providers definition doesn't match all patterns
-      "https://startpage.com",
-      "https://www.startpage.com/do/dsearch?query={searchTerms}&cat=web&pl=opensearch",  // NOLINT
-      true),
-  SearchProviderInfo(
+      "https://www.startpage.com/do/dsearch?query={searchTerms}&pl=opensearch",
+      true
+    }
+  },
+  {
+    "infogalactic.com", {
       "Infogalactic",
-      "https://infogalactic.com",
       "https://infogalactic.com/w/index.php?title=Special:Search&search={searchTerms}",  // NOLINT
-      false),
-  SearchProviderInfo(
+      false
+    }
+  },
+  {
+    "wolframalpha.com", {
       "Wolfram Alpha",
-      "https://wolframalpha.com",
       "https://www.wolframalpha.com/input/?i={searchTerms}",
-      false),
-  SearchProviderInfo(
+      false
+    }
+  },
+  {
+    "semanticscholar.org", {
       "Semantic Scholar",
-      "https://semanticscholar.org",
       "https://www.semanticscholar.org/search?q={searchTerms}",
-      true),
-  SearchProviderInfo(
+      true
+    }
+  },
+  {
+    "qwant.com", {
       "Qwant",
-      "https://qwant.com",
       "https://www.qwant.com/?q={searchTerms}&client=brave",
-      true),
-  SearchProviderInfo(
+      true
+    }
+  },
+  {
+    "yandex.com", {
       "Yandex",
-      "https://yandex.com",
       "https://yandex.com/search/?text={searchTerms}&clid=2274777",
-      true),
-  SearchProviderInfo(
+      true
+    }
+  },
+  {
+    "ecosia.org", {
       "Ecosia",
-      "https://ecosia.org",
       "https://www.ecosia.org/search?q={searchTerms}",
-      true),
-  SearchProviderInfo(
+      true
+    }
+  },
+  {
+    "searx.me", {
       "searx",
-      "https://searx.me",
       "https://searx.me/?q={searchTerms}&categories=general",
-      true),
-  SearchProviderInfo(
+      true
+    }
+  },
+  {
+    "findx.com", {
       "findx",
-      "https://findx.com",
       "https://www.findx.com/search?q={searchTerms}&type=web",
-      true)
+      true
+    }
+  }
 };
 
 class SearchProviders {
@@ -134,6 +179,11 @@ class SearchProviders {
 
   static bool IsSearchEngine(
       const std::string& url);
+
+  // Returns true if search query keywords can be extracted from |url|
+  static bool IsSearchEngineResultsPage(
+      const std::string& url);
+
   static std::string ExtractSearchQueryKeywords(
       const std::string& url);
 };
diff --git a/vendor/bat-native-ads/src/bat/ads/internal/uri_helper.cc b/vendor/bat-native-ads/src/bat/ads/internal/uri_helper.cc
index ba5190896718..ec158276252a 100644
--- a/vendor/bat-native-ads/src/bat/ads/internal/uri_helper.cc
+++ b/vendor/bat-native-ads/src/bat/ads/internal/uri_helper.cc
@@ -74,4 +74,13 @@ bool Uri::MatchesDomainOrHost(
       net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
 }
 
+std::string Uri::GetDomainAndRegistry(
+    const std::string& url) {
+  // Qualify the call: an unqualified |GetDomainAndRegistry| would resolve to
+  // this member function rather than the //net helper
+  return net::registry_controlled_domains::GetDomainAndRegistry(
+      GURL(url),
+      net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);
+}
+
 }  // namespace helper
diff --git a/vendor/bat-native-ads/src/bat/ads/internal/uri_helper.h b/vendor/bat-native-ads/src/bat/ads/internal/uri_helper.h
index f1ee52bbc9bb..5811e70b588f 100644
--- a/vendor/bat-native-ads/src/bat/ads/internal/uri_helper.h
+++ b/vendor/bat-native-ads/src/bat/ads/internal/uri_helper.h
@@ -26,6 +26,11 @@ class Uri {
   static bool MatchesDomainOrHost(
       const std::string& url1,
       const std::string& url2);
+
+  // Returns the registrable domain (eTLD+1) of |url|, private registries
+  // included. NOTE(review): presumably empty for an invalid |url| - confirm
+  static std::string GetDomainAndRegistry(
+      const std::string& url);
 };
 
 }  // namespace helper