Skip to content

Commit

Permalink
SERP
Browse files Browse the repository at this point in the history
  • Loading branch information
tmancey committed Mar 18, 2020
1 parent 67ace75 commit 2da9375
Show file tree
Hide file tree
Showing 7 changed files with 174 additions and 131 deletions.
10 changes: 8 additions & 2 deletions vendor/bat-native-ads/src/bat/ads/internal/ads_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -700,9 +700,15 @@ void AdsImpl::OnPageLoaded(
return;
}

if (TestSearchState(url)) {
if (TestSearchState(url) {
BLOG(INFO) << "Site visited " << url << ", URL is a search engine";
return;

if (!SearchProviders::IsSearchEngineResultsPage(url)) {
return;
}

BLOG(INFO) << "Site visited " << url << ", URL is a search engine results "
"page";
}

TestShoppingData(url);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@ SearchProviderInfo::SearchProviderInfo() = default;

// Constructs a SearchProviderInfo by copying each argument into the data
// member of the same name. The body is empty; all work is done in the
// member initializer list.
SearchProviderInfo::SearchProviderInfo(
const std::string& name,
const std::string& hostname,
const std::string& search_template,
bool is_always_classed_as_a_search)
: name(name),
hostname(hostname),
search_template(search_template),
is_always_classed_as_a_search(is_always_classed_as_a_search) {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,13 @@ struct SearchProviderInfo {
SearchProviderInfo();
SearchProviderInfo(
const std::string& name,
const std::string& hostname,
const std::string& search_template,
bool is_always_classed_as_a_search);
SearchProviderInfo(
const SearchProviderInfo& info);
~SearchProviderInfo();

std::string name;
std::string hostname;
std::string search_template;
bool is_always_classed_as_a_search = false;
};
Expand Down
86 changes: 37 additions & 49 deletions vendor/bat-native-ads/src/bat/ads/internal/search_providers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include "bat/ads/internal/uri_helper.h"
#include "third_party/re2/src/re2/re2.h"
#include "url/gurl.h"

#include "net/base/registry_controlled_domains/registry_controlled_domain.h"

namespace ads {

Expand All @@ -17,74 +17,62 @@ SearchProviders::~SearchProviders() = default;

bool SearchProviders::IsSearchEngine(
const std::string& url) {
const GURL visited_url = GURL(url);
if (!visited_url.is_valid()) {
const std::string tld_plus_1 = helper::Uri::GetDomainAndRegistry(url);
const auto iter = mymap.find(tld_plus_1);
if (iter == mymap.end()) {
return false;
}

bool is_a_search = false;
const SearchProviderInfo search_provider = iter->second;

for (const auto& search_provider : _search_providers) {
const GURL search_provider_hostname = GURL(search_provider.hostname);
if (!search_provider_hostname.is_valid()) {
continue;
}
if (search_provider.is_always_classed_as_a_search) {
return true;
}

if (search_provider.is_always_classed_as_a_search &&
visited_url.DomainIs(search_provider_hostname.host_piece())) {
is_a_search = true;
break;
}
if (!IsSearchEngineResultsPage(url)) {
return false;
}

size_t index = search_provider.search_template.find('{');
std::string substring = search_provider.search_template.substr(0, index);
size_t href_index = url.find(substring);
return true;
}

if (index != std::string::npos && href_index != std::string::npos) {
is_a_search = true;
break;
}
// Returns true when |url| is a search engine results page, i.e. when
// ExtractSearchQueryKeywords() yields a non-empty keyword string for it.
// Returns false otherwise.
bool SearchProviders::IsSearchEngineResultsPage(
    const std::string& url) {
  // A results page is identified solely by the presence of search query
  // keywords. The previous trailing |return is_a_search;| referred to a
  // variable that is not declared in this function and has been dropped.
  return !ExtractSearchQueryKeywords(url).empty();
}

std::string SearchProviders::ExtractSearchQueryKeywords(
const std::string& url) {
std::string search_query_keywords = "";
std::string search_query_keywords;

const GURL visited_url = GURL(url);
if (!visited_url.is_valid()) {
const std::string tld_plus_1 = helper::Uri::GetDomainAndRegistry(url);
const auto iter = mymap.find(tld_plus_1);
if (iter == mymap.end()) {
return search_query_keywords;
}

for (const auto& search_provider : _search_providers) {
GURL search_provider_hostname = GURL(search_provider.hostname);
if (!search_provider_hostname.is_valid()) {
continue;
}
const SearchProviderInfo search_provider = iter->second;

if (!visited_url.DomainIs(search_provider_hostname.host_piece())) {
continue;
}
size_t index = search_provider.search_template.find('{');
std::string substring = search_provider.search_template.substr(0, index);
size_t href_index = url.find(substring);

size_t index = search_provider.search_template.find('{');
std::string substring = search_provider.search_template.substr(0, index);
size_t href_index = url.find(substring);

if (index != std::string::npos && href_index != std::string::npos) {
// Check that the search template, as defined in |search_providers.h|,
// contains a query key, e.g. |https://searx.me/?q={searchTerms}&categories=general|
// matches |?q={|
std::string key;
if (!RE2::PartialMatch(
search_provider.search_template, "\\?(.*?)\\={", &key)) {
return search_query_keywords;
}

search_query_keywords = helper::Uri::GetValueForKeyInQuery(url, key);
break;
if (index != std::string::npos && href_index != std::string::npos) {
// Check that the search template, as defined in |search_providers.h|,
// contains a query key, e.g. |https://searx.me/?q={searchTerms}&categories=general|
// matches |?q={|
std::string key;
if (!RE2::PartialMatch(
search_provider.search_template, "\\?(.*?)\\={", &key)) {
return search_query_keywords;
}

search_query_keywords = helper::Uri::GetValueForKeyInQuery(url, key);
}

return search_query_keywords;
Expand Down
Loading

0 comments on commit 2da9375

Please sign in to comment.