From 94f88aa5a337c5bd00fe619b18892cf04bc4e334 Mon Sep 17 00:00:00 2001 From: brave-builds Date: Thu, 14 Oct 2021 14:52:06 +0000 Subject: [PATCH] Uplift of #10200 (squashed) to beta --- .../empty_segment_classification.json | 38 +++++++++++++++++++ .../bandits/epsilon_greedy_bandit_arms.cc | 19 ++++++++-- ...psilon_greedy_bandit_processor_unittest.cc | 25 ++++++++++++ .../ads/internal/ml/pipeline/pipeline_util.cc | 11 ++++-- .../text_processing/text_processing.cc | 2 + .../text_processing_unittest.cc | 18 +++++++++ .../purchase_intent_resource.cc | 13 ++++++- 7 files changed, 117 insertions(+), 9 deletions(-) create mode 100644 vendor/bat-native-ads/data/test/ml/pipeline/text_processing/empty_segment_classification.json diff --git a/vendor/bat-native-ads/data/test/ml/pipeline/text_processing/empty_segment_classification.json b/vendor/bat-native-ads/data/test/ml/pipeline/text_processing/empty_segment_classification.json new file mode 100644 index 000000000000..5ea4a554aedb --- /dev/null +++ b/vendor/bat-native-ads/data/test/ml/pipeline/text_processing/empty_segment_classification.json @@ -0,0 +1,38 @@ +{ + "version": 1, + "timestamp": "2019-03-13 17:33:31.708151", + "locale": "en", + "transformations": [ + { + "transformation_type": "TO_LOWER" + }, + { + "transformation_type": "HASHED_NGRAMS", + "params": { + "ngrams_range": [ + 1 + ], + "num_buckets": 500 + } + } + ], + "classifier": { + "classifier_type": "LINEAR", + "classes": [ + "ham", + "" + ], + "class_weights": { + "ham": [ + -6.0868 + ], + "": [ + 2.45563 + ] + }, + "biases": [ + -0.9163, + -1.6094 + ] + } + } diff --git a/vendor/bat-native-ads/src/bat/ads/internal/ad_targeting/data_types/behavioral/bandits/epsilon_greedy_bandit_arms.cc b/vendor/bat-native-ads/src/bat/ads/internal/ad_targeting/data_types/behavioral/bandits/epsilon_greedy_bandit_arms.cc index 93d992e4e4d9..84dc6bd24349 100644 --- a/vendor/bat-native-ads/src/bat/ads/internal/ad_targeting/data_types/behavioral/bandits/epsilon_greedy_bandit_arms.cc +++ b/vendor/bat-native-ads/src/bat/ads/internal/ad_targeting/data_types/behavioral/bandits/epsilon_greedy_bandit_arms.cc @@ -12,6 +12,7 @@ #include "base/json/json_writer.h" #include "base/notreached.h" #include "base/values.h" +#include "bat/ads/internal/logging.h" #include "third_party/abseil-cpp/absl/types/optional.h" namespace ads { @@ -39,7 +40,7 @@ bool GetArmFromDictionary(const base::DictionaryValue* dictionary, EpsilonGreedyBanditArmInfo arm; const std::string* segment = dictionary->FindStringKey(kSegmentKey); - if (!segment) { + if (!segment || segment->empty()) { return false; } arm.segment = *segment; @@ -62,28 +63,38 @@ EpsilonGreedyBanditArmMap GetArmsFromDictionary( return arms; } + bool found_errors = false; for (const auto value : dictionary->DictItems()) { + if (value.first.empty()) { + found_errors = true; + continue; + } + if (!value.second.is_dict()) { - NOTREACHED(); + found_errors = true; continue; } const base::DictionaryValue* arm_dictionary = nullptr; value.second.GetAsDictionary(&arm_dictionary); if (!arm_dictionary) { - NOTREACHED(); + found_errors = true; continue; } EpsilonGreedyBanditArmInfo arm; if (!GetArmFromDictionary(arm_dictionary, &arm)) { - NOTREACHED(); + found_errors = true; continue; } arms[value.first] = arm; } + if (found_errors) { + BLOG(0, "Errors detected when parsing epsilon greedy bandit arms"); + } + return arms; } diff --git a/vendor/bat-native-ads/src/bat/ads/internal/ad_targeting/processors/behavioral/bandits/epsilon_greedy_bandit_processor_unittest.cc b/vendor/bat-native-ads/src/bat/ads/internal/ad_targeting/processors/behavioral/bandits/epsilon_greedy_bandit_processor_unittest.cc index 97c7a71fab8c..d6b2d6963c8d 100644 --- a/vendor/bat-native-ads/src/bat/ads/internal/ad_targeting/processors/behavioral/bandits/epsilon_greedy_bandit_processor_unittest.cc +++ b/vendor/bat-native-ads/src/bat/ads/internal/ad_targeting/processors/behavioral/bandits/epsilon_greedy_bandit_processor_unittest.cc @@ -15,6 +15,17 @@ // npm run test -- brave_unit_tests --filter=BatAds* +namespace { + +constexpr char kArmsWithEmptySegmentJson[] = R"( + { + "travel":{"pulls":0,"segment":"travel","value":1.0}, + "":{"pulls":0,"segment":"","value":1.0} + } +)"; + +} // namespace + namespace ads { namespace ad_targeting { @@ -187,5 +198,19 @@ TEST_F(BatAdsEpsilonGreedyBanditProcessorTest, ProcessChildSegment) { EXPECT_EQ(expected_arm, arm); } +TEST_F(BatAdsEpsilonGreedyBanditProcessorTest, + InitializeArmsFromResourceWithEmptySegments) { + // Arrange + + // Act + const EpsilonGreedyBanditArmMap arms = + EpsilonGreedyBanditArms::FromJson(kArmsWithEmptySegmentJson); + + // Assert + // Empty segments are skipped. + EXPECT_EQ(1U, arms.size()); + EXPECT_EQ(1U, arms.count("travel")); +} + } // namespace ad_targeting } // namespace ads diff --git a/vendor/bat-native-ads/src/bat/ads/internal/ml/pipeline/pipeline_util.cc b/vendor/bat-native-ads/src/bat/ads/internal/ml/pipeline/pipeline_util.cc index 5520ca9ed2f6..27f2709edee1 100644 --- a/vendor/bat-native-ads/src/bat/ads/internal/ml/pipeline/pipeline_util.cc +++ b/vendor/bat-native-ads/src/bat/ads/internal/ml/pipeline/pipeline_util.cc @@ -120,11 +120,16 @@ absl::optional ParsePipelineClassifier( std::vector classes; for (const base::Value& class_name : specified_classes->GetList()) { - if (class_name.is_string()) { - classes.push_back(class_name.GetString()); - } else { + if (!class_name.is_string()) { return absl::nullopt; } + + const std::string class_string = class_name.GetString(); + if (class_string.empty()) { + return absl::nullopt; + } + + classes.push_back(class_string); } base::Value* class_weights = classifier_value->FindDictKey("class_weights"); diff --git a/vendor/bat-native-ads/src/bat/ads/internal/ml/pipeline/text_processing/text_processing.cc b/vendor/bat-native-ads/src/bat/ads/internal/ml/pipeline/text_processing/text_processing.cc index 47121b6f3a88..aa06319a3a58 100644 --- a/vendor/bat-native-ads/src/bat/ads/internal/ml/pipeline/text_processing/text_processing.cc +++ b/vendor/bat-native-ads/src/bat/ads/internal/ml/pipeline/text_processing/text_processing.cc @@ -8,6 +8,7 @@ #include #include "base/check.h" +#include "bat/ads/internal/logging.h" #include "bat/ads/internal/ml/data/text_data.h" #include "bat/ads/internal/ml/data/vector_data.h" #include "bat/ads/internal/ml/ml_transformation_util.h" @@ -67,6 +68,7 @@ bool TextProcessing::FromJson(const std::string& json) { is_initialized_ = true; } else { is_initialized_ = false; + BLOG(0, "Failed to parse text classification pipeline JSON"); } return is_initialized_; diff --git a/vendor/bat-native-ads/src/bat/ads/internal/ml/pipeline/text_processing/text_processing_unittest.cc b/vendor/bat-native-ads/src/bat/ads/internal/ml/pipeline/text_processing/text_processing_unittest.cc index 2e27c921beb0..34a31c24a706 100644 --- a/vendor/bat-native-ads/src/bat/ads/internal/ml/pipeline/text_processing/text_processing_unittest.cc +++ b/vendor/bat-native-ads/src/bat/ads/internal/ml/pipeline/text_processing/text_processing_unittest.cc @@ -30,6 +30,9 @@ namespace { const char kValidSegmentClassificationPipeline[] = "ml/pipeline/text_processing/valid_segment_classification_min.json"; +const char kEmptySegmentClassificationPipeline[] = + "ml/pipeline/text_processing/empty_segment_classification.json"; + const char kInvalidSpamClassificationPipeline[] = "ml/pipeline/text_processing/invalid_spam_classification.json"; @@ -157,6 +160,21 @@ TEST_F(BatAdsTextProcessingPipelineTest, InvalidModelTest) { EXPECT_FALSE(loaded_successfully); } +TEST_F(BatAdsTextProcessingPipelineTest, EmptySegmentModelTest) { + // Arrange + pipeline::TextProcessing text_processing_pipeline; + const absl::optional json_optional = + ReadFileFromTestPathToString(kEmptySegmentClassificationPipeline); + + // Act + ASSERT_TRUE(json_optional.has_value()); + const std::string json = json_optional.value(); + const bool loaded_successfully = text_processing_pipeline.FromJson(json); + + // Assert + EXPECT_FALSE(loaded_successfully); +} + TEST_F(BatAdsTextProcessingPipelineTest, EmptyModelTest) { // Arrange pipeline::TextProcessing text_processing_pipeline; diff --git a/vendor/bat-native-ads/src/bat/ads/internal/resources/behavioral/purchase_intent/purchase_intent_resource.cc b/vendor/bat-native-ads/src/bat/ads/internal/resources/behavioral/purchase_intent/purchase_intent_resource.cc index 5a22b115abba..4b24927b4acf 100644 --- a/vendor/bat-native-ads/src/bat/ads/internal/resources/behavioral/purchase_intent/purchase_intent_resource.cc +++ b/vendor/bat-native-ads/src/bat/ads/internal/resources/behavioral/purchase_intent/purchase_intent_resource.cc @@ -101,8 +101,13 @@ bool PurchaseIntent::FromJson(const std::string& json) { } std::vector segments; - for (auto& segment : list3->GetList()) { - segments.push_back(segment.GetString()); + for (const auto& segment_value : list3->GetList()) { + const std::string segment = segment_value.GetString(); + if (segment.empty()) { + BLOG(1, "Failed to load from JSON, empty segment found"); + return false; + } + segments.push_back(segment); } // Parsing field: "segment_keywords" @@ -129,6 +134,10 @@ bool PurchaseIntent::FromJson(const std::string& json) { ad_targeting::PurchaseIntentSegmentKeywordInfo info; info.keywords = it.key(); for (const auto& segment_ix : it.value().GetList()) { + if (static_cast(segment_ix.GetInt()) >= segments.size()) { + BLOG(1, "Failed to load from JSON, segment keywords are ill-formed"); + return false; + } info.segments.push_back(segments.at(segment_ix.GetInt())); }