AdHash Bidder Adapter: update for brand safety (prebid#10087)

* AdHash Bidder Adapter: minor changes
  We're operating on a com TLD now. Added publisher in URL for easier routing.
* Implemented brand safety
  Implemented brand safety checks
* Fix for GDPR consent
  Removing the extra information as request data becomes too big and is sometimes truncated
* Ad fraud prevention formula changed
  Ad fraud prevention formula changed to support negative values as well as linear distribution of article length
* AdHash brand safety additions
  Adding starts-with and ends-with rules that will help us with languages such as German where a single word can be written in multiple ways depending on the gender and grammatical case.
* AdHash brand safety updates
  Added support for Cyrillic characters. Added support for bidderURL parameter. Fixed score multiplier from 500 to 1000.
* AdHash Analytics adapter
* Support for recent ads
  Support for recent ads which gives us the option to do frequency and recency capping.
* Fix for timestamp
* PUB-222 Added logic for measuring the fill rate (fallbacks) for Prebid impressions
* Unit tests for the analytics adapter
  Added unit tests for the analytics adapter
* Removed export causing errors
  Removed an unneeded export of a const that was causing errors with the analytics adapter
* Added globalScript parameter
* PUB-227 Support for non-Latin and non-Cyrillic symbols
* GEN-964
  - Brand safety now checks the page URL for bad words. No ad is shown if there is at least one match.
  - Repeating code is optimized and moved to helper function
  - Multi-language support for brand safety
* GEN-1025 Sending the needed ad density data to the bidder
* Removing the analytics adaptor
* Fix for regexp match
* Version change
* MINOR Code review changes
* GEN-1153 Adding support for preroll ads
* MINOR Video unit test added
* Removing globalScript flag
* Brand safety change
  Adding support for compound words as well as combo-patterns.

---------

Co-authored-by: NikolayMGeorgiev <nikolay@adhash.org>
Co-authored-by: Ventsislav Saraminev <v.saraminev@abv.bg>
Co-authored-by: Dimitar Kalenderov <mitko.kalenderov@gmail.com>
rubicon-project · Aug 25, 2023 · 51f4590 · 51f4590
1 parent 859667e
commit 51f4590
Show file tree

Hide file tree

Showing 2 changed files with 74 additions and 36 deletions.
diff --git a/modules/adhashBidAdapter.js b/modules/adhashBidAdapter.js
@@ -1,9 +1,9 @@
-import {registerBidder} from '../src/adapters/bidderFactory.js';
+import { registerBidder } from '../src/adapters/bidderFactory.js';
 import { getStorageManager } from '../src/storageManager.js';
 import { includes } from '../src/polyfill.js';
 import { BANNER, VIDEO } from '../src/mediaTypes.js';
 
-const VERSION = '3.2';
+const VERSION = '3.6';
 const BAD_WORD_STEP = 0.1;
 const BAD_WORD_MIN = 0.2;
 const ADHASH_BIDDER_CODE = 'adhash';
@@ -19,6 +19,8 @@ const ADHASH_BIDDER_CODE = 'adhash';
  * @returns boolean flag indicating whether the page is safe
  */
 function brandSafety(badWords, maxScore) {
+  const delimiter = '~';
+
   /**
    * Performs the ROT13 encoding on the string argument and returns the resulting string.
    * The Adhash bidder uses ROT13 so that the response is not blocked by:
@@ -40,17 +42,17 @@ function brandSafety(badWords, maxScore) {
   /**
    * Calculates the scoring for each bad word with diminishing returns
    * @param {integer} points points that this word costs
-   * @param {integer} occurances number of occurances
+   * @param {integer} occurrences number of occurrences
    * @returns {float} final score
    */
-  const scoreCalculator = (points, occurances) => {
+  const scoreCalculator = (points, occurrences) => {
     let positive = true;
     if (points < 0) {
       points *= -1;
       positive = false;
     }
     let result = 0;
-    for (let i = 0; i < occurances; i++) {
+    for (let i = 0; i < occurrences; i++) {
       result += Math.max(points - i * BAD_WORD_STEP, BAD_WORD_MIN);
     }
     return positive ? result : -result;
@@ -60,22 +62,50 @@ function brandSafety(badWords, maxScore) {
    * Checks what rule will match in the given array with words
    * @param {string} rule rule type (full, partial, starts, ends, combo, regexp)
    * @param {string} decodedWord decoded word
-   * @param {array} wordsToMatch array to find a match
+   * @param {string} wordsToMatch list of all words on the page separated by delimiters
    * @returns {object|boolean} matched rule and occurrences. If nothing is matched returns false
    */
   const wordsMatchedWithRule = function (rule, decodedWord, wordsToMatch) {
-    if (rule === 'full' && wordsToMatch && wordsToMatch.includes(decodedWord)) {
-      return { rule, occurances: wordsToMatch.filter(element => element === decodedWord).length };
-    } else if (rule === 'partial' && wordsToMatch && wordsToMatch.some(element => element.indexOf(decodedWord) > -1)) {
-      return { rule, occurances: wordsToMatch.filter(element => element.indexOf(decodedWord) > -1).length };
-    } else if (rule === 'starts' && wordsToMatch && wordsToMatch.some(word => word.startsWith(decodedWord))) {
-      return { rule, occurances: wordsToMatch.filter(element => element.startsWith(decodedWord)).length };
-    } else if (rule === 'ends' && wordsToMatch && wordsToMatch.some(word => word.endsWith(decodedWord))) {
-      return { rule, occurances: wordsToMatch.filter(element => element.endsWith(decodedWord)).length };
-    } else if (rule === 'regexp' && wordsToMatch && wordsToMatch.some(element => element.match(new RegExp(decodedWord, 'i')))) {
-      return { rule, occurances: wordsToMatch.filter(element => element.match(new RegExp(decodedWord, 'i'))).length };
+    if (!wordsToMatch) {
+      return false;
+    }
+
+    let occurrences;
+    let adjustedWordToMatch;
+    decodedWord = decodedWord.split(' ').join(`${delimiter}${delimiter}`);
+    switch (rule) {
+      case 'full':
+        adjustedWordToMatch = `${delimiter}${decodedWord}${delimiter}`;
+        break;
+      case 'partial':
+        adjustedWordToMatch = decodedWord;
+        break;
+      case 'starts':
+        adjustedWordToMatch = `${delimiter}${decodedWord}`;
+        break;
+      case 'ends':
+        adjustedWordToMatch = `${decodedWord}${delimiter}`;
+        break;
+      case 'combo':
+        const allOccurrences = [];
+        const paddedWordsToMatch = `${delimiter}${wordsToMatch}${delimiter}`;
+        const decodedWordsSplit = decodedWord.split(`${delimiter}${delimiter}`);
+        for (const decodedWordPart of decodedWordsSplit) {
+          adjustedWordToMatch = `${delimiter}${decodedWordPart}${delimiter}`;
+          allOccurrences.push(paddedWordsToMatch.split(adjustedWordToMatch).length - 1);
+        }
+        occurrences = Math.min(...allOccurrences);
+        return occurrences > 0 ? { rule, occurrences } : false;
+      case 'regexp':
+        occurrences = [...wordsToMatch.matchAll(new RegExp(decodedWord, 'gi'))].length;
+        return occurrences > 0 ? { rule, occurrences } : false;
+      default:
+        return false;
     }
-    return false;
+
+    const paddedWordsToMatch = `${delimiter}${wordsToMatch}${delimiter}`;
+    occurrences = paddedWordsToMatch.split(adjustedWordToMatch).length - 1;
+    return occurrences > 0 ? { rule, occurrences } : false;
   };
 
   // Default parameters if the bidder is unable to send some of them
@@ -91,11 +121,11 @@ function brandSafety(badWords, maxScore) {
       .toLowerCase()
       .trim();
     const content = window.top.document.body.innerText.toLowerCase();
-    const contentWords = content.trim().split(/\s+/).length;
     // \p{L} matches a single unicode code point in the category 'letter'. Matches any kind of letter from any language.
     const regexp = new RegExp('[\\p{L}]+', 'gu');
-    const words = content.match(regexp);
-    const wordsInUrl = wordsAndNumbersInUrl.match(regexp);
+    const wordsMatched = content.match(regexp);
+    const words = wordsMatched.join(`${delimiter}${delimiter}`);
+    const wordsInUrl = wordsAndNumbersInUrl.match(regexp).join(`${delimiter}${delimiter}`);
 
     for (const [word, rule, points] of badWords) {
       const decodedWord = rot13(word.toLowerCase());
@@ -110,19 +140,11 @@ function brandSafety(badWords, maxScore) {
 
       // Check if site content's words match any of our brand safety rules
       const matchedRule = wordsMatchedWithRule(rule, decodedWord, words);
-      if (matchedRule.rule === 'full') {
-        score += scoreCalculator(points, matchedRule.occurances);
-      } else if (matchedRule.rule === 'partial') {
-        score += scoreCalculator(points, matchedRule.occurances);
-      } else if (matchedRule.rule === 'starts') {
-        score += scoreCalculator(points, matchedRule.occurances);
-      } else if (matchedRule.rule === 'ends') {
-        score += scoreCalculator(points, matchedRule.occurances);
-      } else if (matchedRule.rule === 'regexp') {
-        score += scoreCalculator(points, matchedRule.occurances);
+      if (matchedRule !== false) {
+        score += scoreCalculator(points, matchedRule.occurrences);
       }
     }
-    return score < (maxScore * contentWords) / 1000;
+    return score < (maxScore * wordsMatched.length) / 1000;
   } catch (e) {
     return true;
   }
@@ -183,8 +205,8 @@ export const spec = {
       }
 
       // Needed for the ad density calculation
-      var adHeight = validBidRequests[i].sizes[index][1];
-      var adWidth = validBidRequests[i].sizes[index][0];
+      const adHeight = validBidRequests[i].sizes[index][1];
+      const adWidth = validBidRequests[i].sizes[index][0];
       if (!window.adsCount) {
         window.adsCount = 0;
       }
@@ -247,7 +269,7 @@ export const spec = {
     const bidderResponse = JSON.stringify({ responseText: JSON.stringify(responseBody) });
     const requestData = JSON.stringify(request.data);
 
-    var response = {
+    let response = {
       requestId: request.bidRequest.bidId,
       cpm: responseBody.creatives[0].costEUR,
       width: request.bidRequest.sizes[0][0],

diff --git a/test/spec/modules/adhashBidAdapter_spec.js b/test/spec/modules/adhashBidAdapter_spec.js
@@ -88,7 +88,7 @@ describe('adhashBidAdapter', function () {
       );
       expect(result.length).to.equal(1);
       expect(result[0].method).to.equal('POST');
-      expect(result[0].url).to.equal('https://bidder.adhash.com/rtb?version=3.2&prebid=true&publisher=0xc3b09b27e9c6ef73957901aa729b9e69e5bbfbfb');
+      expect(result[0].url).to.equal('https://bidder.adhash.com/rtb?version=3.6&prebid=true&publisher=0xc3b09b27e9c6ef73957901aa729b9e69e5bbfbfb');
       expect(result[0].bidRequest).to.equal(bidRequest);
       expect(result[0].data).to.have.property('timezone');
       expect(result[0].data).to.have.property('location');
@@ -104,7 +104,7 @@ describe('adhashBidAdapter', function () {
       const result = spec.buildRequests([ bidRequest ], { gdprConsent: { gdprApplies: true, consentString: 'example' } });
       expect(result.length).to.equal(1);
       expect(result[0].method).to.equal('POST');
-      expect(result[0].url).to.equal('https://bidder.adhash.com/rtb?version=3.2&prebid=true&publisher=0xc3b09b27e9c6ef73957901aa729b9e69e5bbfbfb');
+      expect(result[0].url).to.equal('https://bidder.adhash.com/rtb?version=3.6&prebid=true&publisher=0xc3b09b27e9c6ef73957901aa729b9e69e5bbfbfb');
       expect(result[0].bidRequest).to.equal(bidRequest);
       expect(result[0].data).to.have.property('timezone');
       expect(result[0].data).to.have.property('location');
@@ -152,6 +152,8 @@ describe('adhashBidAdapter', function () {
           ['дума', 'full', 1],
           ['старт', 'starts', 1],
           ['край', 'ends', 1],
+          ['onq jbeq', 'partial', 1],
+          ['dhrra qvrf', 'combo', 2],
         ],
         maxScore: 2
       }
@@ -196,6 +198,13 @@ describe('adhashBidAdapter', function () {
       expect(spec.interpretResponse(serverResponse, request).length).to.equal(0);
     });
 
+    it('should return empty array when there are bad words (partial, compound phrase)', function () {
+      bodyStub = sinon.stub(window.top.document.body, 'innerText').get(function() {
+        return 'example text partialbad wordb bad wordb example bad wordbtext' + ' word'.repeat(994);
+      });
+      expect(spec.interpretResponse(serverResponse, request).length).to.equal(0);
+    });
+
     it('should return empty array when there are bad words (starts)', function () {
       bodyStub = sinon.stub(window.top.document.body, 'innerText').get(function() {
         return 'example text startsWith starts text startsAgain' + ' word'.repeat(994);
@@ -224,6 +233,13 @@ describe('adhashBidAdapter', function () {
       expect(spec.interpretResponse(serverResponse, request).length).to.equal(0);
     });
 
+    it('should return empty array when there are bad words (combo)', function () {
+      bodyStub = sinon.stub(window.top.document.body, 'innerText').get(function() {
+        return 'queen of england dies, the queen dies' + ' word'.repeat(993);
+      });
+      expect(spec.interpretResponse(serverResponse, request).length).to.equal(0);
+    });
+
     it('should return empty array when there are bad words (regexp)', function () {
       bodyStub = sinon.stub(window.top.document.body, 'innerText').get(function() {
         return 'example text xxxayyy zzxxxAyyyzz text xxxbyyy' + ' word'.repeat(994);