Skip to content

Commit

Permalink
AdHash Bidder Adapter: update for brand safety (prebid#10087)
Browse files Browse the repository at this point in the history
* AdHash Bidder Adapter: minor changes

We're operating on a .com TLD now.
Added publisher in URL for easier routing.

* Implemented brand safety

Implemented brand safety checks

* Fix for GDPR consent

Removing the extra information as request data becomes too big and is sometimes truncated

* Ad fraud prevention formula changed

Ad fraud prevention formula changed to support negative values as well as linear distribution of article length

* AdHash brand safety additions

Adding starts-with and ends-with rules that will help us with languages such as German where a single word can be written in multiple ways depending on the gender and grammatical case.

* AdHash brand safety updates

Added support for Cyrillic characters.
Added support for bidderURL parameter.
Fixed score multiplier from 500 to 1000.

* AdHash Analytics adapter

* Support for recent ads

Support for recent ads which gives us the option to do frequency and recency capping.

* Fix for timestamp

* PUB-222

Added logic for measuring the fill rate (fallbacks) for Prebid impressions

* Unit tests for the analytics adapter

Added unit tests for the analytics adapter

* Removed export causing errors

Removed an unneeded export of a const that was causing errors with the analytics adapter

* Added globalScript parameter

* PUB-227

Support for non-Latin and non-Cyrillic symbols

* GEN-964

- Brand safety now checks the page URL for bad words. No ad is shown if there is at least one match.
- Repeated code is optimized and moved to a helper function
- Multi-language support for brand safety

* GEN-1025

Sending the needed ad density data to the bidder

* Removing the analytics adapter

* Fix for regexp match

* Version change

* MINOR

Code review changes

* GEN-1153

Adding support for preroll ads

* MINOR

Video unit test added

* Removing globalScript flag

* Brand safety change

Adding support for compound words as well as combo-patterns.

---------

Co-authored-by: NikolayMGeorgiev <nikolay@adhash.org>
Co-authored-by: Ventsislav Saraminev <v.saraminev@abv.bg>
Co-authored-by: Dimitar Kalenderov <mitko.kalenderov@gmail.com>
  • Loading branch information
4 people authored and Michele Nasti committed Aug 25, 2023
1 parent 859667e commit 51f4590
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 36 deletions.
90 changes: 56 additions & 34 deletions modules/adhashBidAdapter.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import {registerBidder} from '../src/adapters/bidderFactory.js';
import { registerBidder } from '../src/adapters/bidderFactory.js';
import { getStorageManager } from '../src/storageManager.js';
import { includes } from '../src/polyfill.js';
import { BANNER, VIDEO } from '../src/mediaTypes.js';

const VERSION = '3.2';
const VERSION = '3.6';
const BAD_WORD_STEP = 0.1;
const BAD_WORD_MIN = 0.2;
const ADHASH_BIDDER_CODE = 'adhash';
Expand All @@ -19,6 +19,8 @@ const ADHASH_BIDDER_CODE = 'adhash';
* @returns {boolean} flag indicating whether the page is safe
*/
function brandSafety(badWords, maxScore) {
const delimiter = '~';

/**
* Performs the ROT13 encoding on the string argument and returns the resulting string.
* The Adhash bidder uses ROT13 so that the response is not blocked by:
Expand All @@ -40,17 +42,17 @@ function brandSafety(badWords, maxScore) {
/**
* Calculates the scoring for each bad word with diminishing returns
* @param {integer} points points that this word costs
* @param {integer} occurances number of occurances
* @param {integer} occurrences number of occurrences
* @returns {float} final score
*/
const scoreCalculator = (points, occurances) => {
const scoreCalculator = (points, occurrences) => {
let positive = true;
if (points < 0) {
points *= -1;
positive = false;
}
let result = 0;
for (let i = 0; i < occurances; i++) {
for (let i = 0; i < occurrences; i++) {
result += Math.max(points - i * BAD_WORD_STEP, BAD_WORD_MIN);
}
return positive ? result : -result;
Expand All @@ -60,22 +62,50 @@ function brandSafety(badWords, maxScore) {
* Checks what rule will match in the given array with words
* @param {string} rule rule type (full, partial, starts, ends, combo, regexp)
* @param {string} decodedWord decoded word
* @param {array} wordsToMatch array to find a match
* @param {string} wordsToMatch list of all words on the page separated by delimiters
* @returns {object|boolean} matched rule and occurrences. Returns false if nothing is matched
*/
const wordsMatchedWithRule = function (rule, decodedWord, wordsToMatch) {
if (rule === 'full' && wordsToMatch && wordsToMatch.includes(decodedWord)) {
return { rule, occurances: wordsToMatch.filter(element => element === decodedWord).length };
} else if (rule === 'partial' && wordsToMatch && wordsToMatch.some(element => element.indexOf(decodedWord) > -1)) {
return { rule, occurances: wordsToMatch.filter(element => element.indexOf(decodedWord) > -1).length };
} else if (rule === 'starts' && wordsToMatch && wordsToMatch.some(word => word.startsWith(decodedWord))) {
return { rule, occurances: wordsToMatch.filter(element => element.startsWith(decodedWord)).length };
} else if (rule === 'ends' && wordsToMatch && wordsToMatch.some(word => word.endsWith(decodedWord))) {
return { rule, occurances: wordsToMatch.filter(element => element.endsWith(decodedWord)).length };
} else if (rule === 'regexp' && wordsToMatch && wordsToMatch.some(element => element.match(new RegExp(decodedWord, 'i')))) {
return { rule, occurances: wordsToMatch.filter(element => element.match(new RegExp(decodedWord, 'i'))).length };
if (!wordsToMatch) {
return false;
}

let occurrences;
let adjustedWordToMatch;
decodedWord = decodedWord.split(' ').join(`${delimiter}${delimiter}`);
switch (rule) {
case 'full':
adjustedWordToMatch = `${delimiter}${decodedWord}${delimiter}`;
break;
case 'partial':
adjustedWordToMatch = decodedWord;
break;
case 'starts':
adjustedWordToMatch = `${delimiter}${decodedWord}`;
break;
case 'ends':
adjustedWordToMatch = `${decodedWord}${delimiter}`;
break;
case 'combo':
const allOccurrences = [];
const paddedWordsToMatch = `${delimiter}${wordsToMatch}${delimiter}`;
const decodedWordsSplit = decodedWord.split(`${delimiter}${delimiter}`);
for (const decodedWordPart of decodedWordsSplit) {
adjustedWordToMatch = `${delimiter}${decodedWordPart}${delimiter}`;
allOccurrences.push(paddedWordsToMatch.split(adjustedWordToMatch).length - 1);
}
occurrences = Math.min(...allOccurrences);
return occurrences > 0 ? { rule, occurrences } : false;
case 'regexp':
occurrences = [...wordsToMatch.matchAll(new RegExp(decodedWord, 'gi'))].length;
return occurrences > 0 ? { rule, occurrences } : false;
default:
return false;
}
return false;

const paddedWordsToMatch = `${delimiter}${wordsToMatch}${delimiter}`;
occurrences = paddedWordsToMatch.split(adjustedWordToMatch).length - 1;
return occurrences > 0 ? { rule, occurrences } : false;
};

// Default parameters if the bidder is unable to send some of them
Expand All @@ -91,11 +121,11 @@ function brandSafety(badWords, maxScore) {
.toLowerCase()
.trim();
const content = window.top.document.body.innerText.toLowerCase();
const contentWords = content.trim().split(/\s+/).length;
// \p{L} matches a single unicode code point in the category 'letter'. Matches any kind of letter from any language.
const regexp = new RegExp('[\\p{L}]+', 'gu');
const words = content.match(regexp);
const wordsInUrl = wordsAndNumbersInUrl.match(regexp);
const wordsMatched = content.match(regexp);
const words = wordsMatched.join(`${delimiter}${delimiter}`);
const wordsInUrl = wordsAndNumbersInUrl.match(regexp).join(`${delimiter}${delimiter}`);

for (const [word, rule, points] of badWords) {
const decodedWord = rot13(word.toLowerCase());
Expand All @@ -110,19 +140,11 @@ function brandSafety(badWords, maxScore) {

// Check if site content's words match any of our brand safety rules
const matchedRule = wordsMatchedWithRule(rule, decodedWord, words);
if (matchedRule.rule === 'full') {
score += scoreCalculator(points, matchedRule.occurances);
} else if (matchedRule.rule === 'partial') {
score += scoreCalculator(points, matchedRule.occurances);
} else if (matchedRule.rule === 'starts') {
score += scoreCalculator(points, matchedRule.occurances);
} else if (matchedRule.rule === 'ends') {
score += scoreCalculator(points, matchedRule.occurances);
} else if (matchedRule.rule === 'regexp') {
score += scoreCalculator(points, matchedRule.occurances);
if (matchedRule !== false) {
score += scoreCalculator(points, matchedRule.occurrences);
}
}
return score < (maxScore * contentWords) / 1000;
return score < (maxScore * wordsMatched.length) / 1000;
} catch (e) {
return true;
}
Expand Down Expand Up @@ -183,8 +205,8 @@ export const spec = {
}

// Needed for the ad density calculation
var adHeight = validBidRequests[i].sizes[index][1];
var adWidth = validBidRequests[i].sizes[index][0];
const adHeight = validBidRequests[i].sizes[index][1];
const adWidth = validBidRequests[i].sizes[index][0];
if (!window.adsCount) {
window.adsCount = 0;
}
Expand Down Expand Up @@ -247,7 +269,7 @@ export const spec = {
const bidderResponse = JSON.stringify({ responseText: JSON.stringify(responseBody) });
const requestData = JSON.stringify(request.data);

var response = {
let response = {
requestId: request.bidRequest.bidId,
cpm: responseBody.creatives[0].costEUR,
width: request.bidRequest.sizes[0][0],
Expand Down
20 changes: 18 additions & 2 deletions test/spec/modules/adhashBidAdapter_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ describe('adhashBidAdapter', function () {
);
expect(result.length).to.equal(1);
expect(result[0].method).to.equal('POST');
expect(result[0].url).to.equal('https://bidder.adhash.com/rtb?version=3.2&prebid=true&publisher=0xc3b09b27e9c6ef73957901aa729b9e69e5bbfbfb');
expect(result[0].url).to.equal('https://bidder.adhash.com/rtb?version=3.6&prebid=true&publisher=0xc3b09b27e9c6ef73957901aa729b9e69e5bbfbfb');
expect(result[0].bidRequest).to.equal(bidRequest);
expect(result[0].data).to.have.property('timezone');
expect(result[0].data).to.have.property('location');
Expand All @@ -104,7 +104,7 @@ describe('adhashBidAdapter', function () {
const result = spec.buildRequests([ bidRequest ], { gdprConsent: { gdprApplies: true, consentString: 'example' } });
expect(result.length).to.equal(1);
expect(result[0].method).to.equal('POST');
expect(result[0].url).to.equal('https://bidder.adhash.com/rtb?version=3.2&prebid=true&publisher=0xc3b09b27e9c6ef73957901aa729b9e69e5bbfbfb');
expect(result[0].url).to.equal('https://bidder.adhash.com/rtb?version=3.6&prebid=true&publisher=0xc3b09b27e9c6ef73957901aa729b9e69e5bbfbfb');
expect(result[0].bidRequest).to.equal(bidRequest);
expect(result[0].data).to.have.property('timezone');
expect(result[0].data).to.have.property('location');
Expand Down Expand Up @@ -152,6 +152,8 @@ describe('adhashBidAdapter', function () {
['дума', 'full', 1],
['старт', 'starts', 1],
['край', 'ends', 1],
['onq jbeq', 'partial', 1],
['dhrra qvrf', 'combo', 2],
],
maxScore: 2
}
Expand Down Expand Up @@ -196,6 +198,13 @@ describe('adhashBidAdapter', function () {
expect(spec.interpretResponse(serverResponse, request).length).to.equal(0);
});

it('should return empty array when there are bad words (partial, compound phrase)', function () {
bodyStub = sinon.stub(window.top.document.body, 'innerText').get(function() {
return 'example text partialbad wordb bad wordb example bad wordbtext' + ' word'.repeat(994);
});
expect(spec.interpretResponse(serverResponse, request).length).to.equal(0);
});

it('should return empty array when there are bad words (starts)', function () {
bodyStub = sinon.stub(window.top.document.body, 'innerText').get(function() {
return 'example text startsWith starts text startsAgain' + ' word'.repeat(994);
Expand Down Expand Up @@ -224,6 +233,13 @@ describe('adhashBidAdapter', function () {
expect(spec.interpretResponse(serverResponse, request).length).to.equal(0);
});

it('should return empty array when there are bad words (combo)', function () {
bodyStub = sinon.stub(window.top.document.body, 'innerText').get(function() {
return 'queen of england dies, the queen dies' + ' word'.repeat(993);
});
expect(spec.interpretResponse(serverResponse, request).length).to.equal(0);
});

it('should return empty array when there are bad words (regexp)', function () {
bodyStub = sinon.stub(window.top.document.body, 'innerText').get(function() {
return 'example text xxxayyy zzxxxAyyyzz text xxxbyyy' + ' word'.repeat(994);
Expand Down

0 comments on commit 51f4590

Please sign in to comment.