Skip to content
This repository has been archived by the owner on Dec 11, 2019. It is now read-only.

Adds media publishers support #11727

Merged
merged 3 commits into from
Nov 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
230 changes: 193 additions & 37 deletions app/browser/api/ledger.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const acorn = require('acorn')
const moment = require('moment')
const Immutable = require('immutable')
const electron = require('electron')
const ipc = electron.ipcMain
const path = require('path')
const os = require('os')
const qr = require('qr-image')
Expand All @@ -29,6 +30,7 @@ const migrationState = require('../../common/state/migrationState')

// Constants
const settings = require('../../../js/constants/settings')
const messages = require('../../../js/constants/messages')

// Utils
const tabs = require('../../browser/tabs')
Expand All @@ -42,6 +44,7 @@ const ledgerUtil = require('../../common/lib/ledgerUtil')
const tabState = require('../../common/state/tabState')
const pageDataUtil = require('../../common/lib/pageDataUtil')
const ledgerNotifications = require('./ledgerNotifications')
const ledgerVideoCache = require('../../common/cache/ledgerVideoCache')

// Caching
let locationDefault = 'NOOP'
Expand Down Expand Up @@ -107,6 +110,38 @@ underscore.keys(fileTypes).forEach((fileType) => {
})
signatureMax = Math.ceil(signatureMax * 1.5)

if (ipc) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Accidentally removed in #11882

ipc.on(messages.LEDGER_PUBLISHER, (event, location) => {
if (!synopsis || event.sender.session === electron.session.fromPartition('default') || !tldjs.isValid(location)) {
event.returnValue = {}
return
}

let ctx = urlParse(location, true)
ctx.TLD = tldjs.getPublicSuffix(ctx.host)
if (!ctx.TLD) {
if (_internal.verboseP) console.log('\nno TLD for:' + ctx.host)
event.returnValue = {}
return
}

ctx = underscore.mapObject(ctx, function (value) {
if (!underscore.isFunction(value)) return value
})
ctx.URL = location
ctx.SLD = tldjs.getDomain(ctx.host)
ctx.RLD = tldjs.getSubdomain(ctx.host)
ctx.QLD = ctx.RLD ? underscore.last(ctx.RLD.split('.')) : ''

if (!event.sender.isDestroyed()) {
event.sender.send(messages.LEDGER_PUBLISHER_RESPONSE + '-' + location, {
context: ctx,
rules: _internal.ruleset.cooked
})
}
})
}

let ledgerPaymentsPresent = {}
const paymentPresent = (state, tabId, present) => {
if (present) {
Expand Down Expand Up @@ -247,7 +282,8 @@ const getPublisherData = (result, scorekeeper) => {
let data = {
verified: result.options.verified || false,
exclude: result.options.exclude || false,
site: result.publisherKey,
publisherKey: result.publisherKey,
siteName: result.publisherKey,
views: result.visits,
duration: duration,
daysSpent: 0,
Expand All @@ -259,9 +295,16 @@ const getPublisherData = (result, scorekeeper) => {
pinPercentage: result.pinPercentage,
weight: result.pinPercentage
}
// HACK: Protocol is sometimes blank here, so default to http:// so we can
// still generate publisherURL.
data.publisherURL = (result.protocol || 'http:') + '//' + result.publisherKey

data.publisherURL = result.publisherURL || ((result.protocol || 'https:') + '//' + result.publisherKey)

// media publisher
if (result.faviconName) {
data.siteName = locale.translation('publisherMediaName', {
publisherName: result.faviconName,
provider: result.providerName
})
}

if (duration >= miliseconds.day) {
data.daysSpent = Math.max(Math.round(duration / miliseconds.day), 1)
Expand Down Expand Up @@ -543,7 +586,7 @@ const excludeP = (publisherKey, callback) => {
return done(err, result)
}

let props = ledgerPublisher.getPublisherProps('https://' + publisherKey)
let props = ledgerPublisher.getPublisherProps(publisherKey)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is publisherKey ultimately generated? just making sure it has some kind of input validation

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we get it from bat-publisher library https://github.com/brave/browser-laptop/pull/11727/files#diff-3cdfdb124f718787d584e9e2b82fcedcR2505, with ledgerPublisher.getMedia.getPublisherFromMediaProps

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

@NejcZdovc NejcZdovc Nov 10, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In https://github.com/brave-intl/bat-publisher/blob/d896a93465df57b3cca227b90530f0b40044d5fc/getMedia.js#L77 we call publisher oembed, which is defined here https://github.com/brave-intl/bat-publisher/blob/d896a93465df57b3cca227b90530f0b40044d5fc/media/providers.json#L7 (for youtube is https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v=kLiLOkzLetE) and we get back something like this

{"html":"\u003ciframe width=\"480\" height=\"270\" src=\"https:\/\/www.youtube.com\/embed\/kLiLOkzLetE?feature=oembed\" frameborder=\"0\" gesture=\"media\" allowfullscreen\u003e\u003c\/iframe\u003e","title":"This Is Brave","thumbnail_height":360,"version":"1.0","provider_name":"YouTube","thumbnail_width":480,"width":480,"author_name":"Brave","author_url":"https:\/\/www.youtube.com\/channel\/UCFNTTISby1c_H-rm5Ww5rZg","height":270,"thumbnail_url":"https:\/\/i.ytimg.com\/vi\/kLiLOkzLetE\/hqdefault.jpg","provider_url":"https:\/\/www.youtube.com\/","type":"video"}

which contains author_url

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think https://github.com/brave-intl/bat-publisher/blob/d896a93465df57b3cca227b90530f0b40044d5fc/getMedia.js#L91 needs to validate that author_url is a valid URL. this is easy because if not then parts will be null

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these last two are fixed in bat-publisher@1.2.1 and bat-client@1.4.2

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

versions updated in this PR

if (!props) return done()

v2RulesetDB.createReadStream({lt: 'domain:'}).on('data', (data) => {
Expand Down Expand Up @@ -584,30 +627,26 @@ const excludeP = (publisherKey, callback) => {
})
}

const addVisit = (state, startTimestamp, location, tabId) => {
const addSiteVisit = (state, timestamp, location, tabId) => {
if (!synopsis) {
return state
}

location = pageDataUtil.getInfoKey(location)
const locationData = ledgerState.getLocation(state, location)
const timestamp = new Date().getTime()
const duration = new Date().getTime() - timestamp
if (_internal.verboseP) {
console.log(
`locations[${location}]=${JSON.stringify(locationData, null, 2)} ` +
`duration=${(timestamp - startTimestamp)} msec tabId= ${tabId}`
`duration=${(duration)} msec tabId= ${tabId}`
)
}
if (locationData.isEmpty() || !tabId) {
return state
}

let publisherKey = locationData.get('publisher')
if (!publisherKey) {
if (locationData.isEmpty()) {
return state
}

let duration = timestamp - startTimestamp
let publisherKey = locationData.get('publisher')
let revisitP = false

if (duration >= getSetting(settings.PAYMENTS_MINIMUM_VISIT_TIME)) {
Expand All @@ -627,13 +666,19 @@ const addVisit = (state, startTimestamp, location, tabId) => {
}
}

return saveVisit(state, publisherKey, duration, revisitP)
}

const saveVisit = (state, publisherKey, duration, revisited) => {
if (!synopsis || !publisherKey) {
return state
}

if (_internal.verboseP) {
console.log('\nadd publisher ' + publisherKey + ': ' + (duration / 1000) + ' sec' + ' revisitP=' + revisitP + ' state=' +
JSON.stringify(underscore.extend({location: location}, visitsByPublisher[publisherKey][location]),
null, 2))
console.log('\nadd publisher ' + publisherKey + ': ' + (duration / 1000) + ' sec' + ' revisitP=' + revisited)
}

synopsis.addPublisher(publisherKey, {duration: duration, revisitP: revisitP})
synopsis.addPublisher(publisherKey, {duration: duration, revisitP: revisited})
state = ledgerState.setPublisher(state, publisherKey, synopsis.publishers[publisherKey])
state = updatePublisherInfo(state)
state = checkVerifiedStatus(state, publisherKey)
Expand Down Expand Up @@ -684,7 +729,7 @@ const addNewLocation = (state, location, tabId = tabState.TAB_ID_NONE, keepInfo

// Add visit to the ledger when we are not in a private tab
if (!isPrivate && !tabFromState.isEmpty() && ledgerUtil.shouldTrackView(tabFromState)) {
state = addVisit(state, currentTimestamp, currentUrl, currentTabId)
state = addSiteVisit(state, currentTimestamp, currentUrl, currentTabId)
}
}

Expand Down Expand Up @@ -1130,21 +1175,12 @@ const cacheRuleSet = (state, ruleset) => {
for (let item of publishers) {
const publisherKey = item[0]
const publisher = item[1]
const location = (publisher.get('protocol') || 'http:') + '//' + publisherKey
let ctx = urlParse(location)
const ctx = ledgerPublisher.getPublisherProps(publisherKey)

ctx.TLD = tldjs.getPublicSuffix(ctx.host)
if (!ctx.TLD) {
return state
}
if (!ctx.TLD) continue

ctx = underscore.mapObject(ctx, function (value) {
if (!underscore.isFunction(value)) return value
})
ctx.URL = location
ctx.SLD = tldjs.getDomain(ctx.host)
ctx.RLD = tldjs.getSubdomain(ctx.host)
ctx.QLD = ctx.RLD ? underscore.last(ctx.RLD.split('.')) : ''
if (publisher.publisherURL) ctx.URL = publisher.publisherURL
if (!ctx.URL) ctx.URL = (publisher.get('protocol') || 'https:') + '//' + publisherKey

stewed.forEach((rule) => {
if (rule.consequent !== null || rule.dom) return
Expand Down Expand Up @@ -1178,7 +1214,8 @@ const roundtrip = (params, options, callback) => {
let parts = typeof params.server === 'string' ? urlParse(params.server)
: typeof params.server !== 'undefined' ? params.server
: typeof options.server === 'string' ? urlParse(options.server) : options.server
const rawP = options.rawP
const binaryP = options.binaryP
const rawP = binaryP || options.rawP

if (!params.method) params.method = 'GET'
parts = underscore.extend(underscore.pick(parts, ['protocol', 'hostname', 'port']),
Expand All @@ -1205,7 +1242,7 @@ const roundtrip = (params, options, callback) => {
url: urlFormat(parts),
method: params.method,
payload: params.payload,
responseType: 'text',
responseType: binaryP ? 'binary' : 'text',
headers: underscore.defaults(params.headers || {}, {'content-type': 'application/json; charset=utf-8'}),
verboseP: options.verboseP
}
Expand All @@ -1220,7 +1257,7 @@ const roundtrip = (params, options, callback) => {
console.log('>>> ' + header + ': ' + response.headers[header])
})
console.log('>>>')
console.log('>>> ' + (body || '').split('\n').join('\n>>> '))
console.log('>>> ' + (rawP ? '...' : (body || '').split('\n').join('\n>>> ')))
}

if (err) return callback(err, response)
Expand Down Expand Up @@ -2155,6 +2192,117 @@ const transitionWalletToBat = () => {
}
}

let currentMediaKey = null
const onMediaRequest = (state, xhr, type, tabId) => {
if (!xhr || type == null) {
return state
}

const parsed = ledgerUtil.getMediaData(xhr, type)
const mediaId = ledgerUtil.getMediaId(parsed, type)
const mediaKey = ledgerUtil.getMediaKey(mediaId, type)
let duration = ledgerUtil.getMediaDuration(parsed, type)

if (mediaId == null || duration == null || mediaKey == null) {
return state
}

const minDuration = getSetting(settings.PAYMENTS_MINIMUM_VISIT_TIME)
duration = parseInt(duration)
if (duration > 0 && duration < minDuration) {
duration = minDuration
}

if (!ledgerPublisher) {
ledgerPublisher = require('bat-publisher')
}

let revisited = true
const activeTabId = tabState.getActiveTabId(state)
if (activeTabId === tabId && mediaKey !== currentMediaKey) {
revisited = false
currentMediaKey = mediaKey
}

const cache = ledgerVideoCache.getDataByVideoId(state, mediaKey)

if (!cache.isEmpty()) {
return module.exports.saveVisit(state, cache.get('publisher'), duration, revisited)
}

const options = underscore.extend({roundtrip: roundtrip}, clientOptions)
const mediaProps = {
mediaId,
providerName: type
}

ledgerPublisher.getMedia.getPublisherFromMediaProps(mediaProps, options, (error, response) => {
Copy link
Member

@diracdeltas diracdeltas Nov 9, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. bat-publisher should throw an error if the media URL protocol is not "https:"
  2. it appears bat-publisher is making a call to a youtube.com page from the browser main process, scraping and parsing the page HTML, and extracting page data from it. from a security perspective, it would be much better to do this in a page content script that runs when the user visits a youtube page (ex: how we currently extract favicons from publisher pages). https://github.com/brave-intl/bat-publisher/blob/d896a93465df57b3cca227b90530f0b40044d5fc/getMedia.js#L103

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. bat-publisher is being passed a URL that the browser is playing. whether it is http? is immaterial. the goal is to look at the URL string and figure out where to look for the oembed data.

  2. i think that this is a worthy future, but not essential at this point. it is certainly good to have routines that parse data run in a separate context. it's not clear to me why that is "more necessary" in this case. we can open up an issue for this, and when we do, it would be worthwhile to see what other cases might exist!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. is an easy change and should be done for paranoia (AKA security in depth) in case there's ever code that looks at the URL and fetches it.

I've opened a follow up issue for 2: #11889

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mrose17 I think comment 1 is still unaddressed? I'd also strongly prefer we address 2 before moving on. The follow up issue seems to be to address the general case, but no reason for us to not adhere to the correct practices now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mrose17 I'm going to hold off on merge until @evq 's concern can be addressed (and we have a sign off in https://github.com/brave/internal/issues/141). Let us know if we need to update libs after that is addressed

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. is addressed. we aren't opening a URL, we are parsing it in order to find an oembed provider. if the browser decides to play it and then ask for information, then the module will provide the information. however, in the interest of moving things along, i have pushed bat-publisher@1.2.3 that makes this change.

  2. it is not a simple thing to make this change (it has to be made in the browser). nor am i convinced that it makes sense to require this code be held to a standard higher than any other code in the browser. as i noted in my comment.

everything has a cost. i suppose we can address 1, because it's a small change, even if i do view it as unnecessary. in addressing 2, it has a large cost (i already researched it with @NejcZdovc) and addresses a theoretical concern. there is significant cost to that we will not be able to merge this upcoming week. i don't have an issue with the browser folks reviewing the issue that @diracdeltas created after this merge, and having the same standard applied to the entire browser. i most certainly have an issue having this new standard being applied in this context and in this timeline.

if (error) {
console.error('Error while getting publisher from media', error.toString())
return
}

// publisher not found
if (!response) {
return
}

appActions.onLedgerMediaPublisher(mediaKey, response, duration, revisited)
})

return state
}

const onMediaPublisher = (state, mediaKey, response, duration, revisited) => {
const publisherKey = response ? response.get('publisher') : null
if (publisherKey == null) {
return state
}

let publisher = ledgerState.getPublisher(state, publisherKey)
const faviconName = response.get('faviconName')
const faviconURL = response.get('faviconURL')
const publisherURL = response.get('publisherURL')

if (publisher.isEmpty()) {
revisited = false
synopsis.initPublisher(publisherKey)

synopsis.publishers[publisherKey].faviconName = faviconName
synopsis.publishers[publisherKey].faviconURL = faviconURL
synopsis.publishers[publisherKey].publisherURL = publisherURL
synopsis.publishers[publisherKey].providerName = response.get('providerName')

if (synopsis.publishers[publisherKey]) {
state = ledgerState.setPublisher(state, publisherKey, synopsis.publishers[publisherKey])
}

if (!getSetting(settings.PAYMENTS_SITES_AUTO_SUGGEST)) {
appActions.onPublisherOptionUpdate(publisherKey, 'exclude', true)
savePublisherOption(publisherKey, 'exclude', true)
} else {
excludeP(publisherKey, (unused, exclude) => {
appActions.onPublisherOptionUpdate(publisherKey, 'exclude', exclude)
savePublisherOption(publisherKey, 'exclude', exclude)
})
}
} else {
synopsis.publishers[publisherKey].faviconName = faviconName
synopsis.publishers[publisherKey].faviconURL = faviconURL
synopsis.publishers[publisherKey].publisherURL = publisherURL
state = ledgerState.setPublishersProp(state, publisherKey, 'faviconName', faviconName)
state = ledgerState.setPublishersProp(state, publisherKey, 'faviconURL', faviconURL)
state = ledgerState.setPublishersProp(state, publisherKey, 'publisherURL', publisherURL)
}

// Add to cache
state = ledgerVideoCache.setCacheByVideoId(state, mediaKey, response)

state = module.exports.saveVisit(state, publisherKey, duration, revisited)

return state
}

const getMethods = () => {
const publicMethods = {
backupKeys,
Expand Down Expand Up @@ -2189,15 +2337,19 @@ const getMethods = () => {
getNewClient,
savePublisherData,
pruneSynopsis,
checkBtcBatMigrated
checkBtcBatMigrated,
onMediaRequest,
onMediaPublisher,
saveVisit
}

let privateMethods = {}

if (process.env.NODE_ENV === 'test') {
privateMethods = {
enable,
addVisit,
addSiteVisit,
checkBtcBatMigrated,
clearVisitsByPublisher: function () {
visitsByPublisher = {}
},
Expand All @@ -2217,6 +2369,10 @@ const getMethods = () => {
setClient: (data) => {
client = data
},
setCurrentMediaKey: (key) => {
currentMediaKey = key
},
getCurrentMediaKey: (key) => currentMediaKey,
synopsisNormalizer,
checkVerifiedStatus
}
Expand Down
16 changes: 16 additions & 0 deletions app/browser/reducers/ledgerReducer.js
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,22 @@ const ledgerReducer = (state, action, immutableAction) => {
state = ledgerState.setLedgerValue(state, 'publisherTimestamp', action.get('timestamp'))
break
}
case appConstants.APP_ON_LEDGER_MEDIA_DATA:
{
state = ledgerApi.onMediaRequest(state, action.get('url'), action.get('type'), action.get('tabId'))
break
}
case appConstants.APP_ON_LEDGER_MEDIA_PUBLISHER:
{
state = ledgerApi.onMediaPublisher(
state,
action.get('mediaKey'),
action.get('response'),
action.get('duration'),
action.get('revisited')
)
break
}
}
return state
}
Expand Down
Loading