Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(logo): avoid data uri with no length #691

Merged
merged 1 commit into from
Feb 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,12 @@
"commitlint": {
"extends": [
"@commitlint/config-conventional"
]
],
"rules": {
"body-max-length": [
0
]
}
},
"nano-staged": {
"*.js": [
Expand Down
13 changes: 10 additions & 3 deletions packages/metascraper-helpers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const _normalizeUrl = require('normalize-url')
const smartquotes = require('smartquotes')
const { decodeHTML } = require('entities')
const iso6393 = require('iso-639-3/to-1')
const dataUri = require('data-uri-utils')
const hasValues = require('has-values')
const chrono = require('chrono-node')
const isIso = require('isostring')
Expand Down Expand Up @@ -375,11 +376,17 @@ const $jsonld = propName => $ => {

// Diff view of the `image` helper: the removed `return urlValue !== undefined &&`
// expression and the added `const result = ...` form appear interleaved below,
// without +/- markers.
// Added behaviour: the ternary yields the `url(value, opts)` result, or
// undefined when that result is undefined or isAudioUrl/isVideoUrl match it.
// That value is returned as-is unless `dataUri.test` accepts it; a data URI is
// kept only when its decoded buffer has a non-zero length.
const image = (value, opts) => {
const urlValue = url(value, opts)
return urlValue !== undefined &&

const result =
urlValue !== undefined &&
!isAudioUrl(urlValue, opts) &&
!isVideoUrl(urlValue, opts)
? urlValue
: undefined
? urlValue
: undefined

// Results that are not data URIs pass through unchanged.
if (!dataUri.test(result)) return result
// Decode the data URI; an empty payload yields a zero-length buffer and is rejected.
const buffer = dataUri.toBuffer(dataUri.normalize(result))
return buffer.length ? result : undefined
}

const logo = image
Expand Down
1 change: 1 addition & 0 deletions packages/metascraper-helpers/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"audio-extensions": "0.0.0",
"chrono-node": "~2.7.4",
"condense-whitespace": "~2.0.0",
"data-uri-utils": "~1.0.7",
"entities": "~4.5.0",
"file-extension": "~4.0.5",
"has-values": "~2.0.1",
Expand Down
1 change: 1 addition & 0 deletions packages/metascraper-helpers/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ test('.image', t => {
image({ '@id': 'https://www.milanocittastato.it/#/schema/logo/image/' }),
undefined
)
t.is(image('data:,'), undefined)
})

test('.isImageUrl', t => {
Expand Down
6 changes: 3 additions & 3 deletions packages/metascraper-logo-favicon/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ const {
parseUrl,
normalizeUrl,
toRule,
url: urlFn
logo: logoFn
} = require('@metascraper/helpers')

const SIZE_REGEX_BY_X = /\d+x\d+/

const toUrl = toRule(urlFn)
const toLogo = toRule(logoFn)

const toSize = (input, url) => {
if (isEmpty(input)) return
Expand Down Expand Up @@ -170,7 +170,7 @@ module.exports = ({
const rootFavicon = createRootFavicon({ getLogo, withRootFavicon })
return {
logo: [
toUrl($ => {
toLogo($ => {
const sizes = getSizes($, sizeSelectors)
const size = pickFn(sizes, pickBiggerSize)
return get(size, 'url')
Expand Down
8 changes: 8 additions & 0 deletions packages/metascraper-logo-favicon/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -237,3 +237,11 @@ test('resolve logo using from google associated with the domain', async t => {
const metadata = await metascraper({ url })
t.true(metadata.logo.includes('gstatic'))
})

// Regression test for this change: when the only icon <link> in the HTML has
// the empty data URI `data:,` as its href, that value must not be reported as
// the logo; the expected logo is the site's /favicon.ico URL instead.
test('avoid data URI when data length is 0', async t => {
const url = 'https://www.adobe.com/'
const html = '<link rel="icon" href="data:,">'
const metascraper = createMetascraper()
const metadata = await metascraper({ url, html })
t.is(metadata.logo, 'https://www.adobe.com/favicon.ico')
})
30 changes: 15 additions & 15 deletions packages/metascraper-logo/src/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
'use strict'

const { $jsonld, url: urlFn, toRule } = require('@metascraper/helpers')
const { $jsonld, logo: logoFn, toRule } = require('@metascraper/helpers')
const { eq, get } = require('lodash')

const toLogoUrl = ($, propName) => {
Expand All @@ -13,26 +13,26 @@ const toLogoUrl = ($, propName) => {
// Diff view of the plugin factory: each removed `urlFn`/`toUrl` line (or run of
// lines) is followed by its added `logoFn`/`toLogo` replacement, without +/-
// markers.
// When `filter` is supplied, the mapper runs the helper on the value and awaits
// `filter(result)` only if the helper produced a string; any other result is
// returned untouched. Without `filter`, the helper itself is the mapper.
module.exports = ({ filter } = {}) => {
const mapper = filter
? async value => {
const result = urlFn(value)
const result = logoFn(value)
return typeof result === 'string' ? await filter(result) : result
}
: urlFn
: logoFn

const toUrl = toRule(mapper)
const toLogo = toRule(mapper)

return {
// Logo rules, as listed: og:logo meta content, itemprop="logo" meta content,
// itemprop="logo" img src, then a series of `toLogoUrl` property lookups.
logo: [
toUrl($ => $('meta[property="og:logo"]').attr('content')),
toUrl($ => $('meta[itemprop="logo"]').attr('content')),
toUrl($ => $('img[itemprop="logo"]').attr('src')),
toUrl($ => toLogoUrl($, 'brand.logo')),
toUrl($ => toLogoUrl($, 'organization.logo')),
toUrl($ => toLogoUrl($, 'place.logo')),
toUrl($ => toLogoUrl($, 'product.logo')),
toUrl($ => toLogoUrl($, 'service.logo')),
toUrl($ => toLogoUrl($, 'publisher.logo')),
toUrl($ => toLogoUrl($, 'logo.url')),
toUrl($ => toLogoUrl($, 'logo'))
toLogo($ => $('meta[property="og:logo"]').attr('content')),
toLogo($ => $('meta[itemprop="logo"]').attr('content')),
toLogo($ => $('img[itemprop="logo"]').attr('src')),
toLogo($ => toLogoUrl($, 'brand.logo')),
toLogo($ => toLogoUrl($, 'organization.logo')),
toLogo($ => toLogoUrl($, 'place.logo')),
toLogo($ => toLogoUrl($, 'product.logo')),
toLogo($ => toLogoUrl($, 'service.logo')),
toLogo($ => toLogoUrl($, 'publisher.logo')),
toLogo($ => toLogoUrl($, 'logo.url')),
toLogo($ => toLogoUrl($, 'logo'))
]
}
}
4 changes: 3 additions & 1 deletion packages/metascraper-telegram/src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ const {
author,
date,
image,
logo,
memoizeOne,
parseUrl,
sanetizeUrl,
Expand All @@ -17,6 +18,7 @@ const got = require('got')

const toAuthor = toRule(author)
const toImage = toRule(image)
const toLogo = toRule(logo)
const toDate = toRule(date)

const TELEGRAM_DOMAINS = ['telegram.me', 't.me']
Expand Down Expand Up @@ -48,7 +50,7 @@ module.exports = ({ gotOpts, keyvOpts } = {}) => {

const rules = {
author: [toAuthor($ => $('meta[property="og:title"]').attr('content'))],
logo: [toImage($ => $('meta[property="og:image"]').attr('content'))],
logo: [toLogo($ => $('meta[property="og:image"]').attr('content'))],
image: [
toImage(
loadIframe(($iframe, url) => {
Expand Down