diff --git a/packages/metascraper-logo-favicon/src/index.js b/packages/metascraper-logo-favicon/src/index.js index 5c92f837d..7080eca46 100644 --- a/packages/metascraper-logo-favicon/src/index.js +++ b/packages/metascraper-logo-favicon/src/index.js @@ -107,23 +107,22 @@ const pickBiggerSize = async (sizes, { gotOpts } = {}) => { pickBiggerSize.sortBySize = collection => orderBy(collection, ['size.priority'], ['desc']) -const favicon = async (url, { gotOpts } = {}) => { - const faviconUrl = logo('/favicon.ico', { url }) - if (!faviconUrl) return undefined - - const response = await reachableUrl(faviconUrl, gotOpts) - const contentType = response.headers['content-type'] - - const isValidContenType = - contentType && - ['image/vnd.microsoft.icon', 'image/x-icon'].some(ct => - contentType.includes(ct) - ) - - return isValidContenType && reachableUrl.isReachable(response) - ? response.url - : undefined -} +const createFavicon = + ({ ext, contentTypes }) => + async (url, { gotOpts } = {}) => { + const faviconUrl = logo(`/favicon.${ext}`, { url }) + if (!faviconUrl) return undefined + + const response = await reachableUrl(faviconUrl, gotOpts) + const contentType = response.headers['content-type'] + + const isValidContenType = + contentType && contentTypes.some(ct => contentType.includes(ct)) + + return isValidContenType && reachableUrl.isReachable(response) + ? response.url + : undefined + } const google = async (url, { gotOpts } = {}) => { const response = await reachableUrl(google.url(url), gotOpts) @@ -135,9 +134,19 @@ google.url = (url, size = 128) => const createGetLogo = ({ withGoogle, withFavicon, gotOpts, keyvOpts }) => { const getLogo = async url => { - const providers = [withFavicon && favicon, withGoogle && google].filter( - Boolean - ) + const providers = [ + withFavicon && + createFavicon({ + ext: 'png', + contentTypes: ['image/png'] + }), + withFavicon && + createFavicon({ + ext: 'ico', + contentTypes: ['image/vnd.microsoft.icon', 'image/x-icon'] + }), + withGoogle && google + ].filter(Boolean) for (const provider of providers) { const logoUrl = await provider(url, { gotOpts }) @@ -190,8 +199,8 @@ module.exports = ({ } } -module.exports.favicon = favicon module.exports.google = google +module.exports.createFavicon = createFavicon module.exports.createRootFavicon = createRootFavicon module.exports.createGetLogo = createGetLogo module.exports.pickBiggerSize = pickBiggerSize diff --git a/packages/metascraper-logo-favicon/test/favicon.js b/packages/metascraper-logo-favicon/test/favicon.js index 422f58f26..46cdc7019 100644 --- a/packages/metascraper-logo-favicon/test/favicon.js +++ b/packages/metascraper-logo-favicon/test/favicon.js @@ -2,20 +2,33 @@ const test = require('ava') -const { favicon } = require('..') +const { createFavicon } = require('..') const { runServer } = require('./helpers') +const faviconPNG = createFavicon({ ext: 'png', contentTypes: ['image/png'] }) +const faviconICO = createFavicon({ + ext: 'ico', + contentTypes: ['image/vnd.microsoft.icon', 'image/x-icon'] +}) + test('return undefined if favicon is not reachable', async t => { const url = 'https://idontexist.lol' - t.is(await favicon(url), undefined) + t.is(await faviconICO(url), undefined) }) test("don't resolve favicon.ico with no content-type", async t => { const url = await runServer(t, async ({ res }) => { res.end('') }) - t.is(await favicon(url), undefined) + t.is(await faviconICO(url), undefined) +}) + +test("don't resolve favicon.png with no content-type", async t => { + const url = await runServer(t, async ({ res }) => { + res.end('') + }) + t.is(await faviconPNG(url), undefined) }) test("don't resolve favicon.ico with no valid content-type", async t => { @@ -23,17 +36,22 @@ test("don't resolve favicon.ico with no valid content-type", async t => { res.setHeader('content-type', 'image/svg+xml; charset=utf-8') res.end('') }) - t.is(await favicon(url), undefined) + t.is(await faviconICO(url), undefined) +}) + +test("favicon.png with 'image/png' content-type", async t => { + const url = 'https://adroll.com/' + t.is(await faviconPNG(url), 'https://adroll.com/favicon.png') }) test("favicon.ico with 'image/vnd.microsoft.icon' content-type", async t => { const url = 'https://microlink.io/' - t.is(await favicon(url), 'https://microlink.io/favicon.ico') + t.is(await faviconICO(url), 'https://microlink.io/favicon.ico') }) test("favicon.ico with 'image/x-icon' content-type", async t => { const url = 'https://2miners.com/' - t.is(await favicon(url), 'https://2miners.com/favicon.ico') + t.is(await faviconICO(url), 'https://2miners.com/favicon.ico') }) test('handle redirects', async t => { @@ -41,5 +59,5 @@ test('handle redirects', async t => { res.writeHead(301, { Location: 'https://microlink.io/favicon.ico' }) res.end() }) - t.is(await favicon(url), 'https://microlink.io/favicon.ico') + t.is(await faviconICO(url), 'https://microlink.io/favicon.ico') }) diff --git a/packages/metascraper/test/integration/segment/index.js b/packages/metascraper/test/integration/segment/index.js index 453482418..fd6e52a5f 100644 --- a/packages/metascraper/test/integration/segment/index.js +++ b/packages/metascraper/test/integration/segment/index.js @@ -26,6 +26,5 @@ const url = 'https://segment.com/blog/scaling-nsq' test('segment', async t => { const html = await readFile(resolve(__dirname, 'input.html')) const metadata = await metascraper({ html, url }) - metadata.logo.replace('t3.gstatic.com', 't1.gstatic.com') t.snapshot(metadata) }) diff --git a/packages/metascraper/test/integration/segment/snapshots/index.js.md b/packages/metascraper/test/integration/segment/snapshots/index.js.md index 430450f8f..8492fa8a7 100644 --- a/packages/metascraper/test/integration/segment/snapshots/index.js.md +++ b/packages/metascraper/test/integration/segment/snapshots/index.js.md @@ -15,7 +15,7 @@ Generated by [AVA](https://avajs.dev). description: 'Segment is the analytics API you’ve always wanted. It’s the easiest way to install all of your favorite analytics tools at once!', image: 'https://c19f7be2e84987e7904e-bf41efcb49679c193a4ec0f3210da86f.ssl.cf1.rackcdn.com/photos/40528-1-1.jpg', lang: null, - logo: 'https://t1.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://segment.com/blog/scaling-nsq&size=128', + logo: 'https://t0.gstatic.com/faviconV2?client=SOCIAL&type=FAVICON&fallback_opts=TYPE,SIZE,URL&url=https://segment.com/blog/scaling-nsq&size=128', publisher: 'Segment Blog', title: 'Scaling NSQ to 750 Billion Messages', url: 'https://segment.com/blog/scaling-nsq', diff --git a/packages/metascraper/test/integration/segment/snapshots/index.js.snap b/packages/metascraper/test/integration/segment/snapshots/index.js.snap index 41945f377..93d22a4fd 100644 Binary files a/packages/metascraper/test/integration/segment/snapshots/index.js.snap and b/packages/metascraper/test/integration/segment/snapshots/index.js.snap differ diff --git a/packages/metascraper/test/integration/smitten-kitchen/snapshots/index.js.md b/packages/metascraper/test/integration/smitten-kitchen/snapshots/index.js.md index 6c75dd664..33843115c 100644 --- a/packages/metascraper/test/integration/smitten-kitchen/snapshots/index.js.md +++ b/packages/metascraper/test/integration/smitten-kitchen/snapshots/index.js.md @@ -15,7 +15,7 @@ Generated by [AVA](https://avajs.dev). description: 'Cucumber Yogurt Raita Salad Think of this as cucumber raita with the proportions inverted -- a lot of cucumber, a smaller amount of yogurt dressing -- but it’s not overly technical, as there are ingredients here not common in raita. Notes: Feel free to play around with this salad; each seed option will provide a…', image: 'http://smittenkitchen.com/wp-content/uploads/cucumber-yogurt-raita-salad-300x200.jpg', lang: null, - logo: 'http://smittenkitchen.com/favicon.ico', + logo: 'https://smittenkitchen.com/favicon.ico', publisher: 'smitten kitchen', title: 'cucumber yogurt raita salad', url: 'http://smittenkitchen.com/blog/2016/05/cucumber-yogurt-raita-salad/', diff --git a/packages/metascraper/test/integration/smitten-kitchen/snapshots/index.js.snap b/packages/metascraper/test/integration/smitten-kitchen/snapshots/index.js.snap index 246b787b2..af2b665b5 100644 Binary files a/packages/metascraper/test/integration/smitten-kitchen/snapshots/index.js.snap and b/packages/metascraper/test/integration/smitten-kitchen/snapshots/index.js.snap differ diff --git a/packages/metascraper/test/integration/techcrunch/snapshots/index.js.md b/packages/metascraper/test/integration/techcrunch/snapshots/index.js.md index f51f61c90..778f9d6d3 100644 --- a/packages/metascraper/test/integration/techcrunch/snapshots/index.js.md +++ b/packages/metascraper/test/integration/techcrunch/snapshots/index.js.md @@ -15,7 +15,7 @@ Generated by [AVA](https://avajs.dev). description: 'Recruiting software engineers is a massive headache for both startups and established companies. For a while now, HackerRank has tried to make both applying…', image: 'https://tctechcrunch2011.files.wordpress.com/2015/08/10-interviewed.png?w=720&h=388&crop=1', lang: 'en', - logo: 'http://techcrunch.com/favicon.ico', + logo: 'https://techcrunch.com/favicon.ico', publisher: 'TechCrunch', title: 'HackerRank Makes Technical Recruiting More Transparent', url: 'http://social.techcrunch.com/2016/01/12/hackerrank-jobs-takes-the-mystery-out-of-technical-recruiting/', diff --git a/packages/metascraper/test/integration/techcrunch/snapshots/index.js.snap b/packages/metascraper/test/integration/techcrunch/snapshots/index.js.snap index eadc8bb10..67f0fe3af 100644 Binary files a/packages/metascraper/test/integration/techcrunch/snapshots/index.js.snap and b/packages/metascraper/test/integration/techcrunch/snapshots/index.js.snap differ diff --git a/packages/metascraper/test/integration/venture-beat/snapshots/index.js.md b/packages/metascraper/test/integration/venture-beat/snapshots/index.js.md index 208e43f3a..d86c6e699 100644 --- a/packages/metascraper/test/integration/venture-beat/snapshots/index.js.md +++ b/packages/metascraper/test/integration/venture-beat/snapshots/index.js.md @@ -14,7 +14,7 @@ Generated by [AVA](https://avajs.dev). description: 'Forter, a security-focused company that brings real-time fraud-prevention technology to online retailers, has raised $32 million to continue its global growth and expand across the U.S.', image: 'http://1u88jj3r4db2x4txp44yqfj1.wpengine.netdna-cdn.com/wp-content/uploads/2016/04/ecommerce-780x473.jpg', lang: 'en', - logo: 'http://venturebeat.com/favicon.ico', + logo: 'https://venturebeat.com/favicon.ico', publisher: 'VentureBeat', title: 'Forter raises $32 million to automate retailers’ battle against online fraud', url: 'http://venturebeat.com/2016/04/21/forter-raises-32-million-to-automate-retailers-battle-against-online-fraud/', diff --git a/packages/metascraper/test/integration/venture-beat/snapshots/index.js.snap b/packages/metascraper/test/integration/venture-beat/snapshots/index.js.snap index 91a1373ae..02ed14655 100644 Binary files a/packages/metascraper/test/integration/venture-beat/snapshots/index.js.snap and b/packages/metascraper/test/integration/venture-beat/snapshots/index.js.snap differ diff --git a/packages/metascraper/test/integration/zdnet/snapshots/index.js.md b/packages/metascraper/test/integration/zdnet/snapshots/index.js.md index 0ffab52a8..90b5be245 100644 --- a/packages/metascraper/test/integration/zdnet/snapshots/index.js.md +++ b/packages/metascraper/test/integration/zdnet/snapshots/index.js.md @@ -15,7 +15,7 @@ Generated by [AVA](https://avajs.dev). description: 'Founded in 2009, Agari perviously partnered with PayPal, Yahoo and Google to create the DMARC authentication protocol.', image: 'http://zdnet4.cbsistatic.com/hub/i/r/2016/05/09/292bfdbf-c37f-4cd2-a5b6-2d9ee9920aef/thumbnail/770x578/053a9fc8f93b4bafd36dcc90c3debd12/istock000074135653medium.jpg', lang: 'en', - logo: 'http://www.zdnet.com/favicon.ico', + logo: 'https://www.zdnet.com/favicon.ico', publisher: 'ZDNet', title: 'Email security startup Agari raises $22 million to help enterprises fight phishing attacks | ZDNet', url: 'http://www.zdnet.com/article/email-security-startup-agari-raises-22-million-to-help-enterprises-fight-phishing-attacks/', diff --git a/packages/metascraper/test/integration/zdnet/snapshots/index.js.snap b/packages/metascraper/test/integration/zdnet/snapshots/index.js.snap index b969e4276..1f236ffe8 100644 Binary files a/packages/metascraper/test/integration/zdnet/snapshots/index.js.snap and b/packages/metascraper/test/integration/zdnet/snapshots/index.js.snap differ