From 61aee97ff55dfe3c7ab7a51d1bdbd12282b33c01 Mon Sep 17 00:00:00 2001 From: Kiko Beats Date: Sun, 16 Dec 2018 20:14:55 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20Ensure=20to=20resolve=20media=20URLs=20?= =?UTF-8?q?BREAKING=20CHANGE:=20Rename=20fetchMode=20=E2=86=92=20getMode?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 10 ++-- package.json | 1 + src/index.js | 107 ++++++++++++++++++++++++++++------ test/index.js | 33 +++++++++++ test/snapshots/index.js.md | 110 ++++++++++++++++++++++++++++++++++- test/snapshots/index.js.snap | Bin 412 -> 984 bytes 6 files changed, 237 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 956e133..a398eba 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ Enable or disable prerendering as mechanism for getting the HTML markup explicit The value `auto` means that that internally use a list of whitelist website that don't need to use prerendering by default. This list is used for speedup the process, using `fetch` mode for these websites. -See [fetchMode parameter](#fetchMode) for know more. +See [getMode parameter](#getMode) for know more. ##### getBrowserless @@ -89,19 +89,19 @@ Encoding the HTML markup properly from the body response. It determines the encode to use A Node.js library for converting HTML documents of arbitrary encoding into a target encoding (utf8, utf16, etc). -##### fetchMode +##### getMode Type: `function`
A function evaluation that will be invoked to determinate the resolutive `mode` for getting the HTML markup from the target URL. -The default `fetchMode` is: +The default `getMode` is: ```js -const getFetchMode = (url, { prerender }) => { +const getMode = (url, { prerender }) => { if (prerender === false) return 'fetch' if (prerender !== 'auto') return 'prerender' - return autoDomains.includes(parseDomain(url).domain) ? 'fetch' : 'prerender' + return autoDomains.includes(getDomain(url)) ? 'fetch' : 'prerender' } ``` diff --git a/package.json b/package.json index 1d87cff..9fb36b9 100644 --- a/package.json +++ b/package.json @@ -35,6 +35,7 @@ "request" ], "dependencies": { + "@metascraper/helpers": "~4.8.3", "browserless": "~4.2.1", "debug": "~4.1.0", "got": "~9.4.0", diff --git a/src/index.js b/src/index.js index 7f04521..9d8c3e7 100644 --- a/src/index.js +++ b/src/index.js @@ -1,5 +1,6 @@ 'use strict' +const { isMime } = require('@metascraper/helpers') const createBrowserless = require('browserless') const reachableUrl = require('reachable-url') const parseDomain = require('parse-domain') @@ -9,6 +10,7 @@ const htmlEncode = require('html-encode') const timeSpan = require('time-span') const pTimeout = require('p-timeout') const { URL } = require('url') +const path = require('path') const got = require('got') const mem = require('mem') const he = require('he') @@ -32,12 +34,12 @@ const REQ_TIMEOUT_REACHABLE = REQ_TIMEOUT * 0.25 const getUrl = mem( async targetUrl => { try { - const { url } = await reachableUrl(targetUrl, { + const res = await reachableUrl(targetUrl, { timeout: REQ_TIMEOUT_REACHABLE }) - return url + return res } catch (err) { - return targetUrl + return { url: targetUrl, headers: {} } } }, { maxAge: ONE_DAY_MS } @@ -73,23 +75,18 @@ const fetch = (url, { toEncode, reflect = false, ...opts }) => }) const prerender = async ( - targetUrl, + url, { getBrowserless, gotOptions, toEncode, ...opts } ) => { let fetchReq let fetchDataProps = {} let isFetchRejected = false let html = '' - let url try { - debug(`getUrl:resolving`) - url = await getUrl(targetUrl) - debug(`getUrl:resolved ${targetUrl} → ${url}`) fetchReq = fetch(url, { reflect: true, toEncode, ...gotOptions }) const browserless = await getBrowserless() html = await pTimeout(browserless.html(url, opts), REQ_TIMEOUT) - await fetchReq.cancel() debug('prerender:success') return { url, html: getHtml(html), mode: 'prerender' } @@ -104,20 +101,96 @@ const prerender = async ( return isFetchRejected ? { url, html, mode: 'prerender' } : fetchDataProps } -const FETCH_MODE = { fetch, prerender } +const modes = { fetch, prerender } -const getFetchMode = (url, { prerender }) => { +const determinateMode = (url, { prerender }) => { if (prerender === false) return 'fetch' if (prerender !== 'auto') return 'prerender' return autoDomains.includes(getDomain(url)) ? 'fetch' : 'prerender' } +const baseHtml = (url, headers, html = '') => { + const { hostname } = new URL(url) + const { date, expires } = headers + + return { + url, + mode: 'fetch', + html: ` + + + + + + + + ${path.basename(url)} + + ${ + date + ? `` + : '' +} + ${ + expires + ? `` + : '' +} + + + + ${html} + + + + + + + `.trim() + } +} + +const getImageHtml = (url, headers) => + baseHtml(url, headers, ``) + +const getVideoHtml = (url, headers) => { + const { protocol } = new URL(url) + const isHttps = protocol === 'https:' + const videoProperty = `og:video${isHttps ? ':secure_url' : ''}` + return baseHtml( + url, + headers, + `` + ) +} + +const getAudioHtml = (url, headers) => { + const { protocol } = new URL(url) + const isHttps = protocol === 'https:' + const audioProperty = `og:audio${isHttps ? ':secure_url' : ''}` + return baseHtml( + url, + headers, + `` + ) +} + +const getContent = async (encodedUrl, mode, opts) => { + const { url, headers } = await getUrl(encodedUrl) + debug(`getUrl ${encodedUrl === url ? url : `${encodedUrl} → ${url}`}`) + const contentType = headers['content-type'] + if (isMime(contentType, 'image')) return getImageHtml(url, headers) + if (isMime(contentType, 'video')) return getVideoHtml(url, headers) + if (isMime(contentType, 'audio')) return getAudioHtml(url, headers) + return modes[mode](url, opts) +} + module.exports = async ( targetUrl, { getBrowserless = createBrowserless, encoding = 'utf-8', - fetchMode = getFetchMode, + getMode = determinateMode, gotOptions, prerender = 'auto', puppeteerOpts @@ -125,17 +198,15 @@ module.exports = async ( ) => { const { href: encodedUrl } = new URL(targetUrl) const toEncode = htmlEncode(encoding) - const targetFetchMode = fetchMode(encodedUrl, { prerender }) + const reqMode = getMode(encodedUrl, { prerender }) + const opts = - targetFetchMode === 'fetch' + reqMode === 'fetch' ? { toEncode, ...gotOptions } : { toEncode, getBrowserless, gotOptions, ...puppeteerOpts } const time = timeSpan() - const { url, html, mode } = await FETCH_MODE[targetFetchMode]( - encodedUrl, - opts - ) + const { url, html, mode } = await getContent(encodedUrl, reqMode, opts) return { url, html, stats: { mode, timing: time() } } } diff --git a/test/index.js b/test/index.js index 493cfbc..a2dc092 100644 --- a/test/index.js +++ b/test/index.js @@ -73,3 +73,36 @@ test('unencoded URL', async t => { 'https://medium.com/@Acegikmo/the-ever-so-lovely-b%C3%A9zier-curve-eb27514da3bf' ) }) + +test('get html from audio url', async t => { + const url = 'https://audiodemos.github.io/vctk_set0/embedadapt_100sample.wav' + const { url: urlDetected, stats, html } = await getHTML(url, { + prerender: false + }) + + t.true(!!html) + t.is(stats.mode, 'fetch') + t.is(url, urlDetected) +}) + +test('get html from image url', async t => { + const url = 'https://kikobeats.com/images/avatar.jpg' + const { url: urlDetected, stats, html } = await getHTML(url, { + prerender: false + }) + + t.true(!!html) + t.is(stats.mode, 'fetch') + t.is(url, urlDetected) +}) + +test('get html from video url', async t => { + const url = 'https://microlink.io/preview.mp4' + const { url: urlDetected, stats, html } = await getHTML(url, { + prerender: false + }) + + t.true(!!html) + t.is(stats.mode, 'fetch') + t.is(url, urlDetected) +}) diff --git a/test/snapshots/index.js.md b/test/snapshots/index.js.md index b59abad..147a8c4 100644 --- a/test/snapshots/index.js.md +++ b/test/snapshots/index.js.md @@ -34,4 +34,112 @@ Generated by [AVA](https://ava.li). ␊ ␊ - ` + + +## get htmnl from audio url + +> Snapshot 1 + + `␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + embedadapt_100sample.wav␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + + +## get html from audio url + +> Snapshot 1 + + `␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + embedadapt_100sample.wav␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ` + +## get html from image url + +> Snapshot 1 + + `␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + avatar.jpg␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + + +## get html from video url + +> Snapshot 1 + + `␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + preview.mp4␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ␊ + ` \ No newline at end of file diff --git a/test/snapshots/index.js.snap b/test/snapshots/index.js.snap index bc4190563d8303ef0280557a4ec57c10b84b853d..4843fbd1b7b8401116c906d94c1a984447316614 100644 GIT binary patch literal 984 zcmV;}11J1JRzV<3?4C_&w1pIqs!z(ARmhe00000000y1 zmd%SCMHt3wb~h#nLKH$yfuaxQvOP7u$;L@%dO%DfOArrc!HW#l-S19UrmL&0uAa$U z1VjD+1wlLrUUVL2wMLg*Cw{XzJ%f8k;8v+dn;4(o8O@hu= zidKCFq5D5v?BDux>*BlNy{|vLvG#GLXno@;gjV1A-nsSiF8}8JQxDcYeD8}&(fZ5N z2>lw?pS|~J_wz5G5TAT>{*SjSMeASZ5L!W}3-8JrdS>OjN+Z%dZZ;yIVY5L77hi<^ zW0<85ri$x#uQ)4l6CA{!Pg~21{B;k^T|4R}EOA;=egtSXFRnmR2!&#t2 zzTb}Ln03WKIDr@VEHX`?g(udPgBL_NHNj9J{( zq=l^Vh(X2TT}pcbuXwctlr&Y+)qoM3Twn1swi&8q={e^b8fzRfnwkos^3X^DALR==aV|uX G0{{TIl*CN{