Skip to content

Commit

Permalink
fix: ensure to decode HTML entities
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats committed Nov 17, 2018
1 parent 970cfcb commit d47fd4e
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 4 deletions.
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@
"browserless": "~4.2.1",
"debug": "~4.1.0",
"got": "~9.3.2",
"html-encode": "~2.1.0",
"he": "~1.2.0",
"html-encode": "~2.1.1",
"mem": "~4.0.0",
"p-cancelable": "~1.0.0",
"p-timeout": "~2.0.1",
Expand All @@ -60,7 +61,7 @@
"nyc": "latest",
"prettier-standard": "latest",
"puppeteer": "latest",
"standard": "11",
"standard": "12",
"standard-markdown": "latest",
"standard-version": "latest",
"top-sites": "latest",
Expand Down
7 changes: 5 additions & 2 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ const htmlEncode = require('html-encode')
const timeSpan = require('time-span')
const pTimeout = require('p-timeout')
const mem = require('mem')
const he = require('he')

const got = require('got')

Expand Down Expand Up @@ -44,6 +45,8 @@ const getUrl = mem(

/**
 * Extract the `domain` field from the result of `parseDomain(url)`.
 *
 * @param {string} url - value handed to `parseDomain` unchanged
 * @returns {*} the `domain` property of the parsed result, or `undefined`
 *   when `parseDomain` returns a falsy value
 */
const getDomain = url => {
  const parsed = parseDomain(url)
  // A falsy result carries no `domain`, so report `undefined` in that case.
  return parsed ? parsed.domain : undefined
}

/**
 * Decode the HTML entities found in a markup string by delegating to
 * `he.decode`.
 *
 * @param {string} html - markup to decode
 * @returns {string} whatever `he.decode` produces for `html`
 */
const getHtml = html => {
  const decoded = he.decode(html)
  return decoded
}

const fetch = (url, { toEncode, reflect = false, ...opts }) =>
new PCancelable(async (resolve, reject, onCancel) => {
const req = got(url, {
Expand All @@ -58,7 +61,7 @@ const fetch = (url, { toEncode, reflect = false, ...opts }) =>
const res = await req
return resolve({
url: res.url,
html: await toEncode(res.body, res.headers['content-type']),
html: getHtml(await toEncode(res.body, res.headers['content-type'])),
mode: 'fetch'
})
} catch (err) {
Expand Down Expand Up @@ -89,7 +92,7 @@ const prerender = async (

await fetchReq.cancel()
debug('prerender:success')
return { url, html, mode: 'prerender' }
return { url, html: getHtml(html), mode: 'prerender' }
} catch (err) {
debug('prerender:error', err)
const { isRejected, ...dataProps } = await fetchReq
Expand Down
7 changes: 7 additions & 0 deletions test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,10 @@ test('unreachable urls', async t => {
t.is(await wait(getHTML(url, { prerender: false }), 'html'), '')
t.is(await wait(getHTML(url, { prerender: true }), 'html'), '')
})

// Snapshots the HTML returned for a fixture page in both modes. The first
// snapshot is the prerender result and the second is the plain fetch result;
// keep that order, because AVA numbers snapshots by call order.
//
// Do not use `test.only` here: it would make AVA run this test alone and
// silently skip every other test in the file.
//
// NOTE(review): the title says "base64" although the fixture exercises HTML
// entity decoding. It is left unchanged because the stored snapshot report is
// filed under this exact title; rename both together if you change it.
test('decode base64 entities', async t => {
  const url =
    'https://gist.githubusercontent.com/Kikobeats/912a6c2158de3f3c30d0d7c7697af393/raw/d47d9df77696d9a42df192b7aedbf6cfd2ad393e/index.html'
  t.snapshot(await wait(getHTML(url, { prerender: true }), 'html'))
  t.snapshot(await wait(getHTML(url, { prerender: false }), 'html'))
})
37 changes: 37 additions & 0 deletions test/snapshots/index.js.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Snapshot report for `test/index.js`

The actual snapshot is saved in `index.js.snap`.

Generated by [AVA](https://ava.li).

## decode base64 entities

> Snapshot 1
`<html><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;"><html lang="en">␊
<head>␊
<meta charset="UTF-8">␊
<meta name="viewport" content="width=device-width, initial-scale=1.0">␊
<meta http-equiv="X-UA-Compatible" content="ie=edge">␊
<title>Document</title>␊
</head>␊
<body>␊
<a href="https://httpbin-org.herokuapp.com/redirect/3"></a>␊
<a href="https://github.com/kikobeats/splashy"></a>␊
</body>␊
</html></pre></body></html>`

> Snapshot 2
`<html lang="en">␊
<head>␊
<meta charset="UTF-8">␊
<meta name="viewport" content="width=device-width, initial-scale=1.0">␊
<meta http-equiv="X-UA-Compatible" content="ie=edge">␊
<title>Document</title>␊
</head>␊
<body>␊
<a href="https://httpbin-org.herokuapp.com/redirect/3"></a>␊
<a href="https://github.com/kikobeats/splashy"></a>␊
</body>␊
</html>`
Binary file added test/snapshots/index.js.snap
Binary file not shown.

0 comments on commit d47fd4e

Please sign in to comment.