Skip to content

Commit

Permalink
feat(html): remove localhost alike URLs (#193)
Browse files Browse the repository at this point in the history
  • Loading branch information
Kikobeats authored Feb 17, 2024
1 parent dca52a3 commit 3cf2927
Show file tree
Hide file tree
Showing 11 changed files with 487 additions and 423 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@
"html-encode": "~2.1.6",
"html-urls": "~2.4.55",
"is-html-content": "~1.0.0",
"localhost-url-regex": "~1.0.11",
"lodash": "~4.17.21",
"mri": "~1.2.0",
"p-cancelable": "~2.1.0",
"p-retry": "~4.6.0",
"replace-string": "~3.1.0",
"tinyspawn": "~1.2.6",
"top-sites": "~1.1.205"
},
Expand Down
8 changes: 5 additions & 3 deletions src/html.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
'use strict'

const { get, split, nth, castArray, forEach } = require('lodash')
const localhostUrl = require('localhost-url-regex')
const { TAGS: URL_TAGS } = require('html-urls')
const replaceString = require('replace-string')
const isHTML = require('is-html-content')
const cssUrl = require('css-url-regex')
const execall = require('execall')
Expand Down Expand Up @@ -95,7 +95,9 @@ const rewriteHtmlUrls = ({ $, url }) => {
const el = $(this)
const attr = el.attr(urlAttr)

if (typeof attr === 'string' && !attr.startsWith('http')) {
if (localhostUrl().test(attr)) {
el.remove()
} else if (typeof attr === 'string' && !attr.startsWith('http')) {
try {
const newAttr = new URL(attr, url).toString()
el.attr(urlAttr, newAttr)
Expand All @@ -117,7 +119,7 @@ const rewriteCssUrls = ({ html, url }) => {
if (cssUrl.startsWith('/')) {
try {
const absoluteUrl = new URL(cssUrl, url).toString()
html = replaceString(html, `url(${cssUrl})`, `url(${absoluteUrl})`)
html = html.replaceAll(`url(${cssUrl})`, `url(${absoluteUrl})`)
} catch (_) {}
}
})
Expand Down
220 changes: 0 additions & 220 deletions test/html/index.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
/* eslint-disable prefer-regex-literals */

'use strict'

const cheerio = require('cheerio')
const execall = require('execall')
const path = require('path')
const test = require('ava')
const fs = require('fs')

const { prettyHtml } = require('../util')

Expand Down Expand Up @@ -88,221 +83,6 @@ test('add video markup', t => {
t.snapshot(prettyHtml(output))
})

// Feeds a document in which the same CDN image appears twice: once as a
// protocol-relative `url(//…)` inside a <style> tag and once as an `http://`
// URL inside a <meta> `content` attribute. The assertions require the <meta>
// attribute to come back unchanged and the CSS reference to come back as an
// absolute `https://` URL; the full output is also snapshotted.
test("'`rewriteCssUrls` don't modify html markup", t => {
const output = html({
rewriteUrls: true,
url: 'https://www.rubiomonocoatusa.com/blogs/blog/how-to-apply-oil-plus-2c-to-furniture',
html: `<!DOCTYPE html>
<html>
<head>
<style>body { background: url(//cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305) }</style>
<meta property="og:image" content="http://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305">
</head>
<body></body>
</html>`,
headers: {
'content-type': 'text/html; charset=utf-8'
}
})

// The HTML attribute must keep its original `http://` form.
t.true(
output.includes(
'content="http://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305"'
)
)

// The CSS reference is expected with the `https:` scheme of the page URL.
t.true(
output.includes(
'url(https://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305)'
)
)

t.snapshot(prettyHtml(output))
})

// Runs the URL rewrite over the `browserless.html` fixture with
// `https://browserless.js.org` as the page URL, then checks that the output
// contains an absolute URL on that host and an unpkg.com URL.
// NOTE(review): the fixture itself is not visible here; presumably it holds a
// root-relative `/static/main.min.js` reference — confirm against the fixture.
test('`rewriteHtmlUrls` rewrites relative root URLs inside html markup', t => {
const output = html({
rewriteUrls: true,
url: 'https://browserless.js.org',
html: fs.readFileSync(
path.resolve(__dirname, '../fixtures/browserless.html'),
'utf8'
),
headers: {
'content-type': 'text/html; charset=utf-8'
}
})

t.true(output.includes('https://browserless.js.org/static/main.min.js'))
t.true(output.includes('https://unpkg.com/docsify/lib/docsify.min.js'))

t.snapshot(prettyHtml(output))
})

// A <link> whose `href` is a path without a leading slash
// (`img/icons/MOV/icon2-76.png`) is expected to be resolved against the page
// URL `https://moovility.me/` and appear in the output as an absolute URL.
test('`rewriteHtmlUrls` rewrites relative URLs inside html markup', t => {
const output = html({
rewriteUrls: true,
url: 'https://moovility.me/',
html: `<!DOCTYPE html>
<html>
<head>
<link rel="apple-touch-icon" href="img/icons/MOV/icon2-76.png" sizes="76x76">
</head>
<body></body>
</html>`,
headers: {
'content-type': 'text/html; charset=utf-8'
}
})

t.true(output.includes('https://moovility.me/img/icons/MOV/icon2-76.png'))

t.snapshot(prettyHtml(output))
})

// Two inline `style` attributes reference the same root-relative
// `url(/images/microlink.jpg)`. The test counts occurrences of the absolute
// URL in the output and requires exactly two, i.e. every occurrence must be
// rewritten, not only the first one.
test('`rewriteHtmlUrls` rewrites relative URLs inside stylesheet', t => {
const output = html({
rewriteUrls: true,
url: 'https://kikobeats.com',
html: `
<html lang="en">
<body>
<div style="background-image: url(/images/microlink.jpg)"></div>
<div style="background-image: url(/images/microlink.jpg)"></div>
</body>
</html>
`,
headers: {
'content-type': 'text/html; charset=utf-8'
}
})

// The pattern is built from a plain string, so its dots are regex wildcards
// (any character) rather than literal dots.
const results = execall(
new RegExp('https://kikobeats.com/images/microlink.jpg', 'g'),
output
)

t.is(results.length, 2)
t.snapshot(prettyHtml(output))
})

// An anchor with a `javascript:` href (and a nested <use> with a fragment-only
// `xlink:href`) must appear in the output exactly as it was written in the
// input markup.
test("`rewriteHtmlUrls` don't modify inline javascript", t => {
const output = html({
rewriteUrls: true,
url: 'https://www.latimes.com/opinion/story/2020-06-07/column-muralist-honors-african-americans-killed-by-police',
html: `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
<a class="ActionLink" data-social-service="print" href="javascript:window.print()"><svg><use xlink:href="#mono-icon-print"></use></svg><span>Print</span></a>
</body>
</html>`,
headers: {
'content-type': 'text/html;charset=UTF-8'
}
})

// The expected string is the input anchor, character for character.
t.true(
output.includes(
'<a class="ActionLink" data-social-service="print" href="javascript:window.print()"><svg><use xlink:href="#mono-icon-print"></use></svg><span>Print</span></a>'
)
)

t.snapshot(prettyHtml(output))
})

// Anchors using mailto:, ftp:, file:, nntp:, telnet: and gopher: hrefs are
// passed through the rewrite; each href is expected to still be present in
// the output.
test("`rewriteHtmlUrls` don't modify non http protocols", t => {
const output = html({
rewriteUrls: true,
url: 'https://www.latimes.com/opinion/story/2020-06-07/column-muralist-honors-african-americans-killed-by-police',
html: `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
<a href="mailto:jen@oreilly.com"></a>
<a href="ftp://user:password@server/pathname"></a>
<a href="file://server/path"></a>
<a href="nntp://server:port/newsgroup/article"></a>
<a href="telnet://user:password@server:port/"/></a>
<a href="gopher://docstore.mik.ua/orelly.htm"></a>
</body>
</html>`,
headers: {
'content-type': 'text/html;charset=UTF-8'
}
})

t.true(output.includes('<a href="mailto:jen@oreilly.com"></a>'))
t.true(output.includes('<a href="ftp://user:password@server/pathname"></a>'))
// Only the opening of the file: anchor is checked, not its closing part.
t.true(output.includes('<a href="file://server/path'))
t.true(output.includes('<a href="nntp://server:port/newsgroup/article"></a>'))
// The input telnet anchor is written with `/>`; the expected output has `>`.
t.true(output.includes('<a href="telnet://user:password@server:port/"></a>'))
t.true(output.includes('<a href="gopher://docstore.mik.ua/orelly.htm"></a>'))

t.snapshot(prettyHtml(output))
})

// An <img> tag is passed through the rewrite and must appear in the output
// unchanged.
// NOTE(review): the `src` attribute is empty in both the input markup and the
// expected string as shown here, although the test name refers to data URIs;
// presumably the data URI was dropped when this page was captured — confirm
// against the repository before relying on this fixture.
test("`rewriteHtmlUrls` don't modify data URIs", t => {
const output = html({
rewriteUrls: true,
url: 'https://example.com',
html: `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
<img src="" alt="star" width="16" height="16">
</body>
</html>`,
headers: {
'content-type': 'text/html;charset=UTF-8'
}
})

t.true(
output.includes(
'<img src="" alt="star" width="16" height="16">'
)
)

t.snapshot(prettyHtml(output))
})

// The only element of interest is an inline <script> with no `src`
// attribute; it must be present in the output exactly as written.
test("`rewriteHtmlUrls` don't modify undefined attributes", t => {
const output = html({
rewriteUrls: true,
url: 'https://moovility.me',
html: `
<!DOCTYPE html>
<html lang="en">
<head>
<title>Document</title>
</head>
<body>
<script>console.log('greetings')</script>
</body>
</html>`,
headers: {
'content-type': 'text/html;charset=UTF-8'
}
})

t.true(output.includes("<script>console.log('greetings')</script>"))

t.snapshot(prettyHtml(output))
})

test('styles injection', t => {
const output = html({
url: 'https://kikobeats.com',
Expand Down
66 changes: 66 additions & 0 deletions test/html/rewrite-css-urls.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
'use strict'

const execall = require('execall')
const test = require('ava')

const { prettyHtml } = require('../util')

const html = require('../../src/html')

// The same CDN image is referenced twice in the fixture: as a
// protocol-relative `url(//…)` inside <style> and as an `http://` URL inside a
// <meta> `content` attribute. The <meta> attribute must come back unchanged,
// the CSS reference must come back as an absolute `https://` URL, and the
// whole output is snapshotted.
test("don't modify html markup", t => {
  const markup = `<!DOCTYPE html>
<html>
<head>
<style>body { background: url(//cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305) }</style>
<meta property="og:image" content="http://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305">
</head>
<body></body>
</html>`
  const headers = { 'content-type': 'text/html; charset=utf-8' }

  const output = html({
    rewriteUrls: true,
    url: 'https://www.rubiomonocoatusa.com/blogs/blog/how-to-apply-oil-plus-2c-to-furniture',
    html: markup,
    headers
  })

  // First the untouched HTML attribute, then the rewritten CSS reference.
  const expectedFragments = [
    'content="http://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305"',
    'url(https://cdn.shopify.com/s/files/1/0260/4810/2497/articles/Applying-Oil-Plus-2C-to-a-table_600x.jpg?v=1616464305)'
  ]

  for (const fragment of expectedFragments) {
    t.true(output.includes(fragment))
  }

  t.snapshot(prettyHtml(output))
})

// Two inline `style` attributes reference the same root-relative
// `url(/images/microlink.jpg)`. Both occurrences must be rewritten to the
// absolute URL on `https://kikobeats.com`, so the absolute URL has to appear
// exactly twice in the output; the output is also snapshotted.
test('rewrites relative URLs inside stylesheet', t => {
  const output = html({
    rewriteUrls: true,
    url: 'https://kikobeats.com',
    html: `
<html lang="en">
<body>
<div style="background-image: url(/images/microlink.jpg)"></div>
<div style="background-image: url(/images/microlink.jpg)"></div>
</body>
</html>
`,
    headers: {
      'content-type': 'text/html; charset=utf-8'
    }
  })

  // Every dot is escaped so only the literal host and file name are counted;
  // an unescaped `.` is a wildcard and would also count look-alike strings.
  const results = execall(
    /https:\/\/kikobeats\.com\/images\/microlink\.jpg/g,
    output
  )

  t.is(results.length, 2)
  t.snapshot(prettyHtml(output))
})
Loading

0 comments on commit 3cf2927

Please sign in to comment.