Skip to content
This repository has been archived by the owner on Feb 28, 2022. It is now read-only.

Commit

Permalink
feat(html): enable HAST parsing and serialization of HTML responses
Browse files Browse the repository at this point in the history
HTML response bodies will now be parsed and then serialized before the final response is created. This creates the opportunity to modify the HTML AST for HTML post-processing.

Fixes #285
  • Loading branch information
trieloff committed Apr 26, 2019
1 parent aa2538f commit 224c665
Show file tree
Hide file tree
Showing 12 changed files with 361 additions and 24 deletions.
4 changes: 4 additions & 0 deletions docs/response.schema.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 25 additions & 0 deletions docs/response.schema.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
"micromatch": "^4.0.0",
"object-hash": "^1.3.1",
"rehype-parse": "^6.0.0",
"rehype-stringify": "^5.0.0",
"remark-parse": "^6.0.0",
"remark-rehype": "^4.0.0",
"request": "^2.87.0",
Expand Down
4 changes: 4 additions & 0 deletions src/defaults/html.pipe.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ const { cache, uncached } = require('../html/shared-cache');
const embeds = require('../html/find-embeds');
const parseFrontmatter = require('../html/parse-frontmatter');
const rewriteLinks = require('../html/static-asset-links');
const tohast = require('../html/html-to-hast');
const tohtml = require('../html/stringify-hast');

/* eslint no-param-reassign: off */
/* eslint newline-per-chained-call: off */
Expand Down Expand Up @@ -63,7 +65,9 @@ const htmlpipe = (cont, payload, action) => {
.after(cache).when(uncached)
.after(key)
.after(debug)
.after(tohast) // start HTML post-processing
.after(rewriteLinks).when(production)
.after(tohtml) // end HTML post-processing
.after(flag).expose('esi').when(esi) // flag ESI when there is ESI in the response
.error(selectStatus(production()));

Expand Down
25 changes: 25 additions & 0 deletions src/html/html-to-hast.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
* Copyright 2019 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
const unified = require('unified');
const parse = require('rehype-parse');

/**
 * Pipeline step that parses the HTML response body into a HAST tree.
 *
 * The body is parsed as a complete document when it contains an `<html`
 * tag (matched case-insensitively); otherwise it is parsed as a fragment.
 *
 * @param {object} context the pipeline context
 * @param {object} context.response the response to read the body from
 * @param {string} context.response.body the HTML source to parse
 * @returns {object} an object carrying the parsed tree under `response.hast`
 */
function tohast({ response: { body } }) {
  // Full documents announce themselves with an <html> tag; anything else is a fragment.
  const isFullDocument = body.match(/<html/i) !== null;
  const processor = unified().use(parse, { fragment: !isFullDocument });

  return { response: { hast: processor.parse(body) } };
}

module.exports = tohast;
24 changes: 5 additions & 19 deletions src/html/static-asset-links.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@
* governing permissions and limitations under the License.
*/

const unified = require('unified');
const parse = require('rehype-parse');
const stringify = require('rehype-stringify');
const map = require('unist-util-map');
const Url = require('url-parse');

Expand Down Expand Up @@ -51,25 +48,14 @@ function links() {
};
}

function rewrite({ response: { body, headers } }) {
if (headers && headers['Content-Type'] && headers['Content-Type'].match(/html/)) {
const doc = unified()
.use(parse, {
fragment: false,
})
.use(scripts)
.use(links)
.use(stringify, {
allowParseErrors: true,
allowDangerousHTML: true,
allowDangerousCharacters: true,
quoteSmart: true,
function rewrite({ response: { hast, headers } }) {
if (headers && headers['Content-Type'] && headers['Content-Type'].match(/html/) && hast) {
links()(hast);
scripts()(hast);

})
.processSync(body);
return {
response: {
body: doc.contents,
hast,
},
};
}
Expand Down
32 changes: 32 additions & 0 deletions src/html/stringify-hast.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright 2019 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
const serialize = require('hast-util-to-html');

/**
 * Pipeline step that serializes the HAST tree of the response back to HTML.
 *
 * @param {object} context the pipeline context
 * @param {object} context.response the response to read the tree from
 * @param {object} context.response.hast the HAST tree to serialize
 * @returns {object} an object carrying the serialized HTML under `response.body`
 */
function stringify({ response: { hast } }) {
  // Serializer settings passed to hast-util-to-html.
  const serializerOptions = {
    allowParseErrors: true,
    allowDangerousHTML: true,
    allowDangerousCharacters: true,
    quoteSmart: true,
    entities: { useNamedReferences: true },
  };

  return { response: { body: serialize(hast, serializerOptions) } };
}

module.exports = stringify;
4 changes: 4 additions & 0 deletions src/schemas/response.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
}
]
},
"hast": {
"type": "object",
"description": "The Hypertext AST of the response body"
},
"headers": {
"description": "The HTTP headers of the response",
"additionalProperties": {
Expand Down
2 changes: 1 addition & 1 deletion test/testEmbedHandler.js
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,6 @@ https://www.youtube.com/watch?v=KOxbO0EI4MA
assert.equal(result.response.body, `<p>Hello World
Here comes an embed.</p>
<esi:include src="https://example-embed-service.com/https://www.youtube.com/watch?v=KOxbO0EI4MA"></esi:include>
<p><img src="easy.png" alt="Easy!" srcset="easy.png?width=480&amp;auto=webp 480w,easy.png?width=1384&amp;auto=webp 1384w,easy.png?width=2288&amp;auto=webp 2288w,easy.png?width=3192&amp;auto=webp 3192w,easy.png?width=4096&amp;auto=webp 4096w" sizes="100vw"></p>`);
<p><img src="easy.png" alt="Easy!" srcset="easy.png?width=480&amp;auto=webp 480w, easy.png?width=1384&amp;auto=webp 1384w, easy.png?width=2288&amp;auto=webp 2288w, easy.png?width=3192&amp;auto=webp 3192w, easy.png?width=4096&amp;auto=webp 4096w" sizes="100vw"></p>`);
});
});
19 changes: 16 additions & 3 deletions test/testRewriteStatic.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,23 +13,36 @@
const assert = require('assert');
const { Logger } = require('@adobe/helix-shared');
const rewrite = require('../src/html/static-asset-links');
const tohast = require('../src/html/html-to-hast');
const stringify = require('../src/html/stringify-hast');
const { pipe } = require('../src/defaults/html.pipe.js');


const logger = Logger.getTestLogger({
// tune this for debugging
level: 'info',
});

function rw(content) {
return rewrite({
const hastcontext = tohast({
response: {
body: content,
headers: {
'Content-Type': 'text/html',
},
},
}).response.body;
});

const rewritecontext = rewrite({
response: {
body: content,
hast: hastcontext.response.hast,
headers: {
'Content-Type': 'text/html',
},
},
});

return stringify(rewritecontext).response.body;
}

describe('Integration Test Static Asset Rewriting', () => {
Expand Down
120 changes: 120 additions & 0 deletions test/testStringifyHast.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*
* Copyright 2018 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
/* eslint-env mocha */
const assert = require('assert');
const stringify = require('../src/html/stringify-hast');

describe('Testing stringify pipeline step', () => {
  // Builds a unist `position` from two [line, column, offset] triples.
  const span = (start, end) => ({
    start: { line: start[0], column: start[1], offset: start[2] },
    end: { line: end[0], column: end[1], offset: end[2] },
  });

  // Builds a HAST text node.
  const text = (value, position) => ({ type: 'text', value, position });

  it('Simple HTML can be transformed', () => {
    // Hand-written HAST for a small document. The widths of the whitespace-only
    // text nodes follow their recorded offsets (15 -> 20, 38 -> 41, 48 -> 51), so
    // the fixture and the expected serialization below stay in agreement.
    const hast = {
      type: 'root',
      children: [
        {
          type: 'element',
          tagName: 'html',
          properties: {},
          children: [
            {
              type: 'element',
              tagName: 'head',
              properties: {},
              children: [
                text('\n    ', span([2, 9, 15], [3, 5, 20])),
                {
                  type: 'element',
                  tagName: 'title',
                  properties: {},
                  children: [text('Foo', span([3, 12, 27], [3, 15, 30]))],
                  position: span([3, 5, 20], [3, 23, 38]),
                },
                text('\n  ', span([3, 23, 38], [4, 3, 41])),
              ],
              position: span([2, 3, 9], [4, 10, 48]),
            },
            text('\n  ', span([4, 10, 48], [5, 3, 51])),
            {
              // The body element deliberately carries no position information.
              type: 'element',
              tagName: 'body',
              properties: {},
              children: [text('bar\n', span([5, 10, 58], [6, 1, 69]))],
            },
          ],
          position: span([1, 1, 0], [6, 8, 76]),
        },
      ],
      data: { quirksMode: true },
      position: span([1, 1, 0], [6, 8, 76]),
    };

    // Explicit escapes keep the significant whitespace visible and safe from
    // editors or formatters that would re-indent a multi-line template literal.
    const expected = '<html><head>\n    <title>Foo</title>\n  </head>\n  <body>bar\n</body></html>';

    // Strings are compared strictly; the legacy deepEqual compares them loosely.
    assert.strictEqual(stringify({ response: { hast } }).response.body, expected);
  });
});
Loading

0 comments on commit 224c665

Please sign in to comment.