Skip to content

Commit

Permalink
formatter: render HTML entities (#481)
Browse files Browse the repository at this point in the history
  • Loading branch information
bakkot authored Sep 8, 2022
1 parent 63e1f0b commit f8ce2b7
Show file tree
Hide file tree
Showing 6 changed files with 2,304 additions and 13 deletions.
1 change: 1 addition & 0 deletions entities-processed.json

Large diffs are not rendered by default.

2,233 changes: 2,233 additions & 0 deletions entities.json

Large diffs are not rendered by default.

19 changes: 19 additions & 0 deletions filter-entities.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
'use strict';

// entities.json is from https://html.spec.whatwg.org/entities.json
// which is built by https://github.com/whatwg/html-build/tree/main/entities
// this processes it into a form more useful for us
// no new entities will be added, so the output of this file is committed
// rather than running it every time

const fs = require('fs');
const path = require('path');
const entities = require('./entities.json');

// Matches any expansion containing whitespace, default-ignorable, combining (mark),
// or control/other-category characters. Hoisted so it is compiled once rather than
// once per entity. No `g` flag, so `.test` carries no state between calls.
const UNRENDERABLE = /\p{White_Space}|\p{DI}|\p{gc=M}|\p{gc=C}/u;

// Map every entity name to the literal text it expands to, or to `null` when the
// expansion is '&', '<', or contains a character matched by UNRENDERABLE.
const transformed = Object.fromEntries(
  Object.entries(entities).map(([name, { characters }]) => {
    const mapsToNull =
      characters === '&' || characters === '<' || UNRENDERABLE.test(characters);
    return [name, mapsToNull ? null : characters];
  })
);

// Write next to this script. `require('./entities.json')` above resolves relative to
// this file, so the output must do the same; a bare './…' path would instead be
// resolved against the current working directory.
fs.writeFileSync(
  path.join(__dirname, 'entities-processed.json'),
  JSON.stringify(transformed),
  'utf8'
);
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
"/lib",
"/js",
"/css",
"/boilerplate"
"/boilerplate",
"/entities-processed.json"
],
"repository": "tc39/ecmarkup",
"keywords": [
Expand Down
12 changes: 12 additions & 0 deletions src/formatter/text.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,22 @@
import { LineBuilder } from './line-builder';
const entities = require('../../entities-processed.json');

export function printText(text: string, indent: number): LineBuilder {
const output: LineBuilder = new LineBuilder(indent);
if (text === '') {
return output;
}
text = text.replace(/&[a-zA-Z0-9]+;?/g, m => {
// entities[m] is null if the entity expands to '&', '<', or a string which has blank/control/etc characters
if ({}.hasOwnProperty.call(entities, m) && entities[m] !== null) {
return entities[m];
}
const lower = m.toLowerCase();
if (lower === '&le;' || lower === '&amp;') {
return lower;
}
return m;
});

const leadingSpace = text[0] === ' ' || text[0] === '\t';
const trailingSpace = text[text.length - 1] === ' ' || text[text.length - 1] === '\t';
Expand Down
49 changes: 37 additions & 12 deletions test/formatter.js
Original file line number Diff line number Diff line change
Expand Up @@ -546,32 +546,32 @@ describe('equation formatting', () => {
`
<emu-eqn id="eqn-DaysInYear" aoid="DaysInYear">
DaysInYear(_y_)
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) &ne; 0
= *366*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) = 0 and (ℝ(_y_) modulo 100) &ne; 0
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 100) = 0 and (ℝ(_y_) modulo 400) &ne; 0
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) 0
= *366*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) = 0 and (ℝ(_y_) modulo 100) 0
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 100) = 0 and (ℝ(_y_) modulo 400) 0
= *366*<sub>𝔽</sub> if (ℝ(_y_) modulo 400) = 0
</emu-eqn>
<emu-eqn id="eqn-DaysInYear" aoid="DaysInYear">DaysInYear(_y_)
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) &ne; 0
= *366*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) = 0 and (ℝ(_y_) modulo 100) &ne; 0
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 100) = 0 and (ℝ(_y_) modulo 400) &ne; 0
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) 0
= *366*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) = 0 and (ℝ(_y_) modulo 100) 0
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 100) = 0 and (ℝ(_y_) modulo 400) 0
= *366*<sub>𝔽</sub> if (ℝ(_y_) modulo 400) = 0 </emu-eqn>
`,
dedentKeepingTrailingNewline`
<emu-eqn id="eqn-DaysInYear" aoid="DaysInYear">
DaysInYear(_y_)
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) &ne; 0
= *366*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) = 0 and (ℝ(_y_) modulo 100) &ne; 0
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 100) = 0 and (ℝ(_y_) modulo 400) &ne; 0
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) 0
= *366*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) = 0 and (ℝ(_y_) modulo 100) 0
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 100) = 0 and (ℝ(_y_) modulo 400) 0
= *366*<sub>𝔽</sub> if (ℝ(_y_) modulo 400) = 0
</emu-eqn>
<emu-eqn id="eqn-DaysInYear" aoid="DaysInYear">
DaysInYear(_y_)
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) &ne; 0
= *366*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) = 0 and (ℝ(_y_) modulo 100) &ne; 0
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 100) = 0 and (ℝ(_y_) modulo 400) &ne; 0
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) 0
= *366*<sub>𝔽</sub> if (ℝ(_y_) modulo 4) = 0 and (ℝ(_y_) modulo 100) 0
= *365*<sub>𝔽</sub> if (ℝ(_y_) modulo 100) = 0 and (ℝ(_y_) modulo 400) 0
= *366*<sub>𝔽</sub> if (ℝ(_y_) modulo 400) = 0
</emu-eqn>
`
Expand Down Expand Up @@ -652,6 +652,31 @@ describe('structured header formatting', () => {
});
});

// Formatter handling of HTML character references in text content.
describe('entities', () => {
// One input/expected pair covering both outcomes:
//  - rewritten: `&AMP;` becomes `&amp;`; `&frac12;` and `&frac12` (no semicolon) become `½`;
//    `&fjlig;` becomes `fj`; `&CapitalDifferentialD;` becomes `ⅅ`; inside <emu-alg>,
//    `&laquo;` / `&raquo;` become `«` / `»`.
//  - left untouched: `&amp;`, `&lt;`, `&nbsp;`, `&nbsp`, and the unknown `&NotAnEntity;`.
it('entities are transformed or not as appropriate', async () => {
// NOTE(review): assertDocFormatsAs is defined outside this excerpt; presumably it formats
// the first argument and compares the result with the second — confirm.
await assertDocFormatsAs(
// Unformatted input document.
`
<div>
some entities are transformed: &AMP; &frac12; &frac12 &fjlig; &CapitalDifferentialD;
others are preserved: &amp; &lt; &nbsp; &nbsp &NotAnEntity;
</div>
<emu-alg>
1. This also works in algorithms, as in &laquo; 0, 1 &raquo;.
</emu-alg>
`,
// Expected output.
dedentKeepingTrailingNewline`
<div>
some entities are transformed: &amp; ½ ½ fj ⅅ
others are preserved: &amp; &lt; &nbsp; &nbsp &NotAnEntity;
</div>
<emu-alg>
1. This also works in algorithms, as in « 0, 1 ».
</emu-alg>
`
);
});
});

/**
 * Asserts that `src` is already in formatted form, i.e. that it is used as both
 * the input and the expected result of `assertDocFormatsAs`.
 */
async function assertRoundTrips(src) {
  const expected = src;
  await assertDocFormatsAs(src, expected);
}
Expand Down

0 comments on commit f8ce2b7

Please sign in to comment.