Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

i18n: upgrade to latest icu formatter #13834

Merged
merged 15 commits into from
Oct 5, 2023
2 changes: 1 addition & 1 deletion build/build-bundle.js
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ async function build(entryPath, distPath, opts = {minify: true}) {
'__filename': (id) => `'${path.relative(LH_ROOT, id)}'`,
// This package exports to default in a way that causes Rollup to get confused,
// resulting in MessageFormat being undefined.
'require(\'intl-messageformat\').default': 'require(\'intl-messageformat\')',
// 'require(\'intl-messageformat\').default': 'require(\'intl-messageformat\')',
connorjclark marked this conversation as resolved.
Show resolved Hide resolved
// Below we replace lighthouse-logger with a local copy, which is ES modules. Need
// to change every require of the package to reflect this.
'require(\'lighthouse-logger\');': 'require(\'lighthouse-logger\').default;',
Expand Down
2 changes: 1 addition & 1 deletion flow-report/src/summary/category.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ const SummaryTooltip: FunctionComponent<{
{
!displayAsFraction && category.score !== null && <>
<span> · </span>
<span>{i18n.formatNumber(category.score * 100)}</span>
<span>{i18n.formatInteger(category.score * 100)}</span>
</>
}
</div>
Expand Down
1 change: 1 addition & 0 deletions lighthouse-core/audits/bootup-time.js
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ class BootupTime extends Audit {
totalBootupTime
);


return {
score,
numericValue: totalBootupTime,
Expand Down
4 changes: 2 additions & 2 deletions lighthouse-core/audits/installable-manifest.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ const UIStrings = {
/** Error message explaining that the provided manifest URL is invalid. */
'start-url-not-valid': `Manifest start URL is not valid`,
/** Error message explaining that the provided manifest does not contain a name or short_name field. */
'manifest-missing-name-or-short-name': `Manifest does not contain a 'name' or 'short_name' field`,
'manifest-missing-name-or-short-name': 'Manifest does not contain a `name` or `short_name` field',
/** Error message explaining that the manifest display property must be one of 'standalone', 'fullscreen', or 'minimal-ui'. */
'manifest-display-not-supported': `Manifest 'display' property must be one of 'standalone', 'fullscreen', or 'minimal-ui'`,
'manifest-display-not-supported': 'Manifest `display` property must be one of `standalone`, `fullscreen`, or `minimal-ui`',
/** Error message explaining that the manifest could not be fetched, might be empty, or could not be parsed. */
'manifest-empty': `Manifest could not be fetched, is empty, or could not be parsed`,
/** Error message explaining that no matching service worker was detected,
Expand Down
12 changes: 6 additions & 6 deletions lighthouse-core/lib/csp-evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,25 @@ const UIStrings = {
/** Message shown when a CSP does not have a base-uri directive. Shown in a table with a list of other CSP vulnerabilities and suggestions. "CSP" stands for "Content Security Policy". "base-uri", "'none'", and "'self'" do not need to be translated. */
missingBaseUri: 'Missing base-uri allows injected <base> tags to set the base URL for all ' +
'relative URLs (e.g. scripts) to an attacker controlled domain. ' +
'Consider setting base-uri to \'none\' or \'self\'.',
'Consider setting base-uri to `none` or `self`.',
/** Message shown when a CSP does not have a script-src directive. Shown in a table with a list of other CSP vulnerabilities and suggestions. "CSP" stands for "Content Security Policy". "script-src" does not need to be translated. */
missingScriptSrc: 'script-src directive is missing. ' +
'This can allow the execution of unsafe scripts.',
/** Message shown when a CSP does not have a script-src directive. Shown in a table with a list of other CSP vulnerabilities and suggestions. "CSP" stands for "Content Security Policy". "object-src" and "'none'" do not need to be translated. */
missingObjectSrc: 'Missing object-src allows the injection of plugins ' +
'that execute unsafe scripts. Consider setting object-src to \'none\' if you can.',
'that execute unsafe scripts. Consider setting object-src to `none` if you can.',
/** Message shown when a CSP uses a domain allowlist to filter out malicious scripts. Shown in a table with a list of other CSP vulnerabilities and suggestions. "CSP" stands for "Content Security Policy". "CSP", "'strict-dynamic'", "nonces", and "hashes" do not need to be translated. "allowlists" can be interpreted as "whitelist". */
strictDynamic: 'Host allowlists can frequently be bypassed. Consider using ' +
'CSP nonces or hashes instead, along with \'strict-dynamic\' if necessary.',
'CSP nonces or hashes instead, along with `strict-dynamic` if necessary.',
/** Message shown when a CSP allows inline scripts to be run in the page. Shown in a table with a list of other CSP vulnerabilities and suggestions. "CSP" stands for "Content Security Policy". "CSP", "'unsafe-inline'", "nonces", and "hashes" do not need to be translated. */
unsafeInline: '\'unsafe-inline\' allows the execution of unsafe in-page scripts ' +
unsafeInline: '`unsafe-inline` allows the execution of unsafe in-page scripts ' +
'and event handlers. Consider using CSP nonces or hashes to allow scripts individually.',
/** Message shown when a CSP is not backwards compatible with browsers that do not support CSP nonces/hashes. Shown in a table with a list of other CSP vulnerabilities and suggestions. "CSP" stands for "Content Security Policy". "'unsafe-inline'", "nonces", and "hashes" do not need to be translated. */
unsafeInlineFallback: 'Consider adding \'unsafe-inline\' (ignored by browsers supporting ' +
unsafeInlineFallback: 'Consider adding `unsafe-inline` (ignored by browsers supporting ' +
'nonces/hashes) to be backward compatible with older browsers.',
/** Message shown when a CSP is not backwards compatible with browsers that do not support the 'strict-dynamic' keyword. Shown in a table with a list of other CSP vulnerabilities and suggestions. "CSP" stands for "Content Security Policy". "http:", "https:", and "'strict-dynamic'" do not need to be translated. */
allowlistFallback: 'Consider adding https: and http: URL schemes (ignored by browsers ' +
'supporting \'strict-dynamic\') to be backward compatible with older browsers.',
'supporting `strict-dynamic`) to be backward compatible with older browsers.',
/** Message shown when a CSP only provides a reporting destination through the report-to directive. Shown in a table with a list of other CSP vulnerabilities and suggestions. "CSP" stands for "Content Security Policy". "report-to", "report-uri", and "Chromium" do not need to be translated. */
reportToOnly: 'The reporting destination is only configured via the report-to directive. ' +
'This directive is only supported in Chromium-based browsers so it is ' +
Expand Down
50 changes: 25 additions & 25 deletions lighthouse-core/scripts/i18n/collect-strings.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import path from 'path';
import glob from 'glob';
import expect from 'expect';
import tsc from 'typescript';
import MessageParser from 'intl-messageformat-parser';
import MessageParser from '@formatjs/icu-messageformat-parser';
import esMain from 'es-main';

import {Util} from '../../../lighthouse-core/util-commonjs.js';
Expand All @@ -23,6 +23,7 @@ import {pruneObsoleteLhlMessages} from './prune-obsolete-lhl-messages.js';
import {countTranslatedMessages} from './count-translated.js';
import {LH_ROOT} from '../../../root.js';
import {resolveModulePath} from '../esm-utils.js';
import {escapeIcuMessage} from '../../../shared/localization/format.js';

// Match declarations of UIStrings, terminating in either a `};\n` (very likely to always be right)
// or `}\n\n` (allowing semicolon to be optional, but insisting on a double newline so that an
Expand Down Expand Up @@ -188,36 +189,35 @@ function convertMessageToCtc(lhlMessage, examples = {}) {
* @param {string} lhlMessage
*/
function _lhlValidityChecks(lhlMessage) {
let parsedMessage;
let parsedMessageElements;
try {
parsedMessage = MessageParser.parse(lhlMessage);
parsedMessageElements = MessageParser.parse(lhlMessage);
} catch (err) {
if (err.name !== 'SyntaxError') throw err;
// Improve the intl-messageformat-parser syntax error output.
/** @type {Array<{text: string}>} */
const expected = err.expected;
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This field (`err.expected`) doesn't exist any more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error messages definitely seem less useful (at least looking at the test changes). Is there any kind of equivalent to replace this?

Copy link
Collaborator Author

@connorjclark connorjclark Apr 8, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was a "location" field with begin/end markers; we could use that to point to the specific characters that are problematic.

Can't promise fancy ascii pointers like you'd see elsewhere though :P

const expectedStr = expected.map(exp => `'${exp.text}'`).join(', ');
throw new Error(`Did not find the expected syntax (one of ${expectedStr}) in message "${lhlMessage}"`);
throw new Error(`[${err.message}] Did not find the expected syntax in message: ${err.originalMessage}`);
}

for (const element of parsedMessage.elements) {
if (element.type !== 'argumentElement' || !element.format) continue;

if (element.format.type === 'pluralFormat' || element.format.type === 'selectFormat') {
// `plural`/`select` arguments can't have content before or after them.
// See http://userguide.icu-project.org/formatparse/messages#TOC-Complex-Argument-Types
// e.g. https://github.com/GoogleChrome/lighthouse/pull/11068#discussion_r451682796
if (parsedMessage.elements.length > 1) {
throw new Error(`Content cannot appear outside plural or select ICU messages. Instead, repeat that content in each option (message: '${lhlMessage}')`);
}

// Each option value must also be a valid lhlMessage.
for (const option of element.format.options) {
const optionStr = lhlMessage.slice(option.value.location.start.offset, option.value.location.end.offset);
_lhlValidityChecks(optionStr);
/**
* @param {MessageParser.MessageFormatElement[]} elements
*/
function validate(elements) {
for (const element of elements) {
if (element.type === MessageParser.TYPE.plural || element.type === MessageParser.TYPE.select) {
// `plural`/`select` arguments can't have content before or after them.
// See http://userguide.icu-project.org/formatparse/messages#TOC-Complex-Argument-Types
// e.g. https://github.com/GoogleChrome/lighthouse/pull/11068#discussion_r451682796
if (elements.length > 1) {
throw new Error(`Content cannot appear outside plural or select ICU messages. Instead, repeat that content in each option (message: '${lhlMessage}')`);
}

for (const option of Object.values(element.options)) {
validate(option.value);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For lhl validation purposes, it's possible that testing that the actual substrings were valid lhlMessages was intentional. `option.value.elements` was available in the old version of intl-messageformat-parser as well. I think this was checking that it would be TC compatible, not just intl-messageformat compatible.

Copy link
Collaborator Author

@connorjclark connorjclark Apr 8, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it the same? I think that part of the grammar is pretty recursive (as in, if this lib can parse it into a value, and that field is equivalent in structure to the root field, then the raw string should be valid as a string we'd send to TC?). I'll try to inspect the original PR and see if it was brought up.

FWIW, you can see the recursive test case failing if you comment this line out.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not necessarily the same. For instance what's checked right above here: the ICU docs recommend that there be no content outside of plurals, but TC requires that there not be. Unfortunately everyone uses their own subset of the ICU/message format syntax.

The original PR doesn't list its reasoning :( However, the fact that I wrote the "Each option value must also be a valid lhlMessage" comment above it means, to me, that either there was a known mismatch, or the invariant is just what that comment describes and we should check it directly rather than hoping there are no corner cases where they differ. Since this is in collect-strings, not in a lighthouse run or the report, that seems reasonable?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The option value is really an array of messages. If one of the messages is a plural, then the validate function checks that it is the only entry (array length is 1 === no content outside of it). It seems like we're checking that correctly, and there's no failing test to say we aren't.

I do prefer this structured approach to reparsing the options... but we could do both just in case?

}
}
}

validate(parsedMessageElements);
}

/**
Expand Down Expand Up @@ -388,7 +388,7 @@ function _processPlaceholderDirectIcu(icu, examples) {
for (const [key, value] of Object.entries(examples)) {
// Make sure all examples have ICU vars
if (!icu.message.includes(`{${key}}`)) {
throw Error(`Example '${key}' provided, but has not corresponding ICU replacement in message "${icu.message}"`);
throw Error(`Example '${key}' provided, but has no corresponding ICU replacement in message "${icu.message}"`);
}
const eName = `ICU_${idx++}`;
tempMessage = tempMessage.replace(`{${key}}`, `$${eName}$`);
Expand Down Expand Up @@ -516,7 +516,7 @@ function parseUIStrings(sourceStr, liveUIStrings) {
const key = getIdentifier(property);

// Use live message to avoid having to e.g. concat strings broken into parts.
const message = liveUIStrings[key];
const message = escapeIcuMessage(liveUIStrings[key]);

// @ts-expect-error - Not part of the public tsc interface yet.
const jsDocComments = tsc.getJSDocCommentsAndTags(property);
Expand Down
15 changes: 9 additions & 6 deletions lighthouse-core/scripts/i18n/prune-obsolete-lhl-messages.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@ import fs from 'fs';
import path from 'path';

import glob from 'glob';
import MessageParser from 'intl-messageformat-parser';
import MessageParser from '@formatjs/icu-messageformat-parser';

import {collectAllCustomElementsFromICU} from '../../../shared/localization/format.js';
import {
collectAllCustomElementsFromICU,
escapeIcuMessage,
} from '../../../shared/localization/format.js';
import {LH_ROOT, readJson} from '../../../root.js';

/** @typedef {Record<string, {message: string}>} LhlMessages */
Expand All @@ -24,8 +27,8 @@ import {LH_ROOT, readJson} from '../../../root.js';
* @return {boolean}
*/
function equalArguments(goldenArgumentIds, lhlMessage) {
const parsedMessage = MessageParser.parse(lhlMessage);
const lhlArgumentElements = collectAllCustomElementsFromICU(parsedMessage.elements);
const parsedMessageElements = MessageParser.parse(escapeIcuMessage(lhlMessage));
const lhlArgumentElements = collectAllCustomElementsFromICU(parsedMessageElements);
const lhlArgumentIds = [...lhlArgumentElements.keys()];

if (goldenArgumentIds.length !== lhlArgumentIds.length) return false;
Expand Down Expand Up @@ -96,8 +99,8 @@ function getGoldenLocaleArgumentIds(goldenLhl) {
const goldenLocaleArgumentIds = {};

for (const [messageId, {message}] of Object.entries(goldenLhl)) {
const parsedMessage = MessageParser.parse(message);
const goldenArgumentElements = collectAllCustomElementsFromICU(parsedMessage.elements);
const parsedMessageElements = MessageParser.parse(message);
const goldenArgumentElements = collectAllCustomElementsFromICU(parsedMessageElements);
const goldenArgumentIds = [...goldenArgumentElements.keys()].sort();

goldenLocaleArgumentIds[messageId] = goldenArgumentIds;
Expand Down
8 changes: 4 additions & 4 deletions lighthouse-core/test/scripts/i18n/collect-strings-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ describe('#_lhlValidityChecks', () => {
it('errors when using non-supported custom-formatted ICU format', () => {
const message = 'Hello World took {var, badFormat, milliseconds}.';
expect(() => collect.convertMessageToCtc(message)).toThrow(
/Did not find the expected syntax \(one of 'number', 'date', 'time', 'plural', 'selectordinal', 'select'\) in message "Hello World took {var, badFormat, milliseconds}."$/);
/\[INVALID_ARGUMENT_TYPE\] Did not find the expected syntax in message: Hello World took {var, badFormat, milliseconds}.$/);
});

it('errors when there is content outside of a plural argument', () => {
Expand Down Expand Up @@ -370,14 +370,14 @@ describe('#_lhlValidityChecks', () => {
/Content cannot appear outside plural or select ICU messages.*=1 {1 request} other {# requests}}'\)$/);
});

it('errors when there is content outside of nested plural aguments', () => {
it('errors when there is content outside of nested plural arguments', () => {
const message = `{user_gender, select,
female {Ms. {name} received {count, plural, =1 {one award.} other {# awards.}}}
male {Mr. {name} received {count, plural, =1 {one award.} other {# awards.}}}
other {{name} received {count, plural, =1 {one award.} other {# awards.}}}
}`;
expect(() => collect.convertMessageToCtc(message, {name: 'Elbert'})).toThrow(
/Content cannot appear outside plural or select ICU messages.*\(message: 'Ms. {name} received {count, plural, =1 {one award.} other {# awards.}}'\)$/);
/Content cannot appear outside plural or select ICU messages.*\(message: '{user_gender, select/);
});
/* eslint-enable max-len */
});
Expand Down Expand Up @@ -562,7 +562,7 @@ describe('Convert Message to Placeholder', () => {
const message = 'Hello name.';
expect(() => collect.convertMessageToCtc(message, {name: 'Mary'}))
// eslint-disable-next-line max-len
.toThrow(/Example 'name' provided, but has not corresponding ICU replacement in message "Hello name."/);
.toThrow(/Example 'name' provided, but has no corresponding ICU replacement in message "Hello name."/);
});

it('errors when direct ICU has no examples', () => {
Expand Down
10 changes: 5 additions & 5 deletions lighthouse-core/util-commonjs.js
Original file line number Diff line number Diff line change
Expand Up @@ -434,9 +434,9 @@ class Util {
case 'devtools': {
const {cpuSlowdownMultiplier, requestLatencyMs} = throttling;
cpuThrottling = `${Util.i18n.formatNumber(cpuSlowdownMultiplier)}x slowdown (DevTools)`;
networkThrottling = `${Util.i18n.formatNumber(requestLatencyMs)}${NBSP}ms HTTP RTT, ` +
`${Util.i18n.formatNumber(throttling.downloadThroughputKbps)}${NBSP}Kbps down, ` +
`${Util.i18n.formatNumber(throttling.uploadThroughputKbps)}${NBSP}Kbps up (DevTools)`;
networkThrottling = `${Util.i18n.formatMilliseconds(requestLatencyMs, 1)} HTTP RTT, ` +
`${Util.i18n.formatInteger(throttling.downloadThroughputKbps)}${NBSP}Kbps down, ` +
`${Util.i18n.formatInteger(throttling.uploadThroughputKbps)}${NBSP}Kbps up (DevTools)`;

const isSlow4G = () => {
return requestLatencyMs === 150 * 3.75 &&
Expand All @@ -449,8 +449,8 @@ class Util {
case 'simulate': {
const {cpuSlowdownMultiplier, rttMs, throughputKbps} = throttling;
cpuThrottling = `${Util.i18n.formatNumber(cpuSlowdownMultiplier)}x slowdown (Simulated)`;
networkThrottling = `${Util.i18n.formatNumber(rttMs)}${NBSP}ms TCP RTT, ` +
`${Util.i18n.formatNumber(throughputKbps)}${NBSP}Kbps throughput (Simulated)`;
networkThrottling = `${Util.i18n.formatMilliseconds(rttMs)} TCP RTT, ` +
`${Util.i18n.formatInteger(throughputKbps)}${NBSP}Kbps throughput (Simulated)`;

const isSlow4G = () => {
return rttMs === 150 && throughputKbps === 1.6 * 1024;
Expand Down
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@
},
"devDependencies": {
"@build-tracker/cli": "^1.0.0-beta.15",
"@formatjs/icu-messageformat-parser": "^2.0.19",
"@rollup/plugin-alias": "^3.1.2",
"@rollup/plugin-commonjs": "^20.0.0",
"@rollup/plugin-dynamic-import-vars": "^1.1.1",
Expand Down Expand Up @@ -151,7 +152,7 @@
"gh-pages": "^2.0.1",
"glob": "^7.1.3",
"idb-keyval": "2.2.0",
"intl-messageformat-parser": "^1.8.1",
"intl-messageformat": "^9.12.0",
"jest": "27.1.1",
"jsdom": "^12.2.0",
"jsonld": "^5.2.0",
Expand Down Expand Up @@ -189,7 +190,6 @@
"cssstyle": "1.2.1",
"enquirer": "^2.3.6",
"http-link-header": "^0.8.0",
"intl-messageformat": "^4.4.0",
"jpeg-js": "^0.4.3",
"js-library-detector": "^6.4.0",
"lighthouse-logger": "^1.3.0",
Expand Down
Loading