From 4a830c2d9d6ca625ee059c274d94913144f033bd Mon Sep 17 00:00:00 2001 From: Lucas Santos Date: Mon, 23 Oct 2023 13:23:12 +0200 Subject: [PATCH 001/229] test_runner: add Date to the supported mock APIs signed-off-by: Lucas Santos PR-URL: https://github.com/nodejs/node/pull/48638 Reviewed-By: Benjamin Gruenbaum Reviewed-By: Moshe Atlow Reviewed-By: Erick Wendel Reviewed-By: Rafael Gonzaga Reviewed-By: Antoine du Hamel --- doc/api/test.md | 444 +++++++++- lib/internal/priority_queue.js | 4 + lib/internal/test_runner/mock/mock_timers.js | 583 ++++++++----- test/parallel/test-runner-mock-timers.js | 813 ++++++++----------- 4 files changed, 1126 insertions(+), 718 deletions(-) diff --git a/doc/api/test.md b/doc/api/test.md index 99ee7ada5ddab9..da3cb243667f89 100644 --- a/doc/api/test.md +++ b/doc/api/test.md @@ -528,7 +528,7 @@ This allows developers to write more reliable and predictable tests for time-dependent functionality. The example below shows how to mock `setTimeout`. -Using `.enable(['setTimeout']);` +Using `.enable({ apis: ['setTimeout'] });` it will mock the `setTimeout` functions in the [node:timers](./timers.md) and [node:timers/promises](./timers.md#timers-promises-api) modules, as well as from the Node.js global context. @@ -545,7 +545,7 @@ test('mocks setTimeout to be executed synchronously without having to actually w const fn = mock.fn(); // Optionally choose what to mock - mock.timers.enable(['setTimeout']); + mock.timers.enable({ apis: ['setTimeout'] }); setTimeout(fn, 9999); assert.strictEqual(fn.mock.callCount(), 0); @@ -569,7 +569,7 @@ test('mocks setTimeout to be executed synchronously without having to actually w const fn = mock.fn(); // Optionally choose what to mock - mock.timers.enable(['setTimeout']); + mock.timers.enable({ apis: ['setTimeout'] }); setTimeout(fn, 9999); assert.strictEqual(fn.mock.callCount(), 0); @@ -598,7 +598,7 @@ test('mocks setTimeout to be executed synchronously without having to actually w const fn = context.mock.fn(); // Optionally choose what to mock - context.mock.timers.enable(['setTimeout']); + context.mock.timers.enable({ apis: ['setTimeout'] }); setTimeout(fn, 9999); assert.strictEqual(fn.mock.callCount(), 0); @@ -616,7 +616,7 @@ test('mocks setTimeout to be executed synchronously without having to actually w const fn = context.mock.fn(); // Optionally choose what to mock - context.mock.timers.enable(['setTimeout']); + context.mock.timers.enable({ apis: ['setTimeout'] }); setTimeout(fn, 9999); assert.strictEqual(fn.mock.callCount(), 0); @@ -626,6 +626,220 @@ test('mocks setTimeout to be executed synchronously without having to actually w }); ``` +### Dates + +The mock timers API also allows the mocking of the `Date` object. This is a +useful feature for testing time-dependent functionality, or to simulate +internal calendar functions such as `Date.now()`. + +The dates implementation is also part of the [`MockTimers`][] class. Refer to it +for a full list of methods and features. + +**Note:** Dates and timers are dependent when mocked together. This means that +if you have both the `Date` and `setTimeout` mocked, advancing the time will +also advance the mocked date as they simulate a single internal clock. + +The example below show how to mock the `Date` object and obtain the current +`Date.now()` value. + +```mjs +import assert from 'node:assert'; +import { test } from 'node:test'; + +test('mocks the Date object', (context) => { + // Optionally choose what to mock + context.mock.timers.enable({ apis: ['Date'] }); + // If not specified, the initial date will be based on 0 in the UNIX epoch + assert.strictEqual(Date.now(), 0); + + // Advance in time will also advance the date + context.mock.timers.tick(9999); + assert.strictEqual(Date.now(), 9999); +}); +``` + +```cjs +const assert = require('node:assert'); +const { test } = require('node:test'); + +test('mocks the Date object', (context) => { + // Optionally choose what to mock + context.mock.timers.enable({ apis: ['Date'] }); + // If not specified, the initial date will be based on 0 in the UNIX epoch + assert.strictEqual(Date.now(), 0); + + // Advance in time will also advance the date + context.mock.timers.tick(9999); + assert.strictEqual(Date.now(), 9999); +}); +``` + +If there is no initial epoch set, the initial date will be based on 0 in the +Unix epoch. This is January 1st, 1970, 00:00:00 UTC. You can set an initial date +by passing a `now` property to the `.enable()` method. This value will be used +as the initial date for the mocked `Date` object. It can either be a positive +integer, or another Date object. + +```mjs +import assert from 'node:assert'; +import { test } from 'node:test'; + +test('mocks the Date object with initial time', (context) => { + // Optionally choose what to mock + context.mock.timers.enable({ apis: ['Date'], now: 100 }); + assert.strictEqual(Date.now(), 100); + + // Advance in time will also advance the date + context.mock.timers.tick(200); + assert.strictEqual(Date.now(), 300); +}); +``` + +```cjs +const assert = require('node:assert'); +const { test } = require('node:test'); + +test('mocks the Date object with initial time', (context) => { + // Optionally choose what to mock + context.mock.timers.enable({ apis: ['Date'], now: 100 }); + assert.strictEqual(Date.now(), 100); + + // Advance in time will also advance the date + context.mock.timers.tick(200); + assert.strictEqual(Date.now(), 300); +}); +``` + +You can use the `.setTime()` method to manually move the mocked date to another +time. This method only accepts a positive integer. + +**Note:** This method will execute any mocked timers that are in the past +from the new time. + +In the below example we are setting a new time for the mocked date. + +```mjs +import assert from 'node:assert'; +import { test } from 'node:test'; + +test('sets the time of a date object', (context) => { + // Optionally choose what to mock + context.mock.timers.enable({ apis: ['Date'], now: 100 }); + assert.strictEqual(Date.now(), 100); + + // Advance in time will also advance the date + context.mock.timers.setTime(1000); + context.mock.timers.tick(200); + assert.strictEqual(Date.now(), 1200); +}); +``` + +```cjs +const assert = require('node:assert'); +const { test } = require('node:test'); + +test('sets the time of a date object', (context) => { + // Optionally choose what to mock + context.mock.timers.enable({ apis: ['Date'], now: 100 }); + assert.strictEqual(Date.now(), 100); + + // Advance in time will also advance the date + context.mock.timers.setTime(1000); + context.mock.timers.tick(200); + assert.strictEqual(Date.now(), 1200); +}); +``` + +If you have any timer that's set to run in the past, it will be executed as if +the `.tick()` method has been called. This is useful if you want to test +time-dependent functionality that's already in the past. + +```mjs +import assert from 'node:assert'; +import { test } from 'node:test'; + +test('runs timers as setTime passes ticks', (context) => { + // Optionally choose what to mock + context.mock.timers.enable({ apis: ['setTimeout', 'Date'] }); + const fn = context.mock.fn(); + setTimeout(fn, 1000); + + context.mock.timers.setTime(800); + // Timer is not executed as the time is not yet reached + assert.strictEqual(fn.mock.callCount(), 0); + assert.strictEqual(Date.now(), 800); + + context.mock.timers.setTime(1200); + // Timer is executed as the time is now reached + assert.strictEqual(fn.mock.callCount(), 1); + assert.strictEqual(Date.now(), 1200); +}); +``` + +```cjs +const assert = require('node:assert'); +const { test } = require('node:test'); + +test('runs timers as setTime passes ticks', (context) => { + // Optionally choose what to mock + context.mock.timers.enable({ apis: ['setTimeout', 'Date'] }); + const fn = context.mock.fn(); + setTimeout(fn, 1000); + + context.mock.timers.setTime(800); + // Timer is not executed as the time is not yet reached + assert.strictEqual(fn.mock.callCount(), 0); + assert.strictEqual(Date.now(), 800); + + context.mock.timers.setTime(1200); + // Timer is executed as the time is now reached + assert.strictEqual(fn.mock.callCount(), 1); + assert.strictEqual(Date.now(), 1200); +}); +``` + +Using `.runAll()` will execute all timers that are currently in the queue. This +will also advance the mocked date to the time of the last timer that was +executed as if the time has passed. + +```mjs +import assert from 'node:assert'; +import { test } from 'node:test'; + +test('runs timers as setTime passes ticks', (context) => { + // Optionally choose what to mock + context.mock.timers.enable({ apis: ['setTimeout', 'Date'] }); + const fn = context.mock.fn(); + setTimeout(fn, 1000); + setTimeout(fn, 2000); + setTimeout(fn, 3000); + + context.mock.timers.runAll(); + // All timers are executed as the time is now reached + assert.strictEqual(fn.mock.callCount(), 3); + assert.strictEqual(Date.now(), 3000); +}); +``` + +```cjs +const assert = require('node:assert'); +const { test } = require('node:test'); + +test('runs timers as setTime passes ticks', (context) => { + // Optionally choose what to mock + context.mock.timers.enable({ apis: ['setTimeout', 'Date'] }); + const fn = context.mock.fn(); + setTimeout(fn, 1000); + setTimeout(fn, 2000); + setTimeout(fn, 3000); + + context.mock.timers.runAll(); + // All timers are executed as the time is now reached + assert.strictEqual(fn.mock.callCount(), 3); + assert.strictEqual(Date.now(), 3000); +}); +``` + ## Test reporters Enables timer mocking for the specified timers. -* `timers` {Array} An optional array containing the timers to mock. - The currently supported timer values are `'setInterval'`, `'setTimeout'`, - and `'setImmediate'`. If no value is provided, all timers (`'setInterval'`, - `'clearInterval'`, `'setTimeout'`, `'clearTimeout'`, `'setImmediate'`, - and `'clearImmediate'`) will be mocked by default. +* `enableOptions` {Object} Optional configuration options for enabling timer + mocking. The following properties are supported: + * `apis` {Array} An optional array containing the timers to mock. + The currently supported timer values are `'setInterval'`, `'setTimeout'`, `'setImmediate'`, + and `'Date'`. **Default:** `['setInterval', 'setTimeout', 'setImmediate', 'Date']`. + If no array is provided, all time related APIs (`'setInterval'`, `'clearInterval'`, + `'setTimeout'`, `'clearTimeout'`, and `'Date'`) will be mocked by default. + * `now` {number | Date} An optional number or Date object representing the + initial time (in milliseconds) to use as the value + for `Date.now()`. **Default:** `0`. **Note:** When you enable mocking for a specific timer, its associated clear function will also be implicitly mocked. -Example usage: +**Note:** Mocking `Date` will affect the behavior of the mocked timers +as they use the same internal clock. + +Example usage without setting initial time: ```mjs import { mock } from 'node:test'; -mock.timers.enable(['setInterval']); +mock.timers.enable({ apis: ['setInterval'] }); ``` ```cjs const { mock } = require('node:test'); -mock.timers.enable(['setInterval']); +mock.timers.enable({ apis: ['setInterval'] }); ``` The above example enables mocking for the `setInterval` timer and @@ -1630,12 +1859,36 @@ and `clearInterval` functions from [node:timers](./timers.md), [node:timers/promises](./timers.md#timers-promises-api), and `globalThis` will be mocked. +Example usage with initial time set + +```mjs +import { mock } from 'node:test'; +mock.timers.enable({ apis: ['Date'], now: 1000 }); +``` + +```cjs +const { mock } = require('node:test'); +mock.timers.enable({ apis: ['Date'], now: 1000 }); +``` + +Example usage with initial Date object as time set + +```mjs +import { mock } from 'node:test'; +mock.timers.enable({ apis: ['Date'], now: new Date() }); +``` + +```cjs +const { mock } = require('node:test'); +mock.timers.enable({ apis: ['Date'], now: new Date() }); +``` + Alternatively, if you call `mock.timers.enable()` without any parameters: All timers (`'setInterval'`, `'clearInterval'`, `'setTimeout'`, and `'clearTimeout'`) will be mocked. The `setInterval`, `clearInterval`, `setTimeout`, and `clearTimeout` functions from `node:timers`, `node:timers/promises`, -and `globalThis` will be mocked. +and `globalThis` will be mocked. As well as the global `Date` object. ### `timers.reset()` @@ -1692,7 +1945,7 @@ import { test } from 'node:test'; test('mocks setTimeout to be executed synchronously without having to actually wait for it', (context) => { const fn = context.mock.fn(); - context.mock.timers.enable(['setTimeout']); + context.mock.timers.enable({ apis: ['setTimeout'] }); setTimeout(fn, 9999); @@ -1711,7 +1964,7 @@ const { test } = require('node:test'); test('mocks setTimeout to be executed synchronously without having to actually wait for it', (context) => { const fn = context.mock.fn(); - context.mock.timers.enable(['setTimeout']); + context.mock.timers.enable({ apis: ['setTimeout'] }); setTimeout(fn, 9999); assert.strictEqual(fn.mock.callCount(), 0); @@ -1731,7 +1984,7 @@ import { test } from 'node:test'; test('mocks setTimeout to be executed synchronously without having to actually wait for it', (context) => { const fn = context.mock.fn(); - context.mock.timers.enable(['setTimeout']); + context.mock.timers.enable({ apis: ['setTimeout'] }); const nineSecs = 9000; setTimeout(fn, nineSecs); @@ -1750,7 +2003,7 @@ const { test } = require('node:test'); test('mocks setTimeout to be executed synchronously without having to actually wait for it', (context) => { const fn = context.mock.fn(); - context.mock.timers.enable(['setTimeout']); + context.mock.timers.enable({ apis: ['setTimeout'] }); const nineSecs = 9000; setTimeout(fn, nineSecs); @@ -1763,6 +2016,48 @@ test('mocks setTimeout to be executed synchronously without having to actually w }); ``` +Advancing time using `.tick` will also advance the time for any `Date` object +created after the mock was enabled (if `Date` was also set to be mocked). + +```mjs +import assert from 'node:assert'; +import { test } from 'node:test'; + +test('mocks setTimeout to be executed synchronously without having to actually wait for it', (context) => { + const fn = context.mock.fn(); + + context.mock.timers.enable({ apis: ['setTimeout', 'Date'] }); + setTimeout(fn, 9999); + + assert.strictEqual(fn.mock.callCount(), 0); + assert.strictEqual(Date.now(), 0); + + // Advance in time + context.mock.timers.tick(9999); + assert.strictEqual(fn.mock.callCount(), 1); + assert.strictEqual(Date.now(), 9999); +}); +``` + +```cjs +const assert = require('node:assert'); +const { test } = require('node:test'); + +test('mocks setTimeout to be executed synchronously without having to actually wait for it', (context) => { + const fn = context.mock.fn(); + context.mock.timers.enable({ apis: ['setTimeout', 'Date'] }); + + setTimeout(fn, 9999); + assert.strictEqual(fn.mock.callCount(), 0); + assert.strictEqual(Date.now(), 0); + + // Advance in time + context.mock.timers.tick(9999); + assert.strictEqual(fn.mock.callCount(), 1); + assert.strictEqual(Date.now(), 9999); +}); +``` + #### Using clear functions As mentioned, all clear functions from timers (`clearTimeout` and `clearInterval`) @@ -1776,7 +2071,7 @@ test('mocks setTimeout to be executed synchronously without having to actually w const fn = context.mock.fn(); // Optionally choose what to mock - context.mock.timers.enable(['setTimeout']); + context.mock.timers.enable({ apis: ['setTimeout'] }); const id = setTimeout(fn, 9999); // Implicity mocked as well @@ -1796,7 +2091,7 @@ test('mocks setTimeout to be executed synchronously without having to actually w const fn = context.mock.fn(); // Optionally choose what to mock - context.mock.timers.enable(['setTimeout']); + context.mock.timers.enable({ apis: ['setTimeout'] }); const id = setTimeout(fn, 9999); // Implicity mocked as well @@ -1830,7 +2125,7 @@ test('mocks setTimeout to be executed synchronously without having to actually w const nodeTimerPromiseSpy = context.mock.fn(); // Optionally choose what to mock - context.mock.timers.enable(['setTimeout']); + context.mock.timers.enable({ apis: ['setTimeout'] }); setTimeout(globalTimeoutObjectSpy, 9999); nodeTimers.setTimeout(nodeTimerSpy, 9999); @@ -1857,7 +2152,7 @@ test('mocks setTimeout to be executed synchronously without having to actually w const nodeTimerPromiseSpy = context.mock.fn(); // Optionally choose what to mock - context.mock.timers.enable(['setTimeout']); + context.mock.timers.enable({ apis: ['setTimeout'] }); setTimeout(globalTimeoutObjectSpy, 9999); nodeTimers.setTimeout(nodeTimerSpy, 9999); @@ -1880,7 +2175,7 @@ import assert from 'node:assert'; import { test } from 'node:test'; import nodeTimersPromises from 'node:timers/promises'; test('should tick five times testing a real use case', async (context) => { - context.mock.timers.enable(['setInterval']); + context.mock.timers.enable({ apis: ['setInterval'] }); const expectedIterations = 3; const interval = 1000; @@ -1912,7 +2207,7 @@ const assert = require('node:assert'); const { test } = require('node:test'); const nodeTimersPromises = require('node:timers/promises'); test('should tick five times testing a real use case', async (context) => { - context.mock.timers.enable(['setInterval']); + context.mock.timers.enable({ apis: ['setInterval'] }); const expectedIterations = 3; const interval = 1000; @@ -1946,7 +2241,8 @@ added: - v20.4.0 --> -Triggers all pending mocked timers immediately. +Triggers all pending mocked timers immediately. If the `Date` object is also +mocked, it will also advance the `Date` object to the furthest timer's time. The example below triggers all pending timers immediately, causing them to execute without any delay. @@ -1956,7 +2252,7 @@ import assert from 'node:assert'; import { test } from 'node:test'; test('runAll functions following the given order', (context) => { - context.mock.timers.enable(['setTimeout']); + context.mock.timers.enable({ apis: ['setTimeout', 'Date'] }); const results = []; setTimeout(() => results.push(1), 9999); @@ -1968,8 +2264,9 @@ test('runAll functions following the given order', (context) => { assert.deepStrictEqual(results, []); context.mock.timers.runAll(); - assert.deepStrictEqual(results, [3, 2, 1]); + // The Date object is also advanced to the furthest timer's time + assert.strictEqual(Date.now(), 9999); }); ``` @@ -1978,7 +2275,7 @@ const assert = require('node:assert'); const { test } = require('node:test'); test('runAll functions following the given order', (context) => { - context.mock.timers.enable(['setTimeout']); + context.mock.timers.enable({ apis: ['setTimeout', 'Date'] }); const results = []; setTimeout(() => results.push(1), 9999); @@ -1990,8 +2287,9 @@ test('runAll functions following the given order', (context) => { assert.deepStrictEqual(results, []); context.mock.timers.runAll(); - assert.deepStrictEqual(results, [3, 2, 1]); + // The Date object is also advanced to the furthest timer's time + assert.strictEqual(Date.now(), 9999); }); ``` @@ -2000,6 +2298,92 @@ triggering timers in the context of timer mocking. It does not have any effect on real-time system clocks or actual timers outside of the mocking environment. +### `timers.setTime(milliseconds)` + + + +Sets the current Unix timestamp that will be used as reference for any mocked +`Date` objects. + +```mjs +import assert from 'node:assert'; +import { test } from 'node:test'; + +test('runAll functions following the given order', (context) => { + const now = Date.now(); + const setTime = 1000; + // Date.now is not mocked + assert.deepStrictEqual(Date.now(), now); + + context.mock.timers.enable({ apis: ['Date'] }); + context.mock.timers.setTime(setTime); + // Date.now is now 1000 + assert.strictEqual(Date.now(), setTime); +}); +``` + +```cjs +const assert = require('node:assert'); +const { test } = require('node:test'); + +test('setTime replaces current time', (context) => { + const now = Date.now(); + const setTime = 1000; + // Date.now is not mocked + assert.deepStrictEqual(Date.now(), now); + + context.mock.timers.enable({ apis: ['Date'] }); + context.mock.timers.setTime(setTime); + // Date.now is now 1000 + assert.strictEqual(Date.now(), setTime); +}); +``` + +#### Dates and Timers working together + +Dates and timer objects are dependent on each other. If you use `setTime()` to +pass the current time to the mocked `Date` object, the set timers with +`setTimeout` and `setInterval` will **not** be affected. + +However, the `tick` method **will** advanced the mocked `Date` object. + +```mjs +import assert from 'node:assert'; +import { test } from 'node:test'; + +test('runAll functions following the given order', (context) => { + context.mock.timers.enable({ apis: ['setTimeout', 'Date'] }); + const results = []; + setTimeout(() => results.push(1), 9999); + + assert.deepStrictEqual(results, []); + context.mock.timers.setTime(12000); + assert.deepStrictEqual(results, []); + // The date is advanced but the timers don't tick + assert.strictEqual(Date.now(), 12000); +}); +``` + +```cjs +const assert = require('node:assert'); +const { test } = require('node:test'); + +test('runAll functions following the given order', (context) => { + context.mock.timers.enable({ apis: ['setTimeout', 'Date'] }); + const results = []; + setTimeout(() => results.push(1), 9999); + + assert.deepStrictEqual(results, []); + context.mock.timers.setTime(12000); + assert.deepStrictEqual(results, []); + // The date is advanced but the timers don't tick + assert.strictEqual(Date.now(), 12000); +}); +``` + ## Class: `TestsStream` type to mimic unix. - if cmd[0] == "cat": - command = ["type"] - else: - command = [cmd[0].replace("/", "\\")] + command = ["type"] if cmd[0] == "cat" else [cmd[0].replace("/", "\\")] # Add call before command to ensure that commands can be tied together one # after the other without aborting in Incredibuild, since IB makes a bat # file out of the raw command string, and some commands (like python) are @@ -688,7 +685,7 @@ def _GenerateExternalRules(rules, output_dir, spec, sources, options, actions_to all_outputs.update(OrderedSet(outputs)) # Only use one target from each rule as the dependency for # 'all' so we don't try to build each rule multiple times. - first_outputs.append(list(outputs)[0]) + first_outputs.append(next(iter(outputs))) # Get the unique output directories for this rule. output_dirs = [os.path.split(i)[0] for i in outputs] for od in output_dirs: @@ -757,7 +754,7 @@ def _EscapeEnvironmentVariableExpansion(s): Returns: The escaped string. - """ # noqa: E731,E123,E501 + """ s = s.replace("%", "%%") return s @@ -1190,7 +1187,7 @@ def _AddConfigurationToMSVSProject(p, spec, config_type, config_name, config): precompiled_header = config.get("msvs_precompiled_header") # Prepare the list of tools as a dictionary. - tools = dict() + tools = {} # Add in user specified msvs_settings. msvs_settings = config.get("msvs_settings", {}) MSVSSettings.ValidateMSVSSettings(msvs_settings) @@ -1385,10 +1382,7 @@ def _GetDefines(config): """ defines = [] for d in config.get("defines", []): - if type(d) == list: - fd = "=".join([str(dpart) for dpart in d]) - else: - fd = str(d) + fd = "=".join([str(dpart) for dpart in d]) if isinstance(d, list) else str(d) defines.append(fd) return defines @@ -1579,10 +1573,10 @@ def _AdjustSourcesAndConvertToFilterHierarchy( # such as ../../src/modules/module1 etc. if version.UsesVcxproj(): while ( - all([isinstance(s, MSVSProject.Filter) for s in sources]) + all(isinstance(s, MSVSProject.Filter) for s in sources) and len({s.name for s in sources}) == 1 ): - assert all([len(s.contents) == 1 for s in sources]) + assert all(len(s.contents) == 1 for s in sources) sources = [s.contents[0] for s in sources] else: while len(sources) == 1 and isinstance(sources[0], MSVSProject.Filter): @@ -1599,10 +1593,7 @@ def _IdlFilesHandledNonNatively(spec, sources): if rule["extension"] == "idl" and int(rule.get("msvs_external_rule", 0)): using_idl = True break - if using_idl: - excluded_idl = [i for i in sources if i.endswith(".idl")] - else: - excluded_idl = [] + excluded_idl = [i for i in sources if i.endswith(".idl")] if using_idl else [] return excluded_idl @@ -1820,7 +1811,7 @@ def _GetPathDict(root, path): parent, folder = os.path.split(path) parent_dict = _GetPathDict(root, parent) if folder not in parent_dict: - parent_dict[folder] = dict() + parent_dict[folder] = {} return parent_dict[folder] @@ -3014,18 +3005,26 @@ def _GetMSBuildConfigurationDetails(spec, build_file): msbuild_attributes = _GetMSBuildAttributes(spec, settings, build_file) condition = _GetConfigurationCondition(name, settings, spec) character_set = msbuild_attributes.get("CharacterSet") + vctools_version = msbuild_attributes.get("VCToolsVersion") config_type = msbuild_attributes.get("ConfigurationType") _AddConditionalProperty(properties, condition, "ConfigurationType", config_type) + spectre_mitigation = msbuild_attributes.get('SpectreMitigation') + if spectre_mitigation: + _AddConditionalProperty(properties, condition, "SpectreMitigation", + spectre_mitigation) if config_type == "Driver": _AddConditionalProperty(properties, condition, "DriverType", "WDM") _AddConditionalProperty( properties, condition, "TargetVersion", _ConfigTargetVersion(settings) ) - if character_set: - if "msvs_enable_winrt" not in spec: - _AddConditionalProperty( - properties, condition, "CharacterSet", character_set - ) + if character_set and "msvs_enable_winrt" not in spec: + _AddConditionalProperty( + properties, condition, "CharacterSet", character_set + ) + if vctools_version and "msvs_enable_winrt" not in spec: + _AddConditionalProperty( + properties, condition, "VCToolsVersion", vctools_version + ) return _GetMSBuildPropertyGroup(spec, "Configuration", properties) @@ -3105,6 +3104,10 @@ def _ConvertMSVSBuildAttributes(spec, config, build_file): msbuild_attributes[a] = _ConvertMSVSCharacterSet(msvs_attributes[a]) elif a == "ConfigurationType": msbuild_attributes[a] = _ConvertMSVSConfigurationType(msvs_attributes[a]) + elif a == "SpectreMitigation": + msbuild_attributes[a] = msvs_attributes[a] + elif a == "VCToolsVersion": + msbuild_attributes[a] = msvs_attributes[a] else: print("Warning: Do not know how to convert MSVS attribute " + a) return msbuild_attributes @@ -3327,15 +3330,14 @@ def _GetMSBuildToolSettingsSections(spec, configurations): for tool_name, tool_settings in sorted(msbuild_settings.items()): # Skip the tool named '' which is a holder of global settings handled # by _GetMSBuildConfigurationGlobalProperties. - if tool_name: - if tool_settings: - tool = [tool_name] - for name, value in sorted(tool_settings.items()): - formatted_value = _GetValueFormattedForMSBuild( - tool_name, name, value - ) - tool.append([name, formatted_value]) - group.append(tool) + if tool_name and tool_settings: + tool = [tool_name] + for name, value in sorted(tool_settings.items()): + formatted_value = _GetValueFormattedForMSBuild( + tool_name, name, value + ) + tool.append([name, formatted_value]) + group.append(tool) groups.append(group) return groups @@ -3463,10 +3465,7 @@ def _GetValueFormattedForMSBuild(tool_name, name, value): "Link": ["AdditionalOptions"], "Lib": ["AdditionalOptions"], } - if tool_name in exceptions and name in exceptions[tool_name]: - char = " " - else: - char = ";" + char = " " if name in exceptions.get(tool_name, []) else ";" formatted_value = char.join( [MSVSSettings.ConvertVCMacrosToMSBuild(i) for i in value] ) diff --git a/tools/gyp/pylib/gyp/generator/ninja.py b/tools/gyp/pylib/gyp/generator/ninja.py index 3db3771ac97855..8ba341e96d3f0d 100644 --- a/tools/gyp/pylib/gyp/generator/ninja.py +++ b/tools/gyp/pylib/gyp/generator/ninja.py @@ -1583,7 +1583,7 @@ def WriteTarget(self, spec, config_name, config, link_deps, compile_deps): elif spec["type"] == "static_library": self.target.binary = self.ComputeOutput(spec) if ( - self.flavor not in ("mac", "openbsd", "netbsd", "win") + self.flavor not in ("ios", "mac", "netbsd", "openbsd", "win") and not self.is_standalone_static_library ): self.ninja.build( @@ -1815,10 +1815,7 @@ def ComputeOutputFileName(self, spec, type=None): "executable": default_variables["EXECUTABLE_SUFFIX"], } extension = spec.get("product_extension") - if extension: - extension = "." + extension - else: - extension = DEFAULT_EXTENSION.get(type, "") + extension = "." + extension if extension else DEFAULT_EXTENSION.get(type, "") if "product_name" in spec: # If we were given an explicit name, use that. @@ -2496,7 +2493,7 @@ def GenerateOutputForConfig(target_list, target_dicts, data, params, config_name ), ) - if flavor != "mac" and flavor != "win": + if flavor not in ("ios", "mac", "win"): master_ninja.rule( "alink", description="AR $out", @@ -2533,7 +2530,7 @@ def GenerateOutputForConfig(target_list, target_dicts, data, params, config_name description="SOLINK $lib", restat=True, command=mtime_preserving_solink_base - % {"suffix": "@$link_file_list"}, # noqa: E501 + % {"suffix": "@$link_file_list"}, rspfile="$link_file_list", rspfile_content=( "-Wl,--whole-archive $in $solibs -Wl," "--no-whole-archive $libs" diff --git a/tools/gyp/pylib/gyp/generator/xcode.py b/tools/gyp/pylib/gyp/generator/xcode.py index 2f4d17e514e439..1ac672c3876bd9 100644 --- a/tools/gyp/pylib/gyp/generator/xcode.py +++ b/tools/gyp/pylib/gyp/generator/xcode.py @@ -439,7 +439,7 @@ def Finalize2(self, xcode_targets, xcode_target_to_target_dict): # it opens the project file, which will result in unnecessary diffs. # TODO(mark): This is evil because it relies on internal knowledge of # PBXProject._other_pbxprojects. - for other_pbxproject in self.project._other_pbxprojects.keys(): + for other_pbxproject in self.project._other_pbxprojects: self.project.AddOrGetProjectReference(other_pbxproject) self.project.SortRemoteProductReferences() @@ -1118,10 +1118,7 @@ def GenerateOutput(target_list, target_dicts, data, params): for concrete_output_index, concrete_output in enumerate( concrete_outputs ): - if concrete_output_index == 0: - bol = "" - else: - bol = " " + bol = "" if concrete_output_index == 0 else " " makefile.write(f"{bol}{concrete_output} \\\n") concrete_output_dir = posixpath.dirname(concrete_output) diff --git a/tools/gyp/pylib/gyp/input.py b/tools/gyp/pylib/gyp/input.py index 354958bfb2ab55..8f39519dee51fb 100644 --- a/tools/gyp/pylib/gyp/input.py +++ b/tools/gyp/pylib/gyp/input.py @@ -16,9 +16,9 @@ import sys import threading import traceback -from distutils.version import StrictVersion from gyp.common import GypError from gyp.common import OrderedSet +from packaging.version import Version # A list of types that are treated as linkable. linkable_types = [ @@ -225,7 +225,7 @@ def LoadOneBuildFile(build_file_path, data, aux_data, includes, is_target, check return data[build_file_path] if os.path.exists(build_file_path): - build_file_contents = open(build_file_path, encoding='utf-8').read() + build_file_contents = open(build_file_path, encoding="utf-8").read() else: raise GypError(f"{build_file_path} not found (cwd: {os.getcwd()})") @@ -870,10 +870,7 @@ def ExpandVariables(input, phase, variables, build_file): # This works around actions/rules which have more inputs than will # fit on the command line. if file_list: - if type(contents) is list: - contents_list = contents - else: - contents_list = contents.split(" ") + contents_list = contents if type(contents) is list else contents.split(" ") replacement = contents_list[0] if os.path.isabs(replacement): raise GypError('| cannot handle absolute paths, got "%s"' % replacement) @@ -961,13 +958,13 @@ def ExpandVariables(input, phase, variables, build_file): # Fix up command with platform specific workarounds. contents = FixupPlatformCommand(contents) try: - p = subprocess.Popen( + # stderr will be printed no matter what + result = subprocess.run( contents, - shell=use_shell, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - stdin=subprocess.PIPE, + shell=use_shell, cwd=build_file_dir, + check=False ) except Exception as e: raise GypError( @@ -975,19 +972,12 @@ def ExpandVariables(input, phase, variables, build_file): % (e, contents, build_file) ) - p_stdout, p_stderr = p.communicate("") - p_stdout = p_stdout.decode("utf-8") - p_stderr = p_stderr.decode("utf-8") - - if p.wait() != 0 or p_stderr: - sys.stderr.write(p_stderr) - # Simulate check_call behavior, since check_call only exists - # in python 2.5 and later. + if result.returncode > 0: raise GypError( "Call to '%s' returned exit status %d while in %s." - % (contents, p.returncode, build_file) + % (contents, result.returncode, build_file) ) - replacement = p_stdout.rstrip() + replacement = result.stdout.decode("utf-8").rstrip() cached_command_results[cache_key] = replacement else: @@ -1190,7 +1180,7 @@ def EvalSingleCondition(cond_expr, true_dict, false_dict, phase, variables, buil else: ast_code = compile(cond_expr_expanded, "", "eval") cached_conditions_asts[cond_expr_expanded] = ast_code - env = {"__builtins__": {}, "v": StrictVersion} + env = {"__builtins__": {}, "v": Version} if eval(ast_code, env, variables): return true_dict return false_dict @@ -1586,14 +1576,12 @@ def ExpandWildcardDependencies(targets, data): continue dependency_target_name = dependency_target_dict["target_name"] if ( - dependency_target != "*" - and dependency_target != dependency_target_name + dependency_target not in {"*", dependency_target_name} ): continue dependency_target_toolset = dependency_target_dict["toolset"] if ( - dependency_toolset != "*" - and dependency_toolset != dependency_target_toolset + dependency_toolset not in {"*", dependency_target_toolset} ): continue dependency = gyp.common.QualifiedTarget( @@ -1637,15 +1625,14 @@ def RemoveSelfDependencies(targets): dependencies = target_dict.get(dependency_key, []) if dependencies: for t in dependencies: - if t == target_name: - if ( - targets[t] - .get("variables", {}) - .get("prune_self_dependency", 0) - ): - target_dict[dependency_key] = Filter( - dependencies, target_name - ) + if t == target_name and ( + targets[t] + .get("variables", {}) + .get("prune_self_dependency", 0) + ): + target_dict[dependency_key] = Filter( + dependencies, target_name + ) def RemoveLinkDependenciesFromNoneTargets(targets): @@ -2245,10 +2232,7 @@ def is_in_set_or_list(x, s, items): singleton = False if type(item) in (str, int): # The cheap and easy case. - if is_paths: - to_item = MakePathRelative(to_file, fro_file, item) - else: - to_item = item + to_item = MakePathRelative(to_file, fro_file, item) if is_paths else item if not (type(item) is str and item.startswith("-")): # Any string that doesn't begin with a "-" is a singleton - it can @@ -2474,10 +2458,7 @@ def SetUpConfigurations(target, target_dict): new_configuration_dict = {} for (key, target_val) in target_dict.items(): key_ext = key[-1:] - if key_ext in key_suffixes: - key_base = key[:-1] - else: - key_base = key + key_base = key[:-1] if key_ext in key_suffixes else key if key_base not in non_configuration_keys: new_configuration_dict[key] = gyp.simple_copy.deepcopy(target_val) @@ -2489,7 +2470,7 @@ def SetUpConfigurations(target, target_dict): merged_configurations[configuration] = new_configuration_dict # Put the new configurations back into the target dict as a configuration. - for configuration in merged_configurations.keys(): + for configuration in merged_configurations: target_dict["configurations"][configuration] = merged_configurations[ configuration ] @@ -2506,19 +2487,16 @@ def SetUpConfigurations(target, target_dict): delete_keys = [] for key in target_dict: key_ext = key[-1:] - if key_ext in key_suffixes: - key_base = key[:-1] - else: - key_base = key + key_base = key[:-1] if key_ext in key_suffixes else key if key_base not in non_configuration_keys: delete_keys.append(key) for key in delete_keys: del target_dict[key] # Check the configurations to see if they contain invalid keys. - for configuration in target_dict["configurations"].keys(): + for configuration in target_dict["configurations"]: configuration_dict = target_dict["configurations"][configuration] - for key in configuration_dict.keys(): + for key in configuration_dict: if key in invalid_configuration_keys: raise GypError( "%s not allowed in the %s configuration, found in " @@ -2561,7 +2539,7 @@ def ProcessListFiltersInDict(name, the_dict): del_lists = [] for key, value in the_dict.items(): operation = key[-1] - if operation != "!" and operation != "/": + if operation not in {"!", "/"}: continue if type(value) is not list: diff --git a/tools/gyp/pylib/gyp/msvs_emulation.py b/tools/gyp/pylib/gyp/msvs_emulation.py index 5b9c2712e091b4..38fa21dd666697 100644 --- a/tools/gyp/pylib/gyp/msvs_emulation.py +++ b/tools/gyp/pylib/gyp/msvs_emulation.py @@ -93,7 +93,7 @@ def _AddPrefix(element, prefix): if element is None: return element # Note, not Iterable because we don't want to handle strings like that. - if isinstance(element, list) or isinstance(element, tuple): + if isinstance(element, (list, tuple)): return [prefix + e for e in element] else: return prefix + element @@ -105,7 +105,7 @@ def _DoRemapping(element, map): if map is not None and element is not None: if not callable(map): map = map.get # Assume it's a dict, otherwise a callable to do the remap. - if isinstance(element, list) or isinstance(element, tuple): + if isinstance(element, (list, tuple)): element = filter(None, [map(elem) for elem in element]) else: element = map(element) @@ -117,7 +117,7 @@ def _AppendOrReturn(append, element): then add |element| to it, adding each item in |element| if it's a list or tuple.""" if append is not None and element is not None: - if isinstance(element, list) or isinstance(element, tuple): + if isinstance(element, (list, tuple)): append.extend(element) else: append.append(element) @@ -183,7 +183,7 @@ def ExtractSharedMSVSSystemIncludes(configs, generator_flags): expanded_system_includes = OrderedSet( [ExpandMacros(include, env) for include in all_system_includes] ) - if any(["$" in include for include in expanded_system_includes]): + if any("$" in include for include in expanded_system_includes): # Some path relies on target-specific variables, bail. return None @@ -255,10 +255,7 @@ def GetVSMacroEnv(self, base_to_build=None, config=None): """Get a dict of variables mapping internal VS macro names to their gyp equivalents.""" target_arch = self.GetArch(config) - if target_arch == "x86": - target_platform = "Win32" - else: - target_platform = target_arch + target_platform = "Win32" if target_arch == "x86" else target_arch target_name = self.spec.get("product_prefix", "") + self.spec.get( "product_name", self.spec["target_name"] ) @@ -738,10 +735,7 @@ def GetLdflags( # TODO(scottmg): This should sort of be somewhere else (not really a flag). ld("AdditionalDependencies", prefix="") - if self.GetArch(config) == "x86": - safeseh_default = "true" - else: - safeseh_default = None + safeseh_default = "true" if self.GetArch(config) == "x86" else None ld( "ImageHasSafeExceptionHandlers", map={"false": ":NO", "true": ""}, @@ -960,15 +954,12 @@ def GetRuleShellFlags(self, rule): def _HasExplicitRuleForExtension(self, spec, extension): """Determine if there's an explicit rule for a particular extension.""" - for rule in spec.get("rules", []): - if rule["extension"] == extension: - return True - return False + return any(rule["extension"] == extension for rule in spec.get("rules", [])) def _HasExplicitIdlActions(self, spec): """Determine if an action should not run midl for .idl files.""" return any( - [action.get("explicit_idl_action", 0) for action in spec.get("actions", [])] + action.get("explicit_idl_action", 0) for action in spec.get("actions", []) ) def HasExplicitIdlRulesOrActions(self, spec): diff --git a/tools/gyp/pylib/gyp/win_tool.py b/tools/gyp/pylib/gyp/win_tool.py index 638eee40029411..171d7295747fcd 100755 --- a/tools/gyp/pylib/gyp/win_tool.py +++ b/tools/gyp/pylib/gyp/win_tool.py @@ -219,11 +219,10 @@ def ExecLinkWithManifests( our_manifest = "%(out)s.manifest" % variables # Load and normalize the manifests. mt.exe sometimes removes whitespace, # and sometimes doesn't unfortunately. - with open(our_manifest) as our_f: - with open(assert_manifest) as assert_f: - translator = str.maketrans('', '', string.whitespace) - our_data = our_f.read().translate(translator) - assert_data = assert_f.read().translate(translator) + with open(our_manifest) as our_f, open(assert_manifest) as assert_f: + translator = str.maketrans("", "", string.whitespace) + our_data = our_f.read().translate(translator) + assert_data = assert_f.read().translate(translator) if our_data != assert_data: os.unlink(out) diff --git a/tools/gyp/pylib/gyp/xcode_emulation.py b/tools/gyp/pylib/gyp/xcode_emulation.py index a75d8eeab7bda0..29caf1ce7fbb97 100644 --- a/tools/gyp/pylib/gyp/xcode_emulation.py +++ b/tools/gyp/pylib/gyp/xcode_emulation.py @@ -685,10 +685,7 @@ def GetCflags(self, configname, arch=None): if platform_root: cflags.append("-F" + platform_root + "/Developer/Library/Frameworks/") - if sdk_root: - framework_root = sdk_root - else: - framework_root = "" + framework_root = sdk_root if sdk_root else "" config = self.spec["configurations"][self.configname] framework_dirs = config.get("mac_framework_dirs", []) for directory in framework_dirs: @@ -1248,10 +1245,7 @@ def _AdjustLibrary(self, library, config_name=None): l_flag = "-framework " + os.path.splitext(os.path.basename(library))[0] else: m = self.library_re.match(library) - if m: - l_flag = "-l" + m.group(1) - else: - l_flag = library + l_flag = "-l" + m.group(1) if m else library sdk_root = self._SdkPath(config_name) if not sdk_root: @@ -1545,7 +1539,7 @@ def CLTVersion(): except GypError: continue - regex = re.compile(r'Command Line Tools for Xcode\s+(?P\S+)') + regex = re.compile(r"Command Line Tools for Xcode\s+(?P\S+)") try: output = GetStdout(["/usr/sbin/softwareupdate", "--history"]) return re.search(regex, output).groupdict()["version"] diff --git a/tools/gyp/pylib/gyp/xcodeproj_file.py b/tools/gyp/pylib/gyp/xcodeproj_file.py index 076eea37211179..33c667c266bf69 100644 --- a/tools/gyp/pylib/gyp/xcodeproj_file.py +++ b/tools/gyp/pylib/gyp/xcodeproj_file.py @@ -971,7 +971,7 @@ def __init__(self, properties=None, id=None, parent=None): if "path" in self._properties and "name" not in self._properties: path = self._properties["path"] name = posixpath.basename(path) - if name != "" and path != name: + if name not in ("", path): self.SetProperty("name", name) if "path" in self._properties and ( @@ -2355,9 +2355,8 @@ def __init__( # property was supplied, set "productName" if it is not present. Also set # the "PRODUCT_NAME" build setting in each configuration, but only if # the setting is not present in any build configuration. - if "name" in self._properties: - if "productName" not in self._properties: - self.SetProperty("productName", self._properties["name"]) + if "name" in self._properties and "productName" not in self._properties: + self.SetProperty("productName", self._properties["name"]) if "productName" in self._properties: if "buildConfigurationList" in self._properties: @@ -2547,13 +2546,12 @@ def __init__( force_extension = suffix[1:] if ( - self._properties["productType"] - == "com.apple.product-type-bundle.unit.test" - or self._properties["productType"] - == "com.apple.product-type-bundle.ui-testing" - ): - if force_extension is None: - force_extension = suffix[1:] + self._properties["productType"] in { + "com.apple.product-type-bundle.unit.test", + "com.apple.product-type-bundle.ui-testing" + } + ) and force_extension is None: + force_extension = suffix[1:] if force_extension is not None: # If it's a wrapper (bundle), set WRAPPER_EXTENSION. @@ -2636,10 +2634,13 @@ def HeadersPhase(self): # frameworks phases, if any. insert_at = len(self._properties["buildPhases"]) for index, phase in enumerate(self._properties["buildPhases"]): - if ( - isinstance(phase, PBXResourcesBuildPhase) - or isinstance(phase, PBXSourcesBuildPhase) - or isinstance(phase, PBXFrameworksBuildPhase) + if isinstance( + phase, + ( + PBXResourcesBuildPhase, + PBXSourcesBuildPhase, + PBXFrameworksBuildPhase, + ), ): insert_at = index break @@ -2658,9 +2659,7 @@ def ResourcesPhase(self): # phases, if any. insert_at = len(self._properties["buildPhases"]) for index, phase in enumerate(self._properties["buildPhases"]): - if isinstance(phase, PBXSourcesBuildPhase) or isinstance( - phase, PBXFrameworksBuildPhase - ): + if isinstance(phase, (PBXSourcesBuildPhase, PBXFrameworksBuildPhase)): insert_at = index break @@ -2701,8 +2700,10 @@ def AddDependency(self, other): other._properties["productType"] == static_library_type or ( ( - other._properties["productType"] == shared_library_type - or other._properties["productType"] == framework_type + other._properties["productType"] in { + shared_library_type, + framework_type + } ) and ( (not other.HasBuildSetting("MACH_O_TYPE")) @@ -2770,7 +2771,7 @@ def __init__(self, properties=None, id=None, parent=None, path=None): self.path = path self._other_pbxprojects = {} # super - return XCContainerPortal.__init__(self, properties, id, parent) + XCContainerPortal.__init__(self, properties, id, parent) def Name(self): name = self.path @@ -2990,7 +2991,7 @@ def AddOrGetProjectReference(self, other_pbxproject): # Xcode seems to sort this list case-insensitively self._properties["projectReferences"] = sorted( self._properties["projectReferences"], - key=lambda x: x["ProjectRef"].Name().lower + key=lambda x: x["ProjectRef"].Name().lower() ) else: # The link already exists. Pull out the relevnt data. diff --git a/tools/gyp/pylib/packaging/LICENSE b/tools/gyp/pylib/packaging/LICENSE new file mode 100644 index 00000000000000..6f62d44e4ef733 --- /dev/null +++ b/tools/gyp/pylib/packaging/LICENSE @@ -0,0 +1,3 @@ +This software is made available under the terms of *either* of the licenses +found in LICENSE.APACHE or LICENSE.BSD. Contributions to this software is made +under the terms of *both* these licenses. diff --git a/tools/gyp/pylib/packaging/LICENSE.APACHE b/tools/gyp/pylib/packaging/LICENSE.APACHE new file mode 100644 index 00000000000000..f433b1a53f5b83 --- /dev/null +++ b/tools/gyp/pylib/packaging/LICENSE.APACHE @@ -0,0 +1,177 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/tools/gyp/pylib/packaging/LICENSE.BSD b/tools/gyp/pylib/packaging/LICENSE.BSD new file mode 100644 index 00000000000000..42ce7b75c92fb0 --- /dev/null +++ b/tools/gyp/pylib/packaging/LICENSE.BSD @@ -0,0 +1,23 @@ +Copyright (c) Donald Stufft and individual contributors. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/tools/gyp/pylib/packaging/__init__.py b/tools/gyp/pylib/packaging/__init__.py new file mode 100644 index 00000000000000..5fd91838316fbe --- /dev/null +++ b/tools/gyp/pylib/packaging/__init__.py @@ -0,0 +1,15 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +__title__ = "packaging" +__summary__ = "Core utilities for Python packages" +__uri__ = "https://github.com/pypa/packaging" + +__version__ = "23.3.dev0" + +__author__ = "Donald Stufft and individual contributors" +__email__ = "donald@stufft.io" + +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = "2014 %s" % __author__ diff --git a/tools/gyp/pylib/packaging/_elffile.py b/tools/gyp/pylib/packaging/_elffile.py new file mode 100644 index 00000000000000..6fb19b30bb53c1 --- /dev/null +++ b/tools/gyp/pylib/packaging/_elffile.py @@ -0,0 +1,108 @@ +""" +ELF file parser. + +This provides a class ``ELFFile`` that parses an ELF executable in a similar +interface to ``ZipFile``. Only the read interface is implemented. + +Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca +ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html +""" + +import enum +import os +import struct +from typing import IO, Optional, Tuple + + +class ELFInvalid(ValueError): + pass + + +class EIClass(enum.IntEnum): + C32 = 1 + C64 = 2 + + +class EIData(enum.IntEnum): + Lsb = 1 + Msb = 2 + + +class EMachine(enum.IntEnum): + I386 = 3 + S390 = 22 + Arm = 40 + X8664 = 62 + AArc64 = 183 + + +class ELFFile: + """ + Representation of an ELF executable. + """ + + def __init__(self, f: IO[bytes]) -> None: + self._f = f + + try: + ident = self._read("16B") + except struct.error: + raise ELFInvalid("unable to parse identification") + magic = bytes(ident[:4]) + if magic != b"\x7fELF": + raise ELFInvalid(f"invalid magic: {magic!r}") + + self.capacity = ident[4] # Format for program header (bitness). + self.encoding = ident[5] # Data structure encoding (endianness). + + try: + # e_fmt: Format for program header. + # p_fmt: Format for section header. + # p_idx: Indexes to find p_type, p_offset, and p_filesz. + e_fmt, self._p_fmt, self._p_idx = { + (1, 1): ("HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB. + (2, 1): ("HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB. + }[(self.capacity, self.encoding)] + except KeyError: + raise ELFInvalid( + f"unrecognized capacity ({self.capacity}) or " + f"encoding ({self.encoding})" + ) + + try: + ( + _, + self.machine, # Architecture type. + _, + _, + self._e_phoff, # Offset of program header. + _, + self.flags, # Processor-specific flags. + _, + self._e_phentsize, # Size of section. + self._e_phnum, # Number of sections. + ) = self._read(e_fmt) + except struct.error as e: + raise ELFInvalid("unable to parse machine and section information") from e + + def _read(self, fmt: str) -> Tuple[int, ...]: + return struct.unpack(fmt, self._f.read(struct.calcsize(fmt))) + + @property + def interpreter(self) -> Optional[str]: + """ + The path recorded in the ``PT_INTERP`` section header. + """ + for index in range(self._e_phnum): + self._f.seek(self._e_phoff + self._e_phentsize * index) + try: + data = self._read(self._p_fmt) + except struct.error: + continue + if data[self._p_idx[0]] != 3: # Not PT_INTERP. + continue + self._f.seek(data[self._p_idx[1]]) + return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0") + return None diff --git a/tools/gyp/pylib/packaging/_manylinux.py b/tools/gyp/pylib/packaging/_manylinux.py new file mode 100644 index 00000000000000..3705d50db9193e --- /dev/null +++ b/tools/gyp/pylib/packaging/_manylinux.py @@ -0,0 +1,252 @@ +import collections +import contextlib +import functools +import os +import re +import sys +import warnings +from typing import Dict, Generator, Iterator, NamedTuple, Optional, Sequence, Tuple + +from ._elffile import EIClass, EIData, ELFFile, EMachine + +EF_ARM_ABIMASK = 0xFF000000 +EF_ARM_ABI_VER5 = 0x05000000 +EF_ARM_ABI_FLOAT_HARD = 0x00000400 + + +# `os.PathLike` not a generic type until Python 3.9, so sticking with `str` +# as the type for `path` until then. +@contextlib.contextmanager +def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]: + try: + with open(path, "rb") as f: + yield ELFFile(f) + except (OSError, TypeError, ValueError): + yield None + + +def _is_linux_armhf(executable: str) -> bool: + # hard-float ABI can be detected from the ELF header of the running + # process + # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.Arm + and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5 + and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD + ) + + +def _is_linux_i686(executable: str) -> bool: + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.I386 + ) + + +def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool: + if "armv7l" in archs: + return _is_linux_armhf(executable) + if "i686" in archs: + return _is_linux_i686(executable) + allowed_archs = {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x", "loongarch64"} + return any(arch in allowed_archs for arch in archs) + + +# If glibc ever changes its major version, we need to know what the last +# minor version was, so we can build the complete list of all versions. +# For now, guess what the highest minor version might be, assume it will +# be 50 for testing. Once this actually happens, update the dictionary +# with the actual value. +_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50) + + +class _GLibCVersion(NamedTuple): + major: int + minor: int + + +def _glibc_version_string_confstr() -> Optional[str]: + """ + Primary implementation of glibc_version_string using os.confstr. + """ + # os.confstr is quite a bit faster than ctypes.DLL. It's also less likely + # to be broken or missing. This strategy is used in the standard library + # platform module. + # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183 + try: + # Should be a string like "glibc 2.17". + version_string: str = getattr(os, "confstr")("CS_GNU_LIBC_VERSION") + assert version_string is not None + _, version = version_string.rsplit() + except (AssertionError, AttributeError, OSError, ValueError): + # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... + return None + return version + + +def _glibc_version_string_ctypes() -> Optional[str]: + """ + Fallback implementation of glibc_version_string using ctypes. + """ + try: + import ctypes + except ImportError: + return None + + # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen + # manpage says, "If filename is NULL, then the returned handle is for the + # main program". This way we can let the linker do the work to figure out + # which libc our process is actually using. + # + # We must also handle the special case where the executable is not a + # dynamically linked executable. This can occur when using musl libc, + # for example. In this situation, dlopen() will error, leading to an + # OSError. Interestingly, at least in the case of musl, there is no + # errno set on the OSError. The single string argument used to construct + # OSError comes from libc itself and is therefore not portable to + # hard code here. In any case, failure to call dlopen() means we + # can proceed, so we bail on our attempt. + try: + process_namespace = ctypes.CDLL(None) + except OSError: + return None + + try: + gnu_get_libc_version = process_namespace.gnu_get_libc_version + except AttributeError: + # Symbol doesn't exist -> therefore, we are not linked to + # glibc. + return None + + # Call gnu_get_libc_version, which returns a string like "2.5" + gnu_get_libc_version.restype = ctypes.c_char_p + version_str: str = gnu_get_libc_version() + # py2 / py3 compatibility: + if not isinstance(version_str, str): + version_str = version_str.decode("ascii") + + return version_str + + +def _glibc_version_string() -> Optional[str]: + """Returns glibc version string, or None if not using glibc.""" + return _glibc_version_string_confstr() or _glibc_version_string_ctypes() + + +def _parse_glibc_version(version_str: str) -> Tuple[int, int]: + """Parse glibc version. + + We use a regexp instead of str.split because we want to discard any + random junk that might come after the minor version -- this might happen + in patched/forked versions of glibc (e.g. Linaro's version of glibc + uses version strings like "2.20-2014.11"). See gh-3588. + """ + m = re.match(r"(?P[0-9]+)\.(?P[0-9]+)", version_str) + if not m: + warnings.warn( + f"Expected glibc version with 2 components major.minor," + f" got: {version_str}", + RuntimeWarning, + ) + return -1, -1 + return int(m.group("major")), int(m.group("minor")) + + +@functools.lru_cache() +def _get_glibc_version() -> Tuple[int, int]: + version_str = _glibc_version_string() + if version_str is None: + return (-1, -1) + return _parse_glibc_version(version_str) + + +# From PEP 513, PEP 600 +def _is_compatible(arch: str, version: _GLibCVersion) -> bool: + sys_glibc = _get_glibc_version() + if sys_glibc < version: + return False + # Check for presence of _manylinux module. + try: + import _manylinux # noqa + except ImportError: + return True + if hasattr(_manylinux, "manylinux_compatible"): + result = _manylinux.manylinux_compatible(version[0], version[1], arch) + if result is not None: + return bool(result) + return True + if version == _GLibCVersion(2, 5): + if hasattr(_manylinux, "manylinux1_compatible"): + return bool(_manylinux.manylinux1_compatible) + if version == _GLibCVersion(2, 12): + if hasattr(_manylinux, "manylinux2010_compatible"): + return bool(_manylinux.manylinux2010_compatible) + if version == _GLibCVersion(2, 17): + if hasattr(_manylinux, "manylinux2014_compatible"): + return bool(_manylinux.manylinux2014_compatible) + return True + + +_LEGACY_MANYLINUX_MAP = { + # CentOS 7 w/ glibc 2.17 (PEP 599) + (2, 17): "manylinux2014", + # CentOS 6 w/ glibc 2.12 (PEP 571) + (2, 12): "manylinux2010", + # CentOS 5 w/ glibc 2.5 (PEP 513) + (2, 5): "manylinux1", +} + + +def platform_tags(archs: Sequence[str]) -> Iterator[str]: + """Generate manylinux tags compatible to the current platform. + + :param archs: Sequence of compatible architectures. + The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be manylinux-compatible. + + :returns: An iterator of compatible manylinux tags. + """ + if not _have_compatible_abi(sys.executable, archs): + return + # Oldest glibc to be supported regardless of architecture is (2, 17). + too_old_glibc2 = _GLibCVersion(2, 16) + if set(archs) & {"x86_64", "i686"}: + # On x86/i686 also oldest glibc to be supported is (2, 5). + too_old_glibc2 = _GLibCVersion(2, 4) + current_glibc = _GLibCVersion(*_get_glibc_version()) + glibc_max_list = [current_glibc] + # We can assume compatibility across glibc major versions. + # https://sourceware.org/bugzilla/show_bug.cgi?id=24636 + # + # Build a list of maximum glibc versions so that we can + # output the canonical list of all glibc from current_glibc + # down to too_old_glibc2, including all intermediary versions. + for glibc_major in range(current_glibc.major - 1, 1, -1): + glibc_minor = _LAST_GLIBC_MINOR[glibc_major] + glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor)) + for arch in archs: + for glibc_max in glibc_max_list: + if glibc_max.major == too_old_glibc2.major: + min_minor = too_old_glibc2.minor + else: + # For other glibc major versions oldest supported is (x, 0). + min_minor = -1 + for glibc_minor in range(glibc_max.minor, min_minor, -1): + glibc_version = _GLibCVersion(glibc_max.major, glibc_minor) + tag = "manylinux_{}_{}".format(*glibc_version) + if _is_compatible(arch, glibc_version): + yield f"{tag}_{arch}" + # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags. + if glibc_version in _LEGACY_MANYLINUX_MAP: + legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] + if _is_compatible(arch, glibc_version): + yield f"{legacy_tag}_{arch}" diff --git a/tools/gyp/pylib/packaging/_musllinux.py b/tools/gyp/pylib/packaging/_musllinux.py new file mode 100644 index 00000000000000..86419df9d7087f --- /dev/null +++ b/tools/gyp/pylib/packaging/_musllinux.py @@ -0,0 +1,83 @@ +"""PEP 656 support. + +This module implements logic to detect if the currently running Python is +linked against musl, and what musl version is used. +""" + +import functools +import re +import subprocess +import sys +from typing import Iterator, NamedTuple, Optional, Sequence + +from ._elffile import ELFFile + + +class _MuslVersion(NamedTuple): + major: int + minor: int + + +def _parse_musl_version(output: str) -> Optional[_MuslVersion]: + lines = [n for n in (n.strip() for n in output.splitlines()) if n] + if len(lines) < 2 or lines[0][:4] != "musl": + return None + m = re.match(r"Version (\d+)\.(\d+)", lines[1]) + if not m: + return None + return _MuslVersion(major=int(m.group(1)), minor=int(m.group(2))) + + +@functools.lru_cache() +def _get_musl_version(executable: str) -> Optional[_MuslVersion]: + """Detect currently-running musl runtime version. + + This is done by checking the specified executable's dynamic linking + information, and invoking the loader to parse its output for a version + string. If the loader is musl, the output would be something like:: + + musl libc (x86_64) + Version 1.2.2 + Dynamic Program Loader + """ + try: + with open(executable, "rb") as f: + ld = ELFFile(f).interpreter + except (OSError, TypeError, ValueError): + return None + if ld is None or "musl" not in ld: + return None + proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True) + return _parse_musl_version(proc.stderr) + + +def platform_tags(archs: Sequence[str]) -> Iterator[str]: + """Generate musllinux tags compatible to the current platform. + + :param archs: Sequence of compatible architectures. + The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be musllinux-compatible. + + :returns: An iterator of compatible musllinux tags. + """ + sys_musl = _get_musl_version(sys.executable) + if sys_musl is None: # Python not dynamically linked against musl. + return + for arch in archs: + for minor in range(sys_musl.minor, -1, -1): + yield f"musllinux_{sys_musl.major}_{minor}_{arch}" + + +if __name__ == "__main__": # pragma: no cover + import sysconfig + + plat = sysconfig.get_platform() + assert plat.startswith("linux-"), "not linux" + + print("plat:", plat) + print("musl:", _get_musl_version(sys.executable)) + print("tags:", end=" ") + for t in platform_tags(re.sub(r"[.-]", "_", plat.split("-", 1)[-1])): + print(t, end="\n ") diff --git a/tools/gyp/pylib/packaging/_parser.py b/tools/gyp/pylib/packaging/_parser.py new file mode 100644 index 00000000000000..4576981c2dd755 --- /dev/null +++ b/tools/gyp/pylib/packaging/_parser.py @@ -0,0 +1,359 @@ +"""Handwritten parser of dependency specifiers. + +The docstring for each __parse_* function contains ENBF-inspired grammar representing +the implementation. +""" + +import ast +from typing import Any, List, NamedTuple, Optional, Tuple, Union + +from ._tokenizer import DEFAULT_RULES, Tokenizer + + +class Node: + def __init__(self, value: str) -> None: + self.value = value + + def __str__(self) -> str: + return self.value + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" + + def serialize(self) -> str: + raise NotImplementedError + + +class Variable(Node): + def serialize(self) -> str: + return str(self) + + +class Value(Node): + def serialize(self) -> str: + return f'"{self}"' + + +class Op(Node): + def serialize(self) -> str: + return str(self) + + +MarkerVar = Union[Variable, Value] +MarkerItem = Tuple[MarkerVar, Op, MarkerVar] +# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]] +# MarkerList = List[Union["MarkerList", MarkerAtom, str]] +# mypy does not support recursive type definition +# https://github.com/python/mypy/issues/731 +MarkerAtom = Any +MarkerList = List[Any] + + +class ParsedRequirement(NamedTuple): + name: str + url: str + extras: List[str] + specifier: str + marker: Optional[MarkerList] + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for dependency specifier +# -------------------------------------------------------------------------------------- +def parse_requirement(source: str) -> ParsedRequirement: + return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement: + """ + requirement = WS? IDENTIFIER WS? extras WS? requirement_details + """ + tokenizer.consume("WS") + + name_token = tokenizer.expect( + "IDENTIFIER", expected="package name at the start of dependency specifier" + ) + name = name_token.text + tokenizer.consume("WS") + + extras = _parse_extras(tokenizer) + tokenizer.consume("WS") + + url, specifier, marker = _parse_requirement_details(tokenizer) + tokenizer.expect("END", expected="end of dependency specifier") + + return ParsedRequirement(name, url, extras, specifier, marker) + + +def _parse_requirement_details( + tokenizer: Tokenizer, +) -> Tuple[str, str, Optional[MarkerList]]: + """ + requirement_details = AT URL (WS requirement_marker?)? + | specifier WS? (requirement_marker)? + """ + + specifier = "" + url = "" + marker = None + + if tokenizer.check("AT"): + tokenizer.read() + tokenizer.consume("WS") + + url_start = tokenizer.position + url = tokenizer.expect("URL", expected="URL after @").text + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + tokenizer.expect("WS", expected="whitespace after URL") + + # The input might end after whitespace. + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, span_start=url_start, after="URL and whitespace" + ) + else: + specifier_start = tokenizer.position + specifier = _parse_specifier(tokenizer) + tokenizer.consume("WS") + + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, + span_start=specifier_start, + after=( + "version specifier" + if specifier + else "name and no valid version specifier" + ), + ) + + return (url, specifier, marker) + + +def _parse_requirement_marker( + tokenizer: Tokenizer, *, span_start: int, after: str +) -> MarkerList: + """ + requirement_marker = SEMICOLON marker WS? + """ + + if not tokenizer.check("SEMICOLON"): + tokenizer.raise_syntax_error( + f"Expected end or semicolon (after {after})", + span_start=span_start, + ) + tokenizer.read() + + marker = _parse_marker(tokenizer) + tokenizer.consume("WS") + + return marker + + +def _parse_extras(tokenizer: Tokenizer) -> List[str]: + """ + extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)? + """ + if not tokenizer.check("LEFT_BRACKET", peek=True): + return [] + + with tokenizer.enclosing_tokens( + "LEFT_BRACKET", + "RIGHT_BRACKET", + around="extras", + ): + tokenizer.consume("WS") + extras = _parse_extras_list(tokenizer) + tokenizer.consume("WS") + + return extras + + +def _parse_extras_list(tokenizer: Tokenizer) -> List[str]: + """ + extras_list = identifier (wsp* ',' wsp* identifier)* + """ + extras: List[str] = [] + + if not tokenizer.check("IDENTIFIER"): + return extras + + extras.append(tokenizer.read().text) + + while True: + tokenizer.consume("WS") + if tokenizer.check("IDENTIFIER", peek=True): + tokenizer.raise_syntax_error("Expected comma between extra names") + elif not tokenizer.check("COMMA"): + break + + tokenizer.read() + tokenizer.consume("WS") + + extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma") + extras.append(extra_token.text) + + return extras + + +def _parse_specifier(tokenizer: Tokenizer) -> str: + """ + specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS + | WS? version_many WS? + """ + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="version specifier", + ): + tokenizer.consume("WS") + parsed_specifiers = _parse_version_many(tokenizer) + tokenizer.consume("WS") + + return parsed_specifiers + + +def _parse_version_many(tokenizer: Tokenizer) -> str: + """ + version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)? + """ + parsed_specifiers = "" + while tokenizer.check("SPECIFIER"): + span_start = tokenizer.position + parsed_specifiers += tokenizer.read().text + if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True): + tokenizer.raise_syntax_error( + ".* suffix can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position + 1, + ) + if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True): + tokenizer.raise_syntax_error( + "Local version label can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position, + ) + tokenizer.consume("WS") + if not tokenizer.check("COMMA"): + break + parsed_specifiers += tokenizer.read().text + tokenizer.consume("WS") + + return parsed_specifiers + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for marker expression +# -------------------------------------------------------------------------------------- +def parse_marker(source: str) -> MarkerList: + return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList: + retval = _parse_marker(tokenizer) + tokenizer.expect("END", expected="end of marker expression") + return retval + + +def _parse_marker(tokenizer: Tokenizer) -> MarkerList: + """ + marker = marker_atom (BOOLOP marker_atom)+ + """ + expression = [_parse_marker_atom(tokenizer)] + while tokenizer.check("BOOLOP"): + token = tokenizer.read() + expr_right = _parse_marker_atom(tokenizer) + expression.extend((token.text, expr_right)) + return expression + + +def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom: + """ + marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS? + | WS? marker_item WS? + """ + + tokenizer.consume("WS") + if tokenizer.check("LEFT_PARENTHESIS", peek=True): + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="marker expression", + ): + tokenizer.consume("WS") + marker: MarkerAtom = _parse_marker(tokenizer) + tokenizer.consume("WS") + else: + marker = _parse_marker_item(tokenizer) + tokenizer.consume("WS") + return marker + + +def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem: + """ + marker_item = WS? marker_var WS? marker_op WS? marker_var WS? + """ + tokenizer.consume("WS") + marker_var_left = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + marker_op = _parse_marker_op(tokenizer) + tokenizer.consume("WS") + marker_var_right = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + return (marker_var_left, marker_op, marker_var_right) + + +def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar: + """ + marker_var = VARIABLE | QUOTED_STRING + """ + if tokenizer.check("VARIABLE"): + return process_env_var(tokenizer.read().text.replace(".", "_")) + elif tokenizer.check("QUOTED_STRING"): + return process_python_str(tokenizer.read().text) + else: + tokenizer.raise_syntax_error( + message="Expected a marker variable or quoted string" + ) + + +def process_env_var(env_var: str) -> Variable: + if ( + env_var == "platform_python_implementation" + or env_var == "python_implementation" + ): + return Variable("platform_python_implementation") + else: + return Variable(env_var) + + +def process_python_str(python_str: str) -> Value: + value = ast.literal_eval(python_str) + return Value(str(value)) + + +def _parse_marker_op(tokenizer: Tokenizer) -> Op: + """ + marker_op = IN | NOT IN | OP + """ + if tokenizer.check("IN"): + tokenizer.read() + return Op("in") + elif tokenizer.check("NOT"): + tokenizer.read() + tokenizer.expect("WS", expected="whitespace after 'not'") + tokenizer.expect("IN", expected="'in' after 'not'") + return Op("not in") + elif tokenizer.check("OP"): + return Op(tokenizer.read().text) + else: + return tokenizer.raise_syntax_error( + "Expected marker operator, one of " + "<=, <, !=, ==, >=, >, ~=, ===, in, not in" + ) diff --git a/tools/gyp/pylib/packaging/_structures.py b/tools/gyp/pylib/packaging/_structures.py new file mode 100644 index 00000000000000..90a6465f9682c8 --- /dev/null +++ b/tools/gyp/pylib/packaging/_structures.py @@ -0,0 +1,61 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + + +class InfinityType: + def __repr__(self) -> str: + return "Infinity" + + def __hash__(self) -> int: + return hash(repr(self)) + + def __lt__(self, other: object) -> bool: + return False + + def __le__(self, other: object) -> bool: + return False + + def __eq__(self, other: object) -> bool: + return isinstance(other, self.__class__) + + def __gt__(self, other: object) -> bool: + return True + + def __ge__(self, other: object) -> bool: + return True + + def __neg__(self: object) -> "NegativeInfinityType": + return NegativeInfinity + + +Infinity = InfinityType() + + +class NegativeInfinityType: + def __repr__(self) -> str: + return "-Infinity" + + def __hash__(self) -> int: + return hash(repr(self)) + + def __lt__(self, other: object) -> bool: + return True + + def __le__(self, other: object) -> bool: + return True + + def __eq__(self, other: object) -> bool: + return isinstance(other, self.__class__) + + def __gt__(self, other: object) -> bool: + return False + + def __ge__(self, other: object) -> bool: + return False + + def __neg__(self: object) -> InfinityType: + return Infinity + + +NegativeInfinity = NegativeInfinityType() diff --git a/tools/gyp/pylib/packaging/_tokenizer.py b/tools/gyp/pylib/packaging/_tokenizer.py new file mode 100644 index 00000000000000..dd0d648d49a7c1 --- /dev/null +++ b/tools/gyp/pylib/packaging/_tokenizer.py @@ -0,0 +1,192 @@ +import contextlib +import re +from dataclasses import dataclass +from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union + +from .specifiers import Specifier + + +@dataclass +class Token: + name: str + text: str + position: int + + +class ParserSyntaxError(Exception): + """The provided source text could not be parsed correctly.""" + + def __init__( + self, + message: str, + *, + source: str, + span: Tuple[int, int], + ) -> None: + self.span = span + self.message = message + self.source = source + + super().__init__() + + def __str__(self) -> str: + marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^" + return "\n ".join([self.message, self.source, marker]) + + +DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = { + "LEFT_PARENTHESIS": r"\(", + "RIGHT_PARENTHESIS": r"\)", + "LEFT_BRACKET": r"\[", + "RIGHT_BRACKET": r"\]", + "SEMICOLON": r";", + "COMMA": r",", + "QUOTED_STRING": re.compile( + r""" + ( + ('[^']*') + | + ("[^"]*") + ) + """, + re.VERBOSE, + ), + "OP": r"(===|==|~=|!=|<=|>=|<|>)", + "BOOLOP": r"\b(or|and)\b", + "IN": r"\bin\b", + "NOT": r"\bnot\b", + "VARIABLE": re.compile( + r""" + \b( + python_version + |python_full_version + |os[._]name + |sys[._]platform + |platform_(release|system) + |platform[._](version|machine|python_implementation) + |python_implementation + |implementation_(name|version) + |extra + )\b + """, + re.VERBOSE, + ), + "SPECIFIER": re.compile( + Specifier._operator_regex_str + Specifier._version_regex_str, + re.VERBOSE | re.IGNORECASE, + ), + "AT": r"\@", + "URL": r"[^ \t]+", + "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b", + "VERSION_PREFIX_TRAIL": r"\.\*", + "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*", + "WS": r"[ \t]+", + "END": r"$", +} + + +class Tokenizer: + """Context-sensitive token parsing. + + Provides methods to examine the input stream to check whether the next token + matches. + """ + + def __init__( + self, + source: str, + *, + rules: "Dict[str, Union[str, re.Pattern[str]]]", + ) -> None: + self.source = source + self.rules: Dict[str, re.Pattern[str]] = { + name: re.compile(pattern) for name, pattern in rules.items() + } + self.next_token: Optional[Token] = None + self.position = 0 + + def consume(self, name: str) -> None: + """Move beyond provided token name, if at current position.""" + if self.check(name): + self.read() + + def check(self, name: str, *, peek: bool = False) -> bool: + """Check whether the next token has the provided name. + + By default, if the check succeeds, the token *must* be read before + another check. If `peek` is set to `True`, the token is not loaded and + would need to be checked again. + """ + assert ( + self.next_token is None + ), f"Cannot check for {name!r}, already have {self.next_token!r}" + assert name in self.rules, f"Unknown token name: {name!r}" + + expression = self.rules[name] + + match = expression.match(self.source, self.position) + if match is None: + return False + if not peek: + self.next_token = Token(name, match[0], self.position) + return True + + def expect(self, name: str, *, expected: str) -> Token: + """Expect a certain token name next, failing with a syntax error otherwise. + + The token is *not* read. + """ + if not self.check(name): + raise self.raise_syntax_error(f"Expected {expected}") + return self.read() + + def read(self) -> Token: + """Consume the next token and return it.""" + token = self.next_token + assert token is not None + + self.position += len(token.text) + self.next_token = None + + return token + + def raise_syntax_error( + self, + message: str, + *, + span_start: Optional[int] = None, + span_end: Optional[int] = None, + ) -> NoReturn: + """Raise ParserSyntaxError at the given position.""" + span = ( + self.position if span_start is None else span_start, + self.position if span_end is None else span_end, + ) + raise ParserSyntaxError( + message, + source=self.source, + span=span, + ) + + @contextlib.contextmanager + def enclosing_tokens( + self, open_token: str, close_token: str, *, around: str + ) -> Iterator[None]: + if self.check(open_token): + open_position = self.position + self.read() + else: + open_position = None + + yield + + if open_position is None: + return + + if not self.check(close_token): + self.raise_syntax_error( + f"Expected matching {close_token} for {open_token}, after {around}", + span_start=open_position, + ) + + self.read() diff --git a/tools/gyp/pylib/packaging/markers.py b/tools/gyp/pylib/packaging/markers.py new file mode 100644 index 00000000000000..8b98fca7233be6 --- /dev/null +++ b/tools/gyp/pylib/packaging/markers.py @@ -0,0 +1,252 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import operator +import os +import platform +import sys +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +from ._parser import ( + MarkerAtom, + MarkerList, + Op, + Value, + Variable, + parse_marker as _parse_marker, +) +from ._tokenizer import ParserSyntaxError +from .specifiers import InvalidSpecifier, Specifier +from .utils import canonicalize_name + +__all__ = [ + "InvalidMarker", + "UndefinedComparison", + "UndefinedEnvironmentName", + "Marker", + "default_environment", +] + +Operator = Callable[[str, str], bool] + + +class InvalidMarker(ValueError): + """ + An invalid marker was found, users should refer to PEP 508. + """ + + +class UndefinedComparison(ValueError): + """ + An invalid operation was attempted on a value that doesn't support it. + """ + + +class UndefinedEnvironmentName(ValueError): + """ + A name was attempted to be used that does not exist inside of the + environment. + """ + + +def _normalize_extra_values(results: Any) -> Any: + """ + Normalize extra values. + """ + if isinstance(results[0], tuple): + lhs, op, rhs = results[0] + if isinstance(lhs, Variable) and lhs.value == "extra": + normalized_extra = canonicalize_name(rhs.value) + rhs = Value(normalized_extra) + elif isinstance(rhs, Variable) and rhs.value == "extra": + normalized_extra = canonicalize_name(lhs.value) + lhs = Value(normalized_extra) + results[0] = lhs, op, rhs + return results + + +def _format_marker( + marker: Union[List[str], MarkerAtom, str], first: Optional[bool] = True +) -> str: + + assert isinstance(marker, (list, tuple, str)) + + # Sometimes we have a structure like [[...]] which is a single item list + # where the single item is itself it's own list. In that case we want skip + # the rest of this function so that we don't get extraneous () on the + # outside. + if ( + isinstance(marker, list) + and len(marker) == 1 + and isinstance(marker[0], (list, tuple)) + ): + return _format_marker(marker[0]) + + if isinstance(marker, list): + inner = (_format_marker(m, first=False) for m in marker) + if first: + return " ".join(inner) + else: + return "(" + " ".join(inner) + ")" + elif isinstance(marker, tuple): + return " ".join([m.serialize() for m in marker]) + else: + return marker + + +_operators: Dict[str, Operator] = { + "in": lambda lhs, rhs: lhs in rhs, + "not in": lambda lhs, rhs: lhs not in rhs, + "<": operator.lt, + "<=": operator.le, + "==": operator.eq, + "!=": operator.ne, + ">=": operator.ge, + ">": operator.gt, +} + + +def _eval_op(lhs: str, op: Op, rhs: str) -> bool: + try: + spec = Specifier("".join([op.serialize(), rhs])) + except InvalidSpecifier: + pass + else: + return spec.contains(lhs, prereleases=True) + + oper: Optional[Operator] = _operators.get(op.serialize()) + if oper is None: + raise UndefinedComparison(f"Undefined {op!r} on {lhs!r} and {rhs!r}.") + + return oper(lhs, rhs) + + +def _normalize(*values: str, key: str) -> Tuple[str, ...]: + # PEP 685 – Comparison of extra names for optional distribution dependencies + # https://peps.python.org/pep-0685/ + # > When comparing extra names, tools MUST normalize the names being + # > compared using the semantics outlined in PEP 503 for names + if key == "extra": + return tuple(canonicalize_name(v) for v in values) + + # other environment markers don't have such standards + return values + + +def _evaluate_markers(markers: MarkerList, environment: Dict[str, str]) -> bool: + groups: List[List[bool]] = [[]] + + for marker in markers: + assert isinstance(marker, (list, tuple, str)) + + if isinstance(marker, list): + groups[-1].append(_evaluate_markers(marker, environment)) + elif isinstance(marker, tuple): + lhs, op, rhs = marker + + if isinstance(lhs, Variable): + environment_key = lhs.value + lhs_value = environment[environment_key] + rhs_value = rhs.value + else: + lhs_value = lhs.value + environment_key = rhs.value + rhs_value = environment[environment_key] + + lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key) + groups[-1].append(_eval_op(lhs_value, op, rhs_value)) + else: + assert marker in ["and", "or"] + if marker == "or": + groups.append([]) + + return any(all(item) for item in groups) + + +def format_full_version(info: "sys._version_info") -> str: + version = "{0.major}.{0.minor}.{0.micro}".format(info) + kind = info.releaselevel + if kind != "final": + version += kind[0] + str(info.serial) + return version + + +def default_environment() -> Dict[str, str]: + iver = format_full_version(sys.implementation.version) + implementation_name = sys.implementation.name + return { + "implementation_name": implementation_name, + "implementation_version": iver, + "os_name": os.name, + "platform_machine": platform.machine(), + "platform_release": platform.release(), + "platform_system": platform.system(), + "platform_version": platform.version(), + "python_full_version": platform.python_version(), + "platform_python_implementation": platform.python_implementation(), + "python_version": ".".join(platform.python_version_tuple()[:2]), + "sys_platform": sys.platform, + } + + +class Marker: + def __init__(self, marker: str) -> None: + # Note: We create a Marker object without calling this constructor in + # packaging.requirements.Requirement. If any additional logic is + # added here, make sure to mirror/adapt Requirement. + try: + self._markers = _normalize_extra_values(_parse_marker(marker)) + # The attribute `_markers` can be described in terms of a recursive type: + # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]] + # + # For example, the following expression: + # python_version > "3.6" or (python_version == "3.6" and os_name == "unix") + # + # is parsed into: + # [ + # (, ')>, ), + # 'and', + # [ + # (, , ), + # 'or', + # (, , ) + # ] + # ] + except ParserSyntaxError as e: + raise InvalidMarker(str(e)) from e + + def __str__(self) -> str: + return _format_marker(self._markers) + + def __repr__(self) -> str: + return f"" + + def __hash__(self) -> int: + return hash((self.__class__.__name__, str(self))) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Marker): + return NotImplemented + + return str(self) == str(other) + + def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: + """Evaluate a marker. + + Return the boolean from evaluating the given marker against the + environment. environment is an optional argument to override all or + part of the determined environment. + + The environment is determined from the current Python process. + """ + current_environment = default_environment() + current_environment["extra"] = "" + if environment is not None: + current_environment.update(environment) + # The API used to allow setting extra to None. We need to handle this + # case for backwards compatibility. + if current_environment["extra"] is None: + current_environment["extra"] = "" + + return _evaluate_markers(self._markers, current_environment) diff --git a/tools/gyp/pylib/packaging/metadata.py b/tools/gyp/pylib/packaging/metadata.py new file mode 100644 index 00000000000000..fb274930799da0 --- /dev/null +++ b/tools/gyp/pylib/packaging/metadata.py @@ -0,0 +1,825 @@ +import email.feedparser +import email.header +import email.message +import email.parser +import email.policy +import sys +import typing +from typing import ( + Any, + Callable, + Dict, + Generic, + List, + Optional, + Tuple, + Type, + Union, + cast, +) + +from . import requirements, specifiers, utils, version as version_module + +T = typing.TypeVar("T") +if sys.version_info[:2] >= (3, 8): # pragma: no cover + from typing import Literal, TypedDict +else: # pragma: no cover + if typing.TYPE_CHECKING: + from typing_extensions import Literal, TypedDict + else: + try: + from typing_extensions import Literal, TypedDict + except ImportError: + + class Literal: + def __init_subclass__(*_args, **_kwargs): + pass + + class TypedDict: + def __init_subclass__(*_args, **_kwargs): + pass + + +try: + ExceptionGroup +except NameError: # pragma: no cover + + class ExceptionGroup(Exception): # noqa: N818 + """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11. + + If :external:exc:`ExceptionGroup` is already defined by Python itself, + that version is used instead. + """ + + message: str + exceptions: List[Exception] + + def __init__(self, message: str, exceptions: List[Exception]) -> None: + self.message = message + self.exceptions = exceptions + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})" + +else: # pragma: no cover + ExceptionGroup = ExceptionGroup + + +class InvalidMetadata(ValueError): + """A metadata field contains invalid data.""" + + field: str + """The name of the field that contains invalid data.""" + + def __init__(self, field: str, message: str) -> None: + self.field = field + super().__init__(message) + + +# The RawMetadata class attempts to make as few assumptions about the underlying +# serialization formats as possible. The idea is that as long as a serialization +# formats offer some very basic primitives in *some* way then we can support +# serializing to and from that format. +class RawMetadata(TypedDict, total=False): + """A dictionary of raw core metadata. + + Each field in core metadata maps to a key of this dictionary (when data is + provided). The key is lower-case and underscores are used instead of dashes + compared to the equivalent core metadata field. Any core metadata field that + can be specified multiple times or can hold multiple values in a single + field have a key with a plural name. See :class:`Metadata` whose attributes + match the keys of this dictionary. + + Core metadata fields that can be specified multiple times are stored as a + list or dict depending on which is appropriate for the field. Any fields + which hold multiple values in a single field are stored as a list. + + """ + + # Metadata 1.0 - PEP 241 + metadata_version: str + name: str + version: str + platforms: List[str] + summary: str + description: str + keywords: List[str] + home_page: str + author: str + author_email: str + license: str + + # Metadata 1.1 - PEP 314 + supported_platforms: List[str] + download_url: str + classifiers: List[str] + requires: List[str] + provides: List[str] + obsoletes: List[str] + + # Metadata 1.2 - PEP 345 + maintainer: str + maintainer_email: str + requires_dist: List[str] + provides_dist: List[str] + obsoletes_dist: List[str] + requires_python: str + requires_external: List[str] + project_urls: Dict[str, str] + + # Metadata 2.0 + # PEP 426 attempted to completely revamp the metadata format + # but got stuck without ever being able to build consensus on + # it and ultimately ended up withdrawn. + # + # However, a number of tools had started emitting METADATA with + # `2.0` Metadata-Version, so for historical reasons, this version + # was skipped. + + # Metadata 2.1 - PEP 566 + description_content_type: str + provides_extra: List[str] + + # Metadata 2.2 - PEP 643 + dynamic: List[str] + + # Metadata 2.3 - PEP 685 + # No new fields were added in PEP 685, just some edge case were + # tightened up to provide better interoptability. + + +_STRING_FIELDS = { + "author", + "author_email", + "description", + "description_content_type", + "download_url", + "home_page", + "license", + "maintainer", + "maintainer_email", + "metadata_version", + "name", + "requires_python", + "summary", + "version", +} + +_LIST_FIELDS = { + "classifiers", + "dynamic", + "obsoletes", + "obsoletes_dist", + "platforms", + "provides", + "provides_dist", + "provides_extra", + "requires", + "requires_dist", + "requires_external", + "supported_platforms", +} + +_DICT_FIELDS = { + "project_urls", +} + + +def _parse_keywords(data: str) -> List[str]: + """Split a string of comma-separate keyboards into a list of keywords.""" + return [k.strip() for k in data.split(",")] + + +def _parse_project_urls(data: List[str]) -> Dict[str, str]: + """Parse a list of label/URL string pairings separated by a comma.""" + urls = {} + for pair in data: + # Our logic is slightly tricky here as we want to try and do + # *something* reasonable with malformed data. + # + # The main thing that we have to worry about, is data that does + # not have a ',' at all to split the label from the Value. There + # isn't a singular right answer here, and we will fail validation + # later on (if the caller is validating) so it doesn't *really* + # matter, but since the missing value has to be an empty str + # and our return value is dict[str, str], if we let the key + # be the missing value, then they'd have multiple '' values that + # overwrite each other in a accumulating dict. + # + # The other potentional issue is that it's possible to have the + # same label multiple times in the metadata, with no solid "right" + # answer with what to do in that case. As such, we'll do the only + # thing we can, which is treat the field as unparseable and add it + # to our list of unparsed fields. + parts = [p.strip() for p in pair.split(",", 1)] + parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items + + # TODO: The spec doesn't say anything about if the keys should be + # considered case sensitive or not... logically they should + # be case-preserving and case-insensitive, but doing that + # would open up more cases where we might have duplicate + # entries. + label, url = parts + if label in urls: + # The label already exists in our set of urls, so this field + # is unparseable, and we can just add the whole thing to our + # unparseable data and stop processing it. + raise KeyError("duplicate labels in project urls") + urls[label] = url + + return urls + + +def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str: + """Get the body of the message.""" + # If our source is a str, then our caller has managed encodings for us, + # and we don't need to deal with it. + if isinstance(source, str): + payload: str = msg.get_payload() + return payload + # If our source is a bytes, then we're managing the encoding and we need + # to deal with it. + else: + bpayload: bytes = msg.get_payload(decode=True) + try: + return bpayload.decode("utf8", "strict") + except UnicodeDecodeError: + raise ValueError("payload in an invalid encoding") + + +# The various parse_FORMAT functions here are intended to be as lenient as +# possible in their parsing, while still returning a correctly typed +# RawMetadata. +# +# To aid in this, we also generally want to do as little touching of the +# data as possible, except where there are possibly some historic holdovers +# that make valid data awkward to work with. +# +# While this is a lower level, intermediate format than our ``Metadata`` +# class, some light touch ups can make a massive difference in usability. + +# Map METADATA fields to RawMetadata. +_EMAIL_TO_RAW_MAPPING = { + "author": "author", + "author-email": "author_email", + "classifier": "classifiers", + "description": "description", + "description-content-type": "description_content_type", + "download-url": "download_url", + "dynamic": "dynamic", + "home-page": "home_page", + "keywords": "keywords", + "license": "license", + "maintainer": "maintainer", + "maintainer-email": "maintainer_email", + "metadata-version": "metadata_version", + "name": "name", + "obsoletes": "obsoletes", + "obsoletes-dist": "obsoletes_dist", + "platform": "platforms", + "project-url": "project_urls", + "provides": "provides", + "provides-dist": "provides_dist", + "provides-extra": "provides_extra", + "requires": "requires", + "requires-dist": "requires_dist", + "requires-external": "requires_external", + "requires-python": "requires_python", + "summary": "summary", + "supported-platform": "supported_platforms", + "version": "version", +} +_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()} + + +def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]: + """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``). + + This function returns a two-item tuple of dicts. The first dict is of + recognized fields from the core metadata specification. Fields that can be + parsed and translated into Python's built-in types are converted + appropriately. All other fields are left as-is. Fields that are allowed to + appear multiple times are stored as lists. + + The second dict contains all other fields from the metadata. This includes + any unrecognized fields. It also includes any fields which are expected to + be parsed into a built-in type but were not formatted appropriately. Finally, + any fields that are expected to appear only once but are repeated are + included in this dict. + + """ + raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {} + unparsed: Dict[str, List[str]] = {} + + if isinstance(data, str): + parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data) + else: + parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data) + + # We have to wrap parsed.keys() in a set, because in the case of multiple + # values for a key (a list), the key will appear multiple times in the + # list of keys, but we're avoiding that by using get_all(). + for name in frozenset(parsed.keys()): + # Header names in RFC are case insensitive, so we'll normalize to all + # lower case to make comparisons easier. + name = name.lower() + + # We use get_all() here, even for fields that aren't multiple use, + # because otherwise someone could have e.g. two Name fields, and we + # would just silently ignore it rather than doing something about it. + headers = parsed.get_all(name) or [] + + # The way the email module works when parsing bytes is that it + # unconditionally decodes the bytes as ascii using the surrogateescape + # handler. When you pull that data back out (such as with get_all() ), + # it looks to see if the str has any surrogate escapes, and if it does + # it wraps it in a Header object instead of returning the string. + # + # As such, we'll look for those Header objects, and fix up the encoding. + value = [] + # Flag if we have run into any issues processing the headers, thus + # signalling that the data belongs in 'unparsed'. + valid_encoding = True + for h in headers: + # It's unclear if this can return more types than just a Header or + # a str, so we'll just assert here to make sure. + assert isinstance(h, (email.header.Header, str)) + + # If it's a header object, we need to do our little dance to get + # the real data out of it. In cases where there is invalid data + # we're going to end up with mojibake, but there's no obvious, good + # way around that without reimplementing parts of the Header object + # ourselves. + # + # That should be fine since, if mojibacked happens, this key is + # going into the unparsed dict anyways. + if isinstance(h, email.header.Header): + # The Header object stores it's data as chunks, and each chunk + # can be independently encoded, so we'll need to check each + # of them. + chunks: List[Tuple[bytes, Optional[str]]] = [] + for bin, encoding in email.header.decode_header(h): + try: + bin.decode("utf8", "strict") + except UnicodeDecodeError: + # Enable mojibake. + encoding = "latin1" + valid_encoding = False + else: + encoding = "utf8" + chunks.append((bin, encoding)) + + # Turn our chunks back into a Header object, then let that + # Header object do the right thing to turn them into a + # string for us. + value.append(str(email.header.make_header(chunks))) + # This is already a string, so just add it. + else: + value.append(h) + + # We've processed all of our values to get them into a list of str, + # but we may have mojibake data, in which case this is an unparsed + # field. + if not valid_encoding: + unparsed[name] = value + continue + + raw_name = _EMAIL_TO_RAW_MAPPING.get(name) + if raw_name is None: + # This is a bit of a weird situation, we've encountered a key that + # we don't know what it means, so we don't know whether it's meant + # to be a list or not. + # + # Since we can't really tell one way or another, we'll just leave it + # as a list, even though it may be a single item list, because that's + # what makes the most sense for email headers. + unparsed[name] = value + continue + + # If this is one of our string fields, then we'll check to see if our + # value is a list of a single item. If it is then we'll assume that + # it was emitted as a single string, and unwrap the str from inside + # the list. + # + # If it's any other kind of data, then we haven't the faintest clue + # what we should parse it as, and we have to just add it to our list + # of unparsed stuff. + if raw_name in _STRING_FIELDS and len(value) == 1: + raw[raw_name] = value[0] + # If this is one of our list of string fields, then we can just assign + # the value, since email *only* has strings, and our get_all() call + # above ensures that this is a list. + elif raw_name in _LIST_FIELDS: + raw[raw_name] = value + # Special Case: Keywords + # The keywords field is implemented in the metadata spec as a str, + # but it conceptually is a list of strings, and is serialized using + # ", ".join(keywords), so we'll do some light data massaging to turn + # this into what it logically is. + elif raw_name == "keywords" and len(value) == 1: + raw[raw_name] = _parse_keywords(value[0]) + # Special Case: Project-URL + # The project urls is implemented in the metadata spec as a list of + # specially-formatted strings that represent a key and a value, which + # is fundamentally a mapping, however the email format doesn't support + # mappings in a sane way, so it was crammed into a list of strings + # instead. + # + # We will do a little light data massaging to turn this into a map as + # it logically should be. + elif raw_name == "project_urls": + try: + raw[raw_name] = _parse_project_urls(value) + except KeyError: + unparsed[name] = value + # Nothing that we've done has managed to parse this, so it'll just + # throw it in our unparseable data and move on. + else: + unparsed[name] = value + + # We need to support getting the Description from the message payload in + # addition to getting it from the the headers. This does mean, though, there + # is the possibility of it being set both ways, in which case we put both + # in 'unparsed' since we don't know which is right. + try: + payload = _get_payload(parsed, data) + except ValueError: + unparsed.setdefault("description", []).append( + parsed.get_payload(decode=isinstance(data, bytes)) + ) + else: + if payload: + # Check to see if we've already got a description, if so then both + # it, and this body move to unparseable. + if "description" in raw: + description_header = cast(str, raw.pop("description")) + unparsed.setdefault("description", []).extend( + [description_header, payload] + ) + elif "description" in unparsed: + unparsed["description"].append(payload) + else: + raw["description"] = payload + + # We need to cast our `raw` to a metadata, because a TypedDict only support + # literal key names, but we're computing our key names on purpose, but the + # way this function is implemented, our `TypedDict` can only have valid key + # names. + return cast(RawMetadata, raw), unparsed + + +_NOT_FOUND = object() + + +# Keep the two values in sync. +_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] +_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] + +_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"]) + + +class _Validator(Generic[T]): + """Validate a metadata field. + + All _process_*() methods correspond to a core metadata field. The method is + called with the field's raw value. If the raw value is valid it is returned + in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field). + If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause + as appropriate). + """ + + name: str + raw_name: str + added: _MetadataVersion + + def __init__( + self, + *, + added: _MetadataVersion = "1.0", + ) -> None: + self.added = added + + def __set_name__(self, _owner: "Metadata", name: str) -> None: + self.name = name + self.raw_name = _RAW_TO_EMAIL_MAPPING[name] + + def __get__(self, instance: "Metadata", _owner: Type["Metadata"]) -> T: + # With Python 3.8, the caching can be replaced with functools.cached_property(). + # No need to check the cache as attribute lookup will resolve into the + # instance's __dict__ before __get__ is called. + cache = instance.__dict__ + value = instance._raw.get(self.name) + + # To make the _process_* methods easier, we'll check if the value is None + # and if this field is NOT a required attribute, and if both of those + # things are true, we'll skip the the converter. This will mean that the + # converters never have to deal with the None union. + if self.name in _REQUIRED_ATTRS or value is not None: + try: + converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}") + except AttributeError: + pass + else: + value = converter(value) + + cache[self.name] = value + try: + del instance._raw[self.name] # type: ignore[misc] + except KeyError: + pass + + return cast(T, value) + + def _invalid_metadata( + self, msg: str, cause: Optional[Exception] = None + ) -> InvalidMetadata: + exc = InvalidMetadata( + self.raw_name, msg.format_map({"field": repr(self.raw_name)}) + ) + exc.__cause__ = cause + return exc + + def _process_metadata_version(self, value: str) -> _MetadataVersion: + # Implicitly makes Metadata-Version required. + if value not in _VALID_METADATA_VERSIONS: + raise self._invalid_metadata(f"{value!r} is not a valid metadata version") + return cast(_MetadataVersion, value) + + def _process_name(self, value: str) -> str: + if not value: + raise self._invalid_metadata("{field} is a required field") + # Validate the name as a side-effect. + try: + utils.canonicalize_name(value, validate=True) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + else: + return value + + def _process_version(self, value: str) -> version_module.Version: + if not value: + raise self._invalid_metadata("{field} is a required field") + try: + return version_module.parse(value) + except version_module.InvalidVersion as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_summary(self, value: str) -> str: + """Check the field contains no newlines.""" + if "\n" in value: + raise self._invalid_metadata("{field} must be a single line") + return value + + def _process_description_content_type(self, value: str) -> str: + content_types = {"text/plain", "text/x-rst", "text/markdown"} + message = email.message.EmailMessage() + message["content-type"] = value + + content_type, parameters = ( + # Defaults to `text/plain` if parsing failed. + message.get_content_type().lower(), + message["content-type"].params, + ) + # Check if content-type is valid or defaulted to `text/plain` and thus was + # not parseable. + if content_type not in content_types or content_type not in value.lower(): + raise self._invalid_metadata( + f"{{field}} must be one of {list(content_types)}, not {value!r}" + ) + + charset = parameters.get("charset", "UTF-8") + if charset != "UTF-8": + raise self._invalid_metadata( + f"{{field}} can only specify the UTF-8 charset, not {list(charset)}" + ) + + markdown_variants = {"GFM", "CommonMark"} + variant = parameters.get("variant", "GFM") # Use an acceptable default. + if content_type == "text/markdown" and variant not in markdown_variants: + raise self._invalid_metadata( + f"valid Markdown variants for {{field}} are {list(markdown_variants)}, " + f"not {variant!r}", + ) + return value + + def _process_dynamic(self, value: List[str]) -> List[str]: + for dynamic_field in map(str.lower, value): + if dynamic_field in {"name", "version", "metadata-version"}: + raise self._invalid_metadata( + f"{value!r} is not allowed as a dynamic field" + ) + elif dynamic_field not in _EMAIL_TO_RAW_MAPPING: + raise self._invalid_metadata(f"{value!r} is not a valid dynamic field") + return list(map(str.lower, value)) + + def _process_provides_extra( + self, + value: List[str], + ) -> List[utils.NormalizedName]: + normalized_names = [] + try: + for name in value: + normalized_names.append(utils.canonicalize_name(name, validate=True)) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{name!r} is invalid for {{field}}", cause=exc + ) + else: + return normalized_names + + def _process_requires_python(self, value: str) -> specifiers.SpecifierSet: + try: + return specifiers.SpecifierSet(value) + except specifiers.InvalidSpecifier as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_requires_dist( + self, + value: List[str], + ) -> List[requirements.Requirement]: + reqs = [] + try: + for req in value: + reqs.append(requirements.Requirement(req)) + except requirements.InvalidRequirement as exc: + raise self._invalid_metadata(f"{req!r} is invalid for {{field}}", cause=exc) + else: + return reqs + + +class Metadata: + """Representation of distribution metadata. + + Compared to :class:`RawMetadata`, this class provides objects representing + metadata fields instead of only using built-in types. Any invalid metadata + will cause :exc:`InvalidMetadata` to be raised (with a + :py:attr:`~BaseException.__cause__` attribute as appropriate). + """ + + _raw: RawMetadata + + @classmethod + def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> "Metadata": + """Create an instance from :class:`RawMetadata`. + + If *validate* is true, all metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + ins = cls() + ins._raw = data.copy() # Mutations occur due to caching enriched values. + + if validate: + exceptions: List[Exception] = [] + try: + metadata_version = ins.metadata_version + metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version) + except InvalidMetadata as metadata_version_exc: + exceptions.append(metadata_version_exc) + metadata_version = None + + # Make sure to check for the fields that are present, the required + # fields (so their absence can be reported). + fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS + # Remove fields that have already been checked. + fields_to_check -= {"metadata_version"} + + for key in fields_to_check: + try: + if metadata_version: + # Can't use getattr() as that triggers descriptor protocol which + # will fail due to no value for the instance argument. + try: + field_metadata_version = cls.__dict__[key].added + except KeyError: + exc = InvalidMetadata(key, f"unrecognized field: {key!r}") + exceptions.append(exc) + continue + field_age = _VALID_METADATA_VERSIONS.index( + field_metadata_version + ) + if field_age > metadata_age: + field = _RAW_TO_EMAIL_MAPPING[key] + exc = InvalidMetadata( + field, + "{field} introduced in metadata version " + "{field_metadata_version}, not {metadata_version}", + ) + exceptions.append(exc) + continue + getattr(ins, key) + except InvalidMetadata as exc: + exceptions.append(exc) + + if exceptions: + raise ExceptionGroup("invalid metadata", exceptions) + + return ins + + @classmethod + def from_email( + cls, data: Union[bytes, str], *, validate: bool = True + ) -> "Metadata": + """Parse metadata from email headers. + + If *validate* is true, the metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + raw, unparsed = parse_email(data) + + if validate: + exceptions: list[Exception] = [] + for unparsed_key in unparsed: + if unparsed_key in _EMAIL_TO_RAW_MAPPING: + message = f"{unparsed_key!r} has invalid data" + else: + message = f"unrecognized field: {unparsed_key!r}" + exceptions.append(InvalidMetadata(unparsed_key, message)) + + if exceptions: + raise ExceptionGroup("unparsed", exceptions) + + try: + return cls.from_raw(raw, validate=validate) + except ExceptionGroup as exc_group: + raise ExceptionGroup( + "invalid or unparsed metadata", exc_group.exceptions + ) from None + + metadata_version: _Validator[_MetadataVersion] = _Validator() + """:external:ref:`core-metadata-metadata-version` + (required; validated to be a valid metadata version)""" + name: _Validator[str] = _Validator() + """:external:ref:`core-metadata-name` + (required; validated using :func:`~packaging.utils.canonicalize_name` and its + *validate* parameter)""" + version: _Validator[version_module.Version] = _Validator() + """:external:ref:`core-metadata-version` (required)""" + dynamic: _Validator[Optional[List[str]]] = _Validator( + added="2.2", + ) + """:external:ref:`core-metadata-dynamic` + (validated against core metadata field names and lowercased)""" + platforms: _Validator[Optional[List[str]]] = _Validator() + """:external:ref:`core-metadata-platform`""" + supported_platforms: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """:external:ref:`core-metadata-supported-platform`""" + summary: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-summary` (validated to contain no newlines)""" + description: _Validator[Optional[str]] = _Validator() # TODO 2.1: can be in body + """:external:ref:`core-metadata-description`""" + description_content_type: _Validator[Optional[str]] = _Validator(added="2.1") + """:external:ref:`core-metadata-description-content-type` (validated)""" + keywords: _Validator[Optional[List[str]]] = _Validator() + """:external:ref:`core-metadata-keywords`""" + home_page: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-home-page`""" + download_url: _Validator[Optional[str]] = _Validator(added="1.1") + """:external:ref:`core-metadata-download-url`""" + author: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-author`""" + author_email: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-author-email`""" + maintainer: _Validator[Optional[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer`""" + maintainer_email: _Validator[Optional[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer-email`""" + license: _Validator[Optional[str]] = _Validator() + """:external:ref:`core-metadata-license`""" + classifiers: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """:external:ref:`core-metadata-classifier`""" + requires_dist: _Validator[Optional[List[requirements.Requirement]]] = _Validator( + added="1.2" + ) + """:external:ref:`core-metadata-requires-dist`""" + requires_python: _Validator[Optional[specifiers.SpecifierSet]] = _Validator( + added="1.2" + ) + """:external:ref:`core-metadata-requires-python`""" + # Because `Requires-External` allows for non-PEP 440 version specifiers, we + # don't do any processing on the values. + requires_external: _Validator[Optional[List[str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-external`""" + project_urls: _Validator[Optional[Dict[str, str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-project-url`""" + # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation + # regardless of metadata version. + provides_extra: _Validator[Optional[List[utils.NormalizedName]]] = _Validator( + added="2.1", + ) + """:external:ref:`core-metadata-provides-extra`""" + provides_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-provides-dist`""" + obsoletes_dist: _Validator[Optional[List[str]]] = _Validator(added="1.2") + """:external:ref:`core-metadata-obsoletes-dist`""" + requires: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """``Requires`` (deprecated)""" + provides: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """``Provides`` (deprecated)""" + obsoletes: _Validator[Optional[List[str]]] = _Validator(added="1.1") + """``Obsoletes`` (deprecated)""" diff --git a/tools/gyp/pylib/packaging/py.typed b/tools/gyp/pylib/packaging/py.typed new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/tools/gyp/pylib/packaging/requirements.py b/tools/gyp/pylib/packaging/requirements.py new file mode 100644 index 00000000000000..0c00eba331b736 --- /dev/null +++ b/tools/gyp/pylib/packaging/requirements.py @@ -0,0 +1,90 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +from typing import Any, Iterator, Optional, Set + +from ._parser import parse_requirement as _parse_requirement +from ._tokenizer import ParserSyntaxError +from .markers import Marker, _normalize_extra_values +from .specifiers import SpecifierSet +from .utils import canonicalize_name + + +class InvalidRequirement(ValueError): + """ + An invalid requirement was found, users should refer to PEP 508. + """ + + +class Requirement: + """Parse a requirement. + + Parse a given requirement string into its parts, such as name, specifier, + URL, and extras. Raises InvalidRequirement on a badly-formed requirement + string. + """ + + # TODO: Can we test whether something is contained within a requirement? + # If so how do we do that? Do we need to test against the _name_ of + # the thing as well as the version? What about the markers? + # TODO: Can we normalize the name and extra name? + + def __init__(self, requirement_string: str) -> None: + try: + parsed = _parse_requirement(requirement_string) + except ParserSyntaxError as e: + raise InvalidRequirement(str(e)) from e + + self.name: str = parsed.name + self.url: Optional[str] = parsed.url or None + self.extras: Set[str] = set(parsed.extras if parsed.extras else []) + self.specifier: SpecifierSet = SpecifierSet(parsed.specifier) + self.marker: Optional[Marker] = None + if parsed.marker is not None: + self.marker = Marker.__new__(Marker) + self.marker._markers = _normalize_extra_values(parsed.marker) + + def _iter_parts(self, name: str) -> Iterator[str]: + yield name + + if self.extras: + formatted_extras = ",".join(sorted(self.extras)) + yield f"[{formatted_extras}]" + + if self.specifier: + yield str(self.specifier) + + if self.url: + yield f"@ {self.url}" + if self.marker: + yield " " + + if self.marker: + yield f"; {self.marker}" + + def __str__(self) -> str: + return "".join(self._iter_parts(self.name)) + + def __repr__(self) -> str: + return f"" + + def __hash__(self) -> int: + return hash( + ( + self.__class__.__name__, + *self._iter_parts(canonicalize_name(self.name)), + ) + ) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Requirement): + return NotImplemented + + return ( + canonicalize_name(self.name) == canonicalize_name(other.name) + and self.extras == other.extras + and self.specifier == other.specifier + and self.url == other.url + and self.marker == other.marker + ) diff --git a/tools/gyp/pylib/packaging/specifiers.py b/tools/gyp/pylib/packaging/specifiers.py new file mode 100644 index 00000000000000..94448327ae2d44 --- /dev/null +++ b/tools/gyp/pylib/packaging/specifiers.py @@ -0,0 +1,1030 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +""" +.. testsetup:: + + from packaging.specifiers import Specifier, SpecifierSet, InvalidSpecifier + from packaging.version import Version +""" + +import abc +import itertools +import re +from typing import ( + Callable, + Iterable, + Iterator, + List, + Optional, + Set, + Tuple, + TypeVar, + Union, +) + +from .utils import canonicalize_version +from .version import Version + +UnparsedVersion = Union[Version, str] +UnparsedVersionVar = TypeVar("UnparsedVersionVar", bound=UnparsedVersion) +CallableOperator = Callable[[Version, str], bool] + + +def _coerce_version(version: UnparsedVersion) -> Version: + if not isinstance(version, Version): + version = Version(version) + return version + + +class InvalidSpecifier(ValueError): + """ + Raised when attempting to create a :class:`Specifier` with a specifier + string that is invalid. + + >>> Specifier("lolwat") + Traceback (most recent call last): + ... + packaging.specifiers.InvalidSpecifier: Invalid specifier: 'lolwat' + """ + + +class BaseSpecifier(metaclass=abc.ABCMeta): + @abc.abstractmethod + def __str__(self) -> str: + """ + Returns the str representation of this Specifier-like object. This + should be representative of the Specifier itself. + """ + + @abc.abstractmethod + def __hash__(self) -> int: + """ + Returns a hash value for this Specifier-like object. + """ + + @abc.abstractmethod + def __eq__(self, other: object) -> bool: + """ + Returns a boolean representing whether or not the two Specifier-like + objects are equal. + + :param other: The other object to check against. + """ + + @property + @abc.abstractmethod + def prereleases(self) -> Optional[bool]: + """Whether or not pre-releases as a whole are allowed. + + This can be set to either ``True`` or ``False`` to explicitly enable or disable + prereleases or it can be set to ``None`` (the default) to use default semantics. + """ + + @prereleases.setter + def prereleases(self, value: bool) -> None: + """Setter for :attr:`prereleases`. + + :param value: The value to set. + """ + + @abc.abstractmethod + def contains(self, item: str, prereleases: Optional[bool] = None) -> bool: + """ + Determines if the given item is contained within this specifier. + """ + + @abc.abstractmethod + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """ + Takes an iterable of items and filters them so that only items which + are contained within this specifier are allowed in it. + """ + + +class Specifier(BaseSpecifier): + """This class abstracts handling of version specifiers. + + .. tip:: + + It is generally not required to instantiate this manually. You should instead + prefer to work with :class:`SpecifierSet` instead, which can parse + comma-separated version specifiers (which is what package metadata contains). + """ + + _operator_regex_str = r""" + (?P(~=|==|!=|<=|>=|<|>|===)) + """ + _version_regex_str = r""" + (?P + (?: + # The identity operators allow for an escape hatch that will + # do an exact string match of the version you wish to install. + # This will not be parsed by PEP 440 and we cannot determine + # any semantic meaning from it. This operator is discouraged + # but included entirely as an escape hatch. + (?<====) # Only match for the identity operator + \s* + [^\s;)]* # The arbitrary version can be just about anything, + # we match everything except for whitespace, a + # semi-colon for marker support, and a closing paren + # since versions can be enclosed in them. + ) + | + (?: + # The (non)equality operators allow for wild card and local + # versions to be specified so we have to define these two + # operators separately to enable that. + (?<===|!=) # Only match for equals and not equals + + \s* + v? + (?:[0-9]+!)? # epoch + [0-9]+(?:\.[0-9]+)* # release + + # You cannot use a wild card and a pre-release, post-release, a dev or + # local version together so group them with a | and make them optional. + (?: + \.\* # Wild card syntax of .* + | + (?: # pre release + [-_\.]? + (alpha|beta|preview|pre|a|b|c|rc) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? + (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release + (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local + )? + ) + | + (?: + # The compatible operator requires at least two digits in the + # release segment. + (?<=~=) # Only match for the compatible operator + + \s* + v? + (?:[0-9]+!)? # epoch + [0-9]+(?:\.[0-9]+)+ # release (We have a + instead of a *) + (?: # pre release + [-_\.]? + (alpha|beta|preview|pre|a|b|c|rc) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? + (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release + ) + | + (?: + # All other operators only allow a sub set of what the + # (non)equality operators do. Specifically they do not allow + # local versions to be specified nor do they allow the prefix + # matching wild cards. + (?=": "greater_than_equal", + "<": "less_than", + ">": "greater_than", + "===": "arbitrary", + } + + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + """Initialize a Specifier instance. + + :param spec: + The string representation of a specifier which will be parsed and + normalized before use. + :param prereleases: + This tells the specifier if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + :raises InvalidSpecifier: + If the given specifier is invalid (i.e. bad syntax). + """ + match = self._regex.search(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + + self._spec: Tuple[str, str] = ( + match.group("operator").strip(), + match.group("version").strip(), + ) + + # Store whether or not this Specifier should accept prereleases + self._prereleases = prereleases + + # https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515 + @property # type: ignore[override] + def prereleases(self) -> bool: + # If there is an explicit prereleases set for this, then we'll just + # blindly use that. + if self._prereleases is not None: + return self._prereleases + + # Look at all of our specifiers and determine if they are inclusive + # operators, and if they are if they are including an explicit + # prerelease. + operator, version = self._spec + if operator in ["==", ">=", "<=", "~=", "==="]: + # The == specifier can include a trailing .*, if it does we + # want to remove before parsing. + if operator == "==" and version.endswith(".*"): + version = version[:-2] + + # Parse the version, and if it is a pre-release than this + # specifier allows pre-releases. + if Version(version).is_prerelease: + return True + + return False + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + @property + def operator(self) -> str: + """The operator of this specifier. + + >>> Specifier("==1.2.3").operator + '==' + """ + return self._spec[0] + + @property + def version(self) -> str: + """The version of this specifier. + + >>> Specifier("==1.2.3").version + '1.2.3' + """ + return self._spec[1] + + def __repr__(self) -> str: + """A representation of the Specifier that shows all internal state. + + >>> Specifier('>=1.0.0') + =1.0.0')> + >>> Specifier('>=1.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> Specifier('>=1.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"<{self.__class__.__name__}({str(self)!r}{pre})>" + + def __str__(self) -> str: + """A string representation of the Specifier that can be round-tripped. + + >>> str(Specifier('>=1.0.0')) + '>=1.0.0' + >>> str(Specifier('>=1.0.0', prereleases=False)) + '>=1.0.0' + """ + return "{}{}".format(*self._spec) + + @property + def _canonical_spec(self) -> Tuple[str, str]: + canonical_version = canonicalize_version( + self._spec[1], + strip_trailing_zero=(self._spec[0] != "~="), + ) + return self._spec[0], canonical_version + + def __hash__(self) -> int: + return hash(self._canonical_spec) + + def __eq__(self, other: object) -> bool: + """Whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> Specifier("==1.2.3") == Specifier("== 1.2.3.0") + True + >>> (Specifier("==1.2.3", prereleases=False) == + ... Specifier("==1.2.3", prereleases=True)) + True + >>> Specifier("==1.2.3") == "==1.2.3" + True + >>> Specifier("==1.2.3") == Specifier("==1.2.4") + False + >>> Specifier("==1.2.3") == Specifier("~=1.2.3") + False + """ + if isinstance(other, str): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._canonical_spec == other._canonical_spec + + def _get_operator(self, op: str) -> CallableOperator: + operator_callable: CallableOperator = getattr( + self, f"_compare_{self._operators[op]}" + ) + return operator_callable + + def _compare_compatible(self, prospective: Version, spec: str) -> bool: + + # Compatible releases have an equivalent combination of >= and ==. That + # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to + # implement this in terms of the other specifiers instead of + # implementing it ourselves. The only thing we need to do is construct + # the other specifiers. + + # We want everything but the last item in the version, but we want to + # ignore suffix segments. + prefix = _version_join( + list(itertools.takewhile(_is_not_suffix, _version_split(spec)))[:-1] + ) + + # Add the prefix notation to the end of our string + prefix += ".*" + + return self._get_operator(">=")(prospective, spec) and self._get_operator("==")( + prospective, prefix + ) + + def _compare_equal(self, prospective: Version, spec: str) -> bool: + + # We need special logic to handle prefix matching + if spec.endswith(".*"): + # In the case of prefix matching we want to ignore local segment. + normalized_prospective = canonicalize_version( + prospective.public, strip_trailing_zero=False + ) + # Get the normalized version string ignoring the trailing .* + normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False) + # Split the spec out by bangs and dots, and pretend that there is + # an implicit dot in between a release segment and a pre-release segment. + split_spec = _version_split(normalized_spec) + + # Split the prospective version out by bangs and dots, and pretend + # that there is an implicit dot in between a release segment and + # a pre-release segment. + split_prospective = _version_split(normalized_prospective) + + # 0-pad the prospective version before shortening it to get the correct + # shortened version. + padded_prospective, _ = _pad_version(split_prospective, split_spec) + + # Shorten the prospective version to be the same length as the spec + # so that we can determine if the specifier is a prefix of the + # prospective version or not. + shortened_prospective = padded_prospective[: len(split_spec)] + + return shortened_prospective == split_spec + else: + # Convert our spec string into a Version + spec_version = Version(spec) + + # If the specifier does not have a local segment, then we want to + # act as if the prospective version also does not have a local + # segment. + if not spec_version.local: + prospective = Version(prospective.public) + + return prospective == spec_version + + def _compare_not_equal(self, prospective: Version, spec: str) -> bool: + return not self._compare_equal(prospective, spec) + + def _compare_less_than_equal(self, prospective: Version, spec: str) -> bool: + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) <= Version(spec) + + def _compare_greater_than_equal(self, prospective: Version, spec: str) -> bool: + + # NB: Local version identifiers are NOT permitted in the version + # specifier, so local version labels can be universally removed from + # the prospective version. + return Version(prospective.public) >= Version(spec) + + def _compare_less_than(self, prospective: Version, spec_str: str) -> bool: + + # Convert our spec to a Version instance, since we'll want to work with + # it as a version. + spec = Version(spec_str) + + # Check to see if the prospective version is less than the spec + # version. If it's not we can short circuit and just return False now + # instead of doing extra unneeded work. + if not prospective < spec: + return False + + # This special case is here so that, unless the specifier itself + # includes is a pre-release version, that we do not accept pre-release + # versions for the version mentioned in the specifier (e.g. <3.1 should + # not match 3.1.dev0, but should match 3.0.dev0). + if not spec.is_prerelease and prospective.is_prerelease: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # If we've gotten to here, it means that prospective version is both + # less than the spec version *and* it's not a pre-release of the same + # version in the spec. + return True + + def _compare_greater_than(self, prospective: Version, spec_str: str) -> bool: + + # Convert our spec to a Version instance, since we'll want to work with + # it as a version. + spec = Version(spec_str) + + # Check to see if the prospective version is greater than the spec + # version. If it's not we can short circuit and just return False now + # instead of doing extra unneeded work. + if not prospective > spec: + return False + + # This special case is here so that, unless the specifier itself + # includes is a post-release version, that we do not accept + # post-release versions for the version mentioned in the specifier + # (e.g. >3.1 should not match 3.0.post0, but should match 3.2.post0). + if not spec.is_postrelease and prospective.is_postrelease: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # Ensure that we do not allow a local version of the version mentioned + # in the specifier, which is technically greater than, to match. + if prospective.local is not None: + if Version(prospective.base_version) == Version(spec.base_version): + return False + + # If we've gotten to here, it means that prospective version is both + # greater than the spec version *and* it's not a pre-release of the + # same version in the spec. + return True + + def _compare_arbitrary(self, prospective: Version, spec: str) -> bool: + return str(prospective).lower() == str(spec).lower() + + def __contains__(self, item: Union[str, Version]) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: The item to check for. + + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. + + >>> "1.2.3" in Specifier(">=1.2.3") + True + >>> Version("1.2.3") in Specifier(">=1.2.3") + True + >>> "1.0.0" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3", prereleases=True) + True + """ + return self.contains(item) + + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this Specifier. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> Specifier(">=1.2.3").contains("1.2.3") + True + >>> Specifier(">=1.2.3").contains(Version("1.2.3")) + True + >>> Specifier(">=1.2.3").contains("1.0.0") + False + >>> Specifier(">=1.2.3").contains("1.3.0a1") + False + >>> Specifier(">=1.2.3", prereleases=True).contains("1.3.0a1") + True + >>> Specifier(">=1.2.3").contains("1.3.0a1", prereleases=True) + True + """ + + # Determine if prereleases are to be allowed or not. + if prereleases is None: + prereleases = self.prereleases + + # Normalize item to a Version, this allows us to have a shortcut for + # "2.0" in Specifier(">=2") + normalized_item = _coerce_version(item) + + # Determine if we should be supporting prereleases in this specifier + # or not, if we do not support prereleases than we can short circuit + # logic if this version is a prereleases. + if normalized_item.is_prerelease and not prereleases: + return False + + # Actually do the comparison to determine if this item is contained + # within this Specifier or not. + operator_callable: CallableOperator = self._get_operator(self.operator) + return operator_callable(normalized_item, self.version) + + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifier. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(Specifier().contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.2.3", "1.3", Version("1.4")])) + ['1.2.3', '1.3', ] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.5a1"])) + ['1.5a1'] + >>> list(Specifier(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(Specifier(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + """ + + yielded = False + found_prereleases = [] + + kw = {"prereleases": prereleases if prereleases is not None else True} + + # Attempt to iterate over all the values in the iterable and if any of + # them match, yield them. + for version in iterable: + parsed_version = _coerce_version(version) + + if self.contains(parsed_version, **kw): + # If our version is a prerelease, and we were not set to allow + # prereleases, then we'll store it for later in case nothing + # else matches this specifier. + if parsed_version.is_prerelease and not ( + prereleases or self.prereleases + ): + found_prereleases.append(version) + # Either this is not a prerelease, or we should have been + # accepting prereleases from the beginning. + else: + yielded = True + yield version + + # Now that we've iterated over everything, determine if we've yielded + # any values, and if we have not and we have any prereleases stored up + # then we will go ahead and yield the prereleases. + if not yielded and found_prereleases: + for version in found_prereleases: + yield version + + +_prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$") + + +def _version_split(version: str) -> List[str]: + """Split version into components. + + The split components are intended for version comparison. The logic does + not attempt to retain the original version string, so joining the + components back with :func:`_version_join` may not produce the original + version string. + """ + result: List[str] = [] + + epoch, _, rest = version.rpartition("!") + result.append(epoch or "0") + + for item in rest.split("."): + match = _prefix_regex.search(item) + if match: + result.extend(match.groups()) + else: + result.append(item) + return result + + +def _version_join(components: List[str]) -> str: + """Join split version components into a version string. + + This function assumes the input came from :func:`_version_split`, where the + first component must be the epoch (either empty or numeric), and all other + components numeric. + """ + epoch, *rest = components + return f"{epoch}!{'.'.join(rest)}" + + +def _is_not_suffix(segment: str) -> bool: + return not any( + segment.startswith(prefix) for prefix in ("dev", "a", "b", "rc", "post") + ) + + +def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str]]: + left_split, right_split = [], [] + + # Get the release segment of our versions + left_split.append(list(itertools.takewhile(lambda x: x.isdigit(), left))) + right_split.append(list(itertools.takewhile(lambda x: x.isdigit(), right))) + + # Get the rest of our versions + left_split.append(left[len(left_split[0]) :]) + right_split.append(right[len(right_split[0]) :]) + + # Insert our padding + left_split.insert(1, ["0"] * max(0, len(right_split[0]) - len(left_split[0]))) + right_split.insert(1, ["0"] * max(0, len(left_split[0]) - len(right_split[0]))) + + return (list(itertools.chain(*left_split)), list(itertools.chain(*right_split))) + + +class SpecifierSet(BaseSpecifier): + """This class abstracts handling of a set of version specifiers. + + It can be passed a single specifier (``>=3.0``), a comma-separated list of + specifiers (``>=3.0,!=3.1``), or no specifier at all. + """ + + def __init__( + self, specifiers: str = "", prereleases: Optional[bool] = None + ) -> None: + """Initialize a SpecifierSet instance. + + :param specifiers: + The string representation of a specifier or a comma-separated list of + specifiers which will be parsed and normalized before use. + :param prereleases: + This tells the SpecifierSet if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + + :raises InvalidSpecifier: + If the given ``specifiers`` are not parseable than this exception will be + raised. + """ + + # Split on `,` to break each individual specifier into it's own item, and + # strip each item to remove leading/trailing whitespace. + split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] + + # Parsed each individual specifier, attempting first to make it a + # Specifier. + parsed: Set[Specifier] = set() + for specifier in split_specifiers: + parsed.add(Specifier(specifier)) + + # Turn our parsed specifiers into a frozen set and save them for later. + self._specs = frozenset(parsed) + + # Store our prereleases value so we can use it later to determine if + # we accept prereleases or not. + self._prereleases = prereleases + + @property + def prereleases(self) -> Optional[bool]: + # If we have been given an explicit prerelease modifier, then we'll + # pass that through here. + if self._prereleases is not None: + return self._prereleases + + # If we don't have any specifiers, and we don't have a forced value, + # then we'll just return None since we don't know if this should have + # pre-releases or not. + if not self._specs: + return None + + # Otherwise we'll see if any of the given specifiers accept + # prereleases, if any of them do we'll return True, otherwise False. + return any(s.prereleases for s in self._specs) + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + def __repr__(self) -> str: + """A representation of the specifier set that shows all internal state. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> SpecifierSet('>=1.0.0,!=2.0.0') + =1.0.0')> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"" + + def __str__(self) -> str: + """A string representation of the specifier set that can be round-tripped. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> str(SpecifierSet(">=1.0.0,!=1.0.1")) + '!=1.0.1,>=1.0.0' + >>> str(SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False)) + '!=1.0.1,>=1.0.0' + """ + return ",".join(sorted(str(s) for s in self._specs)) + + def __hash__(self) -> int: + return hash(self._specs) + + def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": + """Return a SpecifierSet which is a combination of the two sets. + + :param other: The other object to combine with. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") & '<=2.0.0,!=2.0.1' + =1.0.0')> + >>> SpecifierSet(">=1.0.0,!=1.0.1") & SpecifierSet('<=2.0.0,!=2.0.1') + =1.0.0')> + """ + if isinstance(other, str): + other = SpecifierSet(other) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + specifier = SpecifierSet() + specifier._specs = frozenset(self._specs | other._specs) + + if self._prereleases is None and other._prereleases is not None: + specifier._prereleases = other._prereleases + elif self._prereleases is not None and other._prereleases is None: + specifier._prereleases = self._prereleases + elif self._prereleases == other._prereleases: + specifier._prereleases = self._prereleases + else: + raise ValueError( + "Cannot combine SpecifierSets with True and False prerelease " + "overrides." + ) + + return specifier + + def __eq__(self, other: object) -> bool: + """Whether or not the two SpecifierSet-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> (SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False) == + ... SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True)) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == ">=1.0.0,!=1.0.1" + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.2") + False + """ + if isinstance(other, (str, Specifier)): + other = SpecifierSet(str(other)) + elif not isinstance(other, SpecifierSet): + return NotImplemented + + return self._specs == other._specs + + def __len__(self) -> int: + """Returns the number of specifiers in this specifier set.""" + return len(self._specs) + + def __iter__(self) -> Iterator[Specifier]: + """ + Returns an iterator over all the underlying :class:`Specifier` instances + in this specifier set. + + >>> sorted(SpecifierSet(">=1.0.0,!=1.0.1"), key=str) + [, =1.0.0')>] + """ + return iter(self._specs) + + def __contains__(self, item: UnparsedVersion) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: The item to check for. + + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. + + >>> "1.2.3" in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> Version("1.2.3") in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> "1.0.1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True) + True + """ + return self.contains(item) + + def contains( + self, + item: UnparsedVersion, + prereleases: Optional[bool] = None, + installed: Optional[bool] = None, + ) -> bool: + """Return whether or not the item is contained in this SpecifierSet. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this SpecifierSet. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.2.3") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains(Version("1.2.3")) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.0.1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True).contains("1.3.0a1") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1", prereleases=True) + True + """ + # Ensure that our item is a Version instance. + if not isinstance(item, Version): + item = Version(item) + + # Determine if we're forcing a prerelease or not, if we're not forcing + # one for this particular filter call, then we'll use whatever the + # SpecifierSet thinks for whether or not we should support prereleases. + if prereleases is None: + prereleases = self.prereleases + + # We can determine if we're going to allow pre-releases by looking to + # see if any of the underlying items supports them. If none of them do + # and this item is a pre-release then we do not allow it and we can + # short circuit that here. + # Note: This means that 1.0.dev1 would not be contained in something + # like >=1.0.devabc however it would be in >=1.0.debabc,>0.0.dev0 + if not prereleases and item.is_prerelease: + return False + + if installed and item.is_prerelease: + item = Version(item.base_version) + + # We simply dispatch to the underlying specs here to make sure that the + # given version is contained within all of them. + # Note: This use of all() here means that an empty set of specifiers + # will always return True, this is an explicit design decision. + return all(s.contains(item, prereleases=prereleases) for s in self._specs) + + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifiers in this set. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(SpecifierSet(...).contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", Version("1.4")])) + ['1.3', ] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.5a1"])) + [] + >>> list(SpecifierSet(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(SpecifierSet(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + + An "empty" SpecifierSet will filter items based on the presence of prerelease + versions in the set. + + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet("").filter(["1.5a1"])) + ['1.5a1'] + >>> list(SpecifierSet("", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + """ + # Determine if we're forcing a prerelease or not, if we're not forcing + # one for this particular filter call, then we'll use whatever the + # SpecifierSet thinks for whether or not we should support prereleases. + if prereleases is None: + prereleases = self.prereleases + + # If we have any specifiers, then we want to wrap our iterable in the + # filter method for each one, this will act as a logical AND amongst + # each specifier. + if self._specs: + for spec in self._specs: + iterable = spec.filter(iterable, prereleases=bool(prereleases)) + return iter(iterable) + # If we do not have any specifiers, then we need to have a rough filter + # which will filter out any pre-releases, unless there are no final + # releases. + else: + filtered: List[UnparsedVersionVar] = [] + found_prereleases: List[UnparsedVersionVar] = [] + + for item in iterable: + parsed_version = _coerce_version(item) + + # Store any item which is a pre-release for later unless we've + # already found a final version or we are accepting prereleases + if parsed_version.is_prerelease and not prereleases: + if not filtered: + found_prereleases.append(item) + else: + filtered.append(item) + + # If we've found no items except for pre-releases, then we'll go + # ahead and use the pre-releases + if not filtered and found_prereleases and prereleases is None: + return iter(found_prereleases) + + return iter(filtered) diff --git a/tools/gyp/pylib/packaging/tags.py b/tools/gyp/pylib/packaging/tags.py new file mode 100644 index 00000000000000..37f33b1ef849ed --- /dev/null +++ b/tools/gyp/pylib/packaging/tags.py @@ -0,0 +1,553 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import logging +import platform +import struct +import subprocess +import sys +import sysconfig +from importlib.machinery import EXTENSION_SUFFIXES +from typing import ( + Dict, + FrozenSet, + Iterable, + Iterator, + List, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +from . import _manylinux, _musllinux + +logger = logging.getLogger(__name__) + +PythonVersion = Sequence[int] +MacVersion = Tuple[int, int] + +INTERPRETER_SHORT_NAMES: Dict[str, str] = { + "python": "py", # Generic. + "cpython": "cp", + "pypy": "pp", + "ironpython": "ip", + "jython": "jy", +} + + +_32_BIT_INTERPRETER = struct.calcsize("P") == 4 + + +class Tag: + """ + A representation of the tag triple for a wheel. + + Instances are considered immutable and thus are hashable. Equality checking + is also supported. + """ + + __slots__ = ["_interpreter", "_abi", "_platform", "_hash"] + + def __init__(self, interpreter: str, abi: str, platform: str) -> None: + self._interpreter = interpreter.lower() + self._abi = abi.lower() + self._platform = platform.lower() + # The __hash__ of every single element in a Set[Tag] will be evaluated each time + # that a set calls its `.disjoint()` method, which may be called hundreds of + # times when scanning a page of links for packages with tags matching that + # Set[Tag]. Pre-computing the value here produces significant speedups for + # downstream consumers. + self._hash = hash((self._interpreter, self._abi, self._platform)) + + @property + def interpreter(self) -> str: + return self._interpreter + + @property + def abi(self) -> str: + return self._abi + + @property + def platform(self) -> str: + return self._platform + + def __eq__(self, other: object) -> bool: + if not isinstance(other, Tag): + return NotImplemented + + return ( + (self._hash == other._hash) # Short-circuit ASAP for perf reasons. + and (self._platform == other._platform) + and (self._abi == other._abi) + and (self._interpreter == other._interpreter) + ) + + def __hash__(self) -> int: + return self._hash + + def __str__(self) -> str: + return f"{self._interpreter}-{self._abi}-{self._platform}" + + def __repr__(self) -> str: + return f"<{self} @ {id(self)}>" + + +def parse_tag(tag: str) -> FrozenSet[Tag]: + """ + Parses the provided tag (e.g. `py3-none-any`) into a frozenset of Tag instances. + + Returning a set is required due to the possibility that the tag is a + compressed tag set. + """ + tags = set() + interpreters, abis, platforms = tag.split("-") + for interpreter in interpreters.split("."): + for abi in abis.split("."): + for platform_ in platforms.split("."): + tags.add(Tag(interpreter, abi, platform_)) + return frozenset(tags) + + +def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: + value: Union[int, str, None] = sysconfig.get_config_var(name) + if value is None and warn: + logger.debug( + "Config variable '%s' is unset, Python ABI tag may be incorrect", name + ) + return value + + +def _normalize_string(string: str) -> str: + return string.replace(".", "_").replace("-", "_").replace(" ", "_") + + +def _abi3_applies(python_version: PythonVersion) -> bool: + """ + Determine if the Python version supports abi3. + + PEP 384 was first implemented in Python 3.2. + """ + return len(python_version) > 1 and tuple(python_version) >= (3, 2) + + +def _cpython_abis(py_version: PythonVersion, warn: bool = False) -> List[str]: + py_version = tuple(py_version) # To allow for version comparison. + abis = [] + version = _version_nodot(py_version[:2]) + debug = pymalloc = ucs4 = "" + with_debug = _get_config_var("Py_DEBUG", warn) + has_refcount = hasattr(sys, "gettotalrefcount") + # Windows doesn't set Py_DEBUG, so checking for support of debug-compiled + # extension modules is the best option. + # https://github.com/pypa/pip/issues/3383#issuecomment-173267692 + has_ext = "_d.pyd" in EXTENSION_SUFFIXES + if with_debug or (with_debug is None and (has_refcount or has_ext)): + debug = "d" + if py_version < (3, 8): + with_pymalloc = _get_config_var("WITH_PYMALLOC", warn) + if with_pymalloc or with_pymalloc is None: + pymalloc = "m" + if py_version < (3, 3): + unicode_size = _get_config_var("Py_UNICODE_SIZE", warn) + if unicode_size == 4 or ( + unicode_size is None and sys.maxunicode == 0x10FFFF + ): + ucs4 = "u" + elif debug: + # Debug builds can also load "normal" extension modules. + # We can also assume no UCS-4 or pymalloc requirement. + abis.append(f"cp{version}") + abis.insert( + 0, + "cp{version}{debug}{pymalloc}{ucs4}".format( + version=version, debug=debug, pymalloc=pymalloc, ucs4=ucs4 + ), + ) + return abis + + +def cpython_tags( + python_version: Optional[PythonVersion] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: + """ + Yields the tags for a CPython interpreter. + + The tags consist of: + - cp-- + - cp-abi3- + - cp-none- + - cp-abi3- # Older Python versions down to 3.2. + + If python_version only specifies a major version then user-provided ABIs and + the 'none' ABItag will be used. + + If 'abi3' or 'none' are specified in 'abis' then they will be yielded at + their normal position and not at the beginning. + """ + if not python_version: + python_version = sys.version_info[:2] + + interpreter = f"cp{_version_nodot(python_version[:2])}" + + if abis is None: + if len(python_version) > 1: + abis = _cpython_abis(python_version, warn) + else: + abis = [] + abis = list(abis) + # 'abi3' and 'none' are explicitly handled later. + for explicit_abi in ("abi3", "none"): + try: + abis.remove(explicit_abi) + except ValueError: + pass + + platforms = list(platforms or platform_tags()) + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + if _abi3_applies(python_version): + yield from (Tag(interpreter, "abi3", platform_) for platform_ in platforms) + yield from (Tag(interpreter, "none", platform_) for platform_ in platforms) + + if _abi3_applies(python_version): + for minor_version in range(python_version[1] - 1, 1, -1): + for platform_ in platforms: + interpreter = "cp{version}".format( + version=_version_nodot((python_version[0], minor_version)) + ) + yield Tag(interpreter, "abi3", platform_) + + +def _generic_abi() -> List[str]: + """ + Return the ABI tag based on EXT_SUFFIX. + """ + # The following are examples of `EXT_SUFFIX`. + # We want to keep the parts which are related to the ABI and remove the + # parts which are related to the platform: + # - linux: '.cpython-310-x86_64-linux-gnu.so' => cp310 + # - mac: '.cpython-310-darwin.so' => cp310 + # - win: '.cp310-win_amd64.pyd' => cp310 + # - win: '.pyd' => cp37 (uses _cpython_abis()) + # - pypy: '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73 + # - graalpy: '.graalpy-38-native-x86_64-darwin.dylib' + # => graalpy_38_native + + ext_suffix = _get_config_var("EXT_SUFFIX", warn=True) + if not isinstance(ext_suffix, str) or ext_suffix[0] != ".": + raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')") + parts = ext_suffix.split(".") + if len(parts) < 3: + # CPython3.7 and earlier uses ".pyd" on Windows. + return _cpython_abis(sys.version_info[:2]) + soabi = parts[1] + if soabi.startswith("cpython"): + # non-windows + abi = "cp" + soabi.split("-")[1] + elif soabi.startswith("cp"): + # windows + abi = soabi.split("-")[0] + elif soabi.startswith("pypy"): + abi = "-".join(soabi.split("-")[:2]) + elif soabi.startswith("graalpy"): + abi = "-".join(soabi.split("-")[:3]) + elif soabi: + # pyston, ironpython, others? + abi = soabi + else: + return [] + return [_normalize_string(abi)] + + +def generic_tags( + interpreter: Optional[str] = None, + abis: Optional[Iterable[str]] = None, + platforms: Optional[Iterable[str]] = None, + *, + warn: bool = False, +) -> Iterator[Tag]: + """ + Yields the tags for a generic interpreter. + + The tags consist of: + - -- + + The "none" ABI will be added if it was not explicitly provided. + """ + if not interpreter: + interp_name = interpreter_name() + interp_version = interpreter_version(warn=warn) + interpreter = "".join([interp_name, interp_version]) + if abis is None: + abis = _generic_abi() + else: + abis = list(abis) + platforms = list(platforms or platform_tags()) + if "none" not in abis: + abis.append("none") + for abi in abis: + for platform_ in platforms: + yield Tag(interpreter, abi, platform_) + + +def _py_interpreter_range(py_version: PythonVersion) -> Iterator[str]: + """ + Yields Python versions in descending order. + + After the latest version, the major-only version will be yielded, and then + all previous versions of that major version. + """ + if len(py_version) > 1: + yield f"py{_version_nodot(py_version[:2])}" + yield f"py{py_version[0]}" + if len(py_version) > 1: + for minor in range(py_version[1] - 1, -1, -1): + yield f"py{_version_nodot((py_version[0], minor))}" + + +def compatible_tags( + python_version: Optional[PythonVersion] = None, + interpreter: Optional[str] = None, + platforms: Optional[Iterable[str]] = None, +) -> Iterator[Tag]: + """ + Yields the sequence of tags that are compatible with a specific version of Python. + + The tags consist of: + - py*-none- + - -none-any # ... if `interpreter` is provided. + - py*-none-any + """ + if not python_version: + python_version = sys.version_info[:2] + platforms = list(platforms or platform_tags()) + for version in _py_interpreter_range(python_version): + for platform_ in platforms: + yield Tag(version, "none", platform_) + if interpreter: + yield Tag(interpreter, "none", "any") + for version in _py_interpreter_range(python_version): + yield Tag(version, "none", "any") + + +def _mac_arch(arch: str, is_32bit: bool = _32_BIT_INTERPRETER) -> str: + if not is_32bit: + return arch + + if arch.startswith("ppc"): + return "ppc" + + return "i386" + + +def _mac_binary_formats(version: MacVersion, cpu_arch: str) -> List[str]: + formats = [cpu_arch] + if cpu_arch == "x86_64": + if version < (10, 4): + return [] + formats.extend(["intel", "fat64", "fat32"]) + + elif cpu_arch == "i386": + if version < (10, 4): + return [] + formats.extend(["intel", "fat32", "fat"]) + + elif cpu_arch == "ppc64": + # TODO: Need to care about 32-bit PPC for ppc64 through 10.2? + if version > (10, 5) or version < (10, 4): + return [] + formats.append("fat64") + + elif cpu_arch == "ppc": + if version > (10, 6): + return [] + formats.extend(["fat32", "fat"]) + + if cpu_arch in {"arm64", "x86_64"}: + formats.append("universal2") + + if cpu_arch in {"x86_64", "i386", "ppc64", "ppc", "intel"}: + formats.append("universal") + + return formats + + +def mac_platforms( + version: Optional[MacVersion] = None, arch: Optional[str] = None +) -> Iterator[str]: + """ + Yields the platform tags for a macOS system. + + The `version` parameter is a two-item tuple specifying the macOS version to + generate platform tags for. The `arch` parameter is the CPU architecture to + generate platform tags for. Both parameters default to the appropriate value + for the current system. + """ + version_str, _, cpu_arch = platform.mac_ver() + if version is None: + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + if version == (10, 16): + # When built against an older macOS SDK, Python will report macOS 10.16 + # instead of the real version. + version_str = subprocess.run( + [ + sys.executable, + "-sS", + "-c", + "import platform; print(platform.mac_ver()[0])", + ], + check=True, + env={"SYSTEM_VERSION_COMPAT": "0"}, + stdout=subprocess.PIPE, + text=True, + ).stdout + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + else: + version = version + if arch is None: + arch = _mac_arch(cpu_arch) + else: + arch = arch + + if (10, 0) <= version and version < (11, 0): + # Prior to Mac OS 11, each yearly release of Mac OS bumped the + # "minor" version number. The major version was always 10. + for minor_version in range(version[1], -1, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=10, minor=minor_version, binary_format=binary_format + ) + + if version >= (11, 0): + # Starting with Mac OS 11, each yearly release bumps the major version + # number. The minor versions are now the midyear updates. + for major_version in range(version[0], 10, -1): + compat_version = major_version, 0 + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=major_version, minor=0, binary_format=binary_format + ) + + if version >= (11, 0): + # Mac OS 11 on x86_64 is compatible with binaries from previous releases. + # Arm64 support was introduced in 11.0, so no Arm binaries from previous + # releases exist. + # + # However, the "universal2" binary format can have a + # macOS version earlier than 11.0 when the x86_64 part of the binary supports + # that version of macOS. + if arch == "x86_64": + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_formats = _mac_binary_formats(compat_version, arch) + for binary_format in binary_formats: + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + else: + for minor_version in range(16, 3, -1): + compat_version = 10, minor_version + binary_format = "universal2" + yield "macosx_{major}_{minor}_{binary_format}".format( + major=compat_version[0], + minor=compat_version[1], + binary_format=binary_format, + ) + + +def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]: + linux = _normalize_string(sysconfig.get_platform()) + if not linux.startswith("linux_"): + # we should never be here, just yield the sysconfig one and return + yield linux + return + if is_32bit: + if linux == "linux_x86_64": + linux = "linux_i686" + elif linux == "linux_aarch64": + linux = "linux_armv8l" + _, arch = linux.split("_", 1) + archs = {"armv8l": ["armv8l", "armv7l"]}.get(arch, [arch]) + yield from _manylinux.platform_tags(archs) + yield from _musllinux.platform_tags(archs) + for arch in archs: + yield f"linux_{arch}" + + +def _generic_platforms() -> Iterator[str]: + yield _normalize_string(sysconfig.get_platform()) + + +def platform_tags() -> Iterator[str]: + """ + Provides the platform tags for this installation. + """ + if platform.system() == "Darwin": + return mac_platforms() + elif platform.system() == "Linux": + return _linux_platforms() + else: + return _generic_platforms() + + +def interpreter_name() -> str: + """ + Returns the name of the running interpreter. + + Some implementations have a reserved, two-letter abbreviation which will + be returned when appropriate. + """ + name = sys.implementation.name + return INTERPRETER_SHORT_NAMES.get(name) or name + + +def interpreter_version(*, warn: bool = False) -> str: + """ + Returns the version of the running interpreter. + """ + version = _get_config_var("py_version_nodot", warn=warn) + if version: + version = str(version) + else: + version = _version_nodot(sys.version_info[:2]) + return version + + +def _version_nodot(version: PythonVersion) -> str: + return "".join(map(str, version)) + + +def sys_tags(*, warn: bool = False) -> Iterator[Tag]: + """ + Returns the sequence of tag triples for the running interpreter. + + The order of the sequence corresponds to priority order for the + interpreter, from most to least important. + """ + + interp_name = interpreter_name() + if interp_name == "cp": + yield from cpython_tags(warn=warn) + else: + yield from generic_tags() + + if interp_name == "pp": + interp = "pp3" + elif interp_name == "cp": + interp = "cp" + interpreter_version(warn=warn) + else: + interp = None + yield from compatible_tags(interpreter=interp) diff --git a/tools/gyp/pylib/packaging/utils.py b/tools/gyp/pylib/packaging/utils.py new file mode 100644 index 00000000000000..c2c2f75aa80628 --- /dev/null +++ b/tools/gyp/pylib/packaging/utils.py @@ -0,0 +1,172 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. + +import re +from typing import FrozenSet, NewType, Tuple, Union, cast + +from .tags import Tag, parse_tag +from .version import InvalidVersion, Version + +BuildTag = Union[Tuple[()], Tuple[int, str]] +NormalizedName = NewType("NormalizedName", str) + + +class InvalidName(ValueError): + """ + An invalid distribution name; users should refer to the packaging user guide. + """ + + +class InvalidWheelFilename(ValueError): + """ + An invalid wheel filename was found, users should refer to PEP 427. + """ + + +class InvalidSdistFilename(ValueError): + """ + An invalid sdist filename was found, users should refer to the packaging user guide. + """ + + +# Core metadata spec for `Name` +_validate_regex = re.compile( + r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE +) +_canonicalize_regex = re.compile(r"[-_.]+") +_normalized_regex = re.compile(r"^([a-z0-9]|[a-z0-9]([a-z0-9-](?!--))*[a-z0-9])$") +# PEP 427: The build number must start with a digit. +_build_tag_regex = re.compile(r"(\d+)(.*)") + + +def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName: + if validate and not _validate_regex.match(name): + raise InvalidName(f"name is invalid: {name!r}") + # This is taken from PEP 503. + value = _canonicalize_regex.sub("-", name).lower() + return cast(NormalizedName, value) + + +def is_normalized_name(name: str) -> bool: + return _normalized_regex.match(name) is not None + + +def canonicalize_version( + version: Union[Version, str], *, strip_trailing_zero: bool = True +) -> str: + """ + This is very similar to Version.__str__, but has one subtle difference + with the way it handles the release segment. + """ + if isinstance(version, str): + try: + parsed = Version(version) + except InvalidVersion: + # Legacy versions cannot be normalized + return version + else: + parsed = version + + parts = [] + + # Epoch + if parsed.epoch != 0: + parts.append(f"{parsed.epoch}!") + + # Release segment + release_segment = ".".join(str(x) for x in parsed.release) + if strip_trailing_zero: + # NB: This strips trailing '.0's to normalize + release_segment = re.sub(r"(\.0)+$", "", release_segment) + parts.append(release_segment) + + # Pre-release + if parsed.pre is not None: + parts.append("".join(str(x) for x in parsed.pre)) + + # Post-release + if parsed.post is not None: + parts.append(f".post{parsed.post}") + + # Development release + if parsed.dev is not None: + parts.append(f".dev{parsed.dev}") + + # Local version segment + if parsed.local is not None: + parts.append(f"+{parsed.local}") + + return "".join(parts) + + +def parse_wheel_filename( + filename: str, +) -> Tuple[NormalizedName, Version, BuildTag, FrozenSet[Tag]]: + if not filename.endswith(".whl"): + raise InvalidWheelFilename( + f"Invalid wheel filename (extension must be '.whl'): {filename}" + ) + + filename = filename[:-4] + dashes = filename.count("-") + if dashes not in (4, 5): + raise InvalidWheelFilename( + f"Invalid wheel filename (wrong number of parts): {filename}" + ) + + parts = filename.split("-", dashes - 2) + name_part = parts[0] + # See PEP 427 for the rules on escaping the project name. + if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None: + raise InvalidWheelFilename(f"Invalid project name: {filename}") + name = canonicalize_name(name_part) + + try: + version = Version(parts[1]) + except InvalidVersion as e: + raise InvalidWheelFilename( + f"Invalid wheel filename (invalid version): {filename}" + ) from e + + if dashes == 5: + build_part = parts[2] + build_match = _build_tag_regex.match(build_part) + if build_match is None: + raise InvalidWheelFilename( + f"Invalid build number: {build_part} in '{filename}'" + ) + build = cast(BuildTag, (int(build_match.group(1)), build_match.group(2))) + else: + build = () + tags = parse_tag(parts[-1]) + return (name, version, build, tags) + + +def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]: + if filename.endswith(".tar.gz"): + file_stem = filename[: -len(".tar.gz")] + elif filename.endswith(".zip"): + file_stem = filename[: -len(".zip")] + else: + raise InvalidSdistFilename( + f"Invalid sdist filename (extension must be '.tar.gz' or '.zip'):" + f" {filename}" + ) + + # We are requiring a PEP 440 version, which cannot contain dashes, + # so we split on the last dash. + name_part, sep, version_part = file_stem.rpartition("-") + if not sep: + raise InvalidSdistFilename(f"Invalid sdist filename: {filename}") + + name = canonicalize_name(name_part) + + try: + version = Version(version_part) + except InvalidVersion as e: + raise InvalidSdistFilename( + f"Invalid sdist filename (invalid version): {filename}" + ) from e + + return (name, version) diff --git a/tools/gyp/pylib/packaging/version.py b/tools/gyp/pylib/packaging/version.py new file mode 100644 index 00000000000000..5faab9bd0dcf28 --- /dev/null +++ b/tools/gyp/pylib/packaging/version.py @@ -0,0 +1,563 @@ +# This file is dual licensed under the terms of the Apache License, Version +# 2.0, and the BSD License. See the LICENSE file in the root of this repository +# for complete details. +""" +.. testsetup:: + + from packaging.version import parse, Version +""" + +import itertools +import re +from typing import Any, Callable, NamedTuple, Optional, SupportsInt, Tuple, Union + +from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType + +__all__ = ["VERSION_PATTERN", "parse", "Version", "InvalidVersion"] + +LocalType = Tuple[Union[int, str], ...] + +CmpPrePostDevType = Union[InfinityType, NegativeInfinityType, Tuple[str, int]] +CmpLocalType = Union[ + NegativeInfinityType, + Tuple[Union[Tuple[int, str], Tuple[NegativeInfinityType, Union[int, str]]], ...], +] +CmpKey = Tuple[ + int, + Tuple[int, ...], + CmpPrePostDevType, + CmpPrePostDevType, + CmpPrePostDevType, + CmpLocalType, +] +VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool] + + +class _Version(NamedTuple): + epoch: int + release: Tuple[int, ...] + dev: Optional[Tuple[str, int]] + pre: Optional[Tuple[str, int]] + post: Optional[Tuple[str, int]] + local: Optional[LocalType] + + +def parse(version: str) -> "Version": + """Parse the given version string. + + >>> parse('1.0.dev1') + + + :param version: The version string to parse. + :raises InvalidVersion: When the version string is not a valid version. + """ + return Version(version) + + +class InvalidVersion(ValueError): + """Raised when a version string is not a valid version. + + >>> Version("invalid") + Traceback (most recent call last): + ... + packaging.version.InvalidVersion: Invalid version: 'invalid' + """ + + +class _BaseVersion: + _key: Tuple[Any, ...] + + def __hash__(self) -> int: + return hash(self._key) + + # Please keep the duplicated `isinstance` check + # in the six comparisons hereunder + # unless you find a way to avoid adding overhead function calls. + def __lt__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key < other._key + + def __le__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key <= other._key + + def __eq__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key == other._key + + def __ge__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key >= other._key + + def __gt__(self, other: "_BaseVersion") -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key > other._key + + def __ne__(self, other: object) -> bool: + if not isinstance(other, _BaseVersion): + return NotImplemented + + return self._key != other._key + + +# Deliberately not anchored to the start and end of the string, to make it +# easier for 3rd party code to reuse +_VERSION_PATTERN = r""" + v? + (?: + (?:(?P[0-9]+)!)? # epoch + (?P[0-9]+(?:\.[0-9]+)*) # release segment + (?P
                                          # pre-release
+            [-_\.]?
+            (?Palpha|a|beta|b|preview|pre|c|rc)
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+        (?P                                         # post release
+            (?:-(?P[0-9]+))
+            |
+            (?:
+                [-_\.]?
+                (?Ppost|rev|r)
+                [-_\.]?
+                (?P[0-9]+)?
+            )
+        )?
+        (?P                                          # dev release
+            [-_\.]?
+            (?Pdev)
+            [-_\.]?
+            (?P[0-9]+)?
+        )?
+    )
+    (?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+"""
+
+VERSION_PATTERN = _VERSION_PATTERN
+"""
+A string containing the regular expression used to match a valid version.
+
+The pattern is not anchored at either end, and is intended for embedding in larger
+expressions (for example, matching a version number as part of a file name). The
+regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE``
+flags set.
+
+:meta hide-value:
+"""
+
+
+class Version(_BaseVersion):
+    """This class abstracts handling of a project's versions.
+
+    A :class:`Version` instance is comparison aware and can be compared and
+    sorted using the standard Python interfaces.
+
+    >>> v1 = Version("1.0a5")
+    >>> v2 = Version("1.0")
+    >>> v1
+    
+    >>> v2
+    
+    >>> v1 < v2
+    True
+    >>> v1 == v2
+    False
+    >>> v1 > v2
+    False
+    >>> v1 >= v2
+    False
+    >>> v1 <= v2
+    True
+    """
+
+    _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
+    _key: CmpKey
+
+    def __init__(self, version: str) -> None:
+        """Initialize a Version object.
+
+        :param version:
+            The string representation of a version which will be parsed and normalized
+            before use.
+        :raises InvalidVersion:
+            If the ``version`` does not conform to PEP 440 in any way then this
+            exception will be raised.
+        """
+
+        # Validate the version and parse it into pieces
+        match = self._regex.search(version)
+        if not match:
+            raise InvalidVersion(f"Invalid version: '{version}'")
+
+        # Store the parsed out pieces of the version
+        self._version = _Version(
+            epoch=int(match.group("epoch")) if match.group("epoch") else 0,
+            release=tuple(int(i) for i in match.group("release").split(".")),
+            pre=_parse_letter_version(match.group("pre_l"), match.group("pre_n")),
+            post=_parse_letter_version(
+                match.group("post_l"), match.group("post_n1") or match.group("post_n2")
+            ),
+            dev=_parse_letter_version(match.group("dev_l"), match.group("dev_n")),
+            local=_parse_local_version(match.group("local")),
+        )
+
+        # Generate a key which will be used for sorting
+        self._key = _cmpkey(
+            self._version.epoch,
+            self._version.release,
+            self._version.pre,
+            self._version.post,
+            self._version.dev,
+            self._version.local,
+        )
+
+    def __repr__(self) -> str:
+        """A representation of the Version that shows all internal state.
+
+        >>> Version('1.0.0')
+        
+        """
+        return f""
+
+    def __str__(self) -> str:
+        """A string representation of the version that can be rounded-tripped.
+
+        >>> str(Version("1.0a5"))
+        '1.0a5'
+        """
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        # Pre-release
+        if self.pre is not None:
+            parts.append("".join(str(x) for x in self.pre))
+
+        # Post-release
+        if self.post is not None:
+            parts.append(f".post{self.post}")
+
+        # Development release
+        if self.dev is not None:
+            parts.append(f".dev{self.dev}")
+
+        # Local version segment
+        if self.local is not None:
+            parts.append(f"+{self.local}")
+
+        return "".join(parts)
+
+    @property
+    def epoch(self) -> int:
+        """The epoch of the version.
+
+        >>> Version("2.0.0").epoch
+        0
+        >>> Version("1!2.0.0").epoch
+        1
+        """
+        return self._version.epoch
+
+    @property
+    def release(self) -> Tuple[int, ...]:
+        """The components of the "release" segment of the version.
+
+        >>> Version("1.2.3").release
+        (1, 2, 3)
+        >>> Version("2.0.0").release
+        (2, 0, 0)
+        >>> Version("1!2.0.0.post0").release
+        (2, 0, 0)
+
+        Includes trailing zeroes but not the epoch or any pre-release / development /
+        post-release suffixes.
+        """
+        return self._version.release
+
+    @property
+    def pre(self) -> Optional[Tuple[str, int]]:
+        """The pre-release segment of the version.
+
+        >>> print(Version("1.2.3").pre)
+        None
+        >>> Version("1.2.3a1").pre
+        ('a', 1)
+        >>> Version("1.2.3b1").pre
+        ('b', 1)
+        >>> Version("1.2.3rc1").pre
+        ('rc', 1)
+        """
+        return self._version.pre
+
+    @property
+    def post(self) -> Optional[int]:
+        """The post-release number of the version.
+
+        >>> print(Version("1.2.3").post)
+        None
+        >>> Version("1.2.3.post1").post
+        1
+        """
+        return self._version.post[1] if self._version.post else None
+
+    @property
+    def dev(self) -> Optional[int]:
+        """The development number of the version.
+
+        >>> print(Version("1.2.3").dev)
+        None
+        >>> Version("1.2.3.dev1").dev
+        1
+        """
+        return self._version.dev[1] if self._version.dev else None
+
+    @property
+    def local(self) -> Optional[str]:
+        """The local version segment of the version.
+
+        >>> print(Version("1.2.3").local)
+        None
+        >>> Version("1.2.3+abc").local
+        'abc'
+        """
+        if self._version.local:
+            return ".".join(str(x) for x in self._version.local)
+        else:
+            return None
+
+    @property
+    def public(self) -> str:
+        """The public portion of the version.
+
+        >>> Version("1.2.3").public
+        '1.2.3'
+        >>> Version("1.2.3+abc").public
+        '1.2.3'
+        >>> Version("1.2.3+abc.dev1").public
+        '1.2.3'
+        """
+        return str(self).split("+", 1)[0]
+
+    @property
+    def base_version(self) -> str:
+        """The "base version" of the version.
+
+        >>> Version("1.2.3").base_version
+        '1.2.3'
+        >>> Version("1.2.3+abc").base_version
+        '1.2.3'
+        >>> Version("1!1.2.3+abc.dev1").base_version
+        '1!1.2.3'
+
+        The "base version" is the public version of the project without any pre or post
+        release markers.
+        """
+        parts = []
+
+        # Epoch
+        if self.epoch != 0:
+            parts.append(f"{self.epoch}!")
+
+        # Release segment
+        parts.append(".".join(str(x) for x in self.release))
+
+        return "".join(parts)
+
+    @property
+    def is_prerelease(self) -> bool:
+        """Whether this version is a pre-release.
+
+        >>> Version("1.2.3").is_prerelease
+        False
+        >>> Version("1.2.3a1").is_prerelease
+        True
+        >>> Version("1.2.3b1").is_prerelease
+        True
+        >>> Version("1.2.3rc1").is_prerelease
+        True
+        >>> Version("1.2.3dev1").is_prerelease
+        True
+        """
+        return self.dev is not None or self.pre is not None
+
+    @property
+    def is_postrelease(self) -> bool:
+        """Whether this version is a post-release.
+
+        >>> Version("1.2.3").is_postrelease
+        False
+        >>> Version("1.2.3.post1").is_postrelease
+        True
+        """
+        return self.post is not None
+
+    @property
+    def is_devrelease(self) -> bool:
+        """Whether this version is a development release.
+
+        >>> Version("1.2.3").is_devrelease
+        False
+        >>> Version("1.2.3.dev1").is_devrelease
+        True
+        """
+        return self.dev is not None
+
+    @property
+    def major(self) -> int:
+        """The first item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").major
+        1
+        """
+        return self.release[0] if len(self.release) >= 1 else 0
+
+    @property
+    def minor(self) -> int:
+        """The second item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").minor
+        2
+        >>> Version("1").minor
+        0
+        """
+        return self.release[1] if len(self.release) >= 2 else 0
+
+    @property
+    def micro(self) -> int:
+        """The third item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").micro
+        3
+        >>> Version("1").micro
+        0
+        """
+        return self.release[2] if len(self.release) >= 3 else 0
+
+
+def _parse_letter_version(
+    letter: Optional[str], number: Union[str, bytes, SupportsInt, None]
+) -> Optional[Tuple[str, int]]:
+
+    if letter:
+        # We consider there to be an implicit 0 in a pre-release if there is
+        # not a numeral associated with it.
+        if number is None:
+            number = 0
+
+        # We normalize any letters to their lower case form
+        letter = letter.lower()
+
+        # We consider some words to be alternate spellings of other words and
+        # in those cases we want to normalize the spellings to our preferred
+        # spelling.
+        if letter == "alpha":
+            letter = "a"
+        elif letter == "beta":
+            letter = "b"
+        elif letter in ["c", "pre", "preview"]:
+            letter = "rc"
+        elif letter in ["rev", "r"]:
+            letter = "post"
+
+        return letter, int(number)
+    if not letter and number:
+        # We assume if we are given a number, but we are not given a letter
+        # then this is using the implicit post release syntax (e.g. 1.0-1)
+        letter = "post"
+
+        return letter, int(number)
+
+    return None
+
+
+_local_version_separators = re.compile(r"[\._-]")
+
+
+def _parse_local_version(local: Optional[str]) -> Optional[LocalType]:
+    """
+    Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
+    """
+    if local is not None:
+        return tuple(
+            part.lower() if not part.isdigit() else int(part)
+            for part in _local_version_separators.split(local)
+        )
+    return None
+
+
+def _cmpkey(
+    epoch: int,
+    release: Tuple[int, ...],
+    pre: Optional[Tuple[str, int]],
+    post: Optional[Tuple[str, int]],
+    dev: Optional[Tuple[str, int]],
+    local: Optional[LocalType],
+) -> CmpKey:
+
+    # When we compare a release version, we want to compare it with all of the
+    # trailing zeros removed. So we'll use a reverse the list, drop all the now
+    # leading zeros until we come to something non zero, then take the rest
+    # re-reverse it back into the correct order and make it a tuple and use
+    # that for our sorting key.
+    _release = tuple(
+        reversed(list(itertools.dropwhile(lambda x: x == 0, reversed(release))))
+    )
+
+    # We need to "trick" the sorting algorithm to put 1.0.dev0 before 1.0a0.
+    # We'll do this by abusing the pre segment, but we _only_ want to do this
+    # if there is not a pre or a post segment. If we have one of those then
+    # the normal sorting rules will handle this case correctly.
+    if pre is None and post is None and dev is not None:
+        _pre: CmpPrePostDevType = NegativeInfinity
+    # Versions without a pre-release (except as noted above) should sort after
+    # those with one.
+    elif pre is None:
+        _pre = Infinity
+    else:
+        _pre = pre
+
+    # Versions without a post segment should sort before those with one.
+    if post is None:
+        _post: CmpPrePostDevType = NegativeInfinity
+
+    else:
+        _post = post
+
+    # Versions without a development segment should sort after those with one.
+    if dev is None:
+        _dev: CmpPrePostDevType = Infinity
+
+    else:
+        _dev = dev
+
+    if local is None:
+        # Versions without a local segment should sort before those with one.
+        _local: CmpLocalType = NegativeInfinity
+    else:
+        # Versions with a local segment need that segment parsed to implement
+        # the sorting rules in PEP440.
+        # - Alpha numeric segments sort before numeric segments
+        # - Alpha numeric segments sort lexicographically
+        # - Numeric segments sort numerically
+        # - Shorter versions sort before longer versions when the prefixes
+        #   match exactly
+        _local = tuple(
+            (i, "") if isinstance(i, int) else (NegativeInfinity, i) for i in local
+        )
+
+    return epoch, _release, _pre, _post, _dev, _local
diff --git a/tools/gyp/pyproject.toml b/tools/gyp/pyproject.toml
new file mode 100644
index 00000000000000..0c25d0b3c1a065
--- /dev/null
+++ b/tools/gyp/pyproject.toml
@@ -0,0 +1,119 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "gyp-next"
+version = "0.16.1"
+authors = [
+  { name="Node.js contributors", email="ryzokuken@disroot.org" },
+]
+description = "A fork of the GYP build system for use in the Node.js projects"
+readme = "README.md"
+license = { file="LICENSE" }
+requires-python = ">=3.8"
+# The Python module "packaging" is vendored in the "pylib/packaging" directory to support Python >= 3.12.
+# dependencies = ["packaging>=23.1"]  # Uncomment this line if the vendored version is removed.
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Environment :: Console",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: BSD License",
+    "Natural Language :: English",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+]
+
+[project.optional-dependencies]
+dev = ["flake8", "ruff", "pytest"]
+
+[project.scripts]
+gyp = "gyp:script_main"
+
+[project.urls]
+"Homepage" = "https://github.com/nodejs/gyp-next"
+
+[tool.ruff]
+select = [
+  "C4",   # flake8-comprehensions
+  "C90",  # McCabe cyclomatic complexity
+  "DTZ",  # flake8-datetimez
+  "E",    # pycodestyle
+  "F",    # Pyflakes
+  "G",    # flake8-logging-format
+  "ICN",  # flake8-import-conventions
+  "INT",  # flake8-gettext
+  "PL",   # Pylint
+  "PYI",  # flake8-pyi
+  "RSE",  # flake8-raise
+  "RUF",  # Ruff-specific rules
+  "T10",  # flake8-debugger
+  "TCH",  # flake8-type-checking
+  "TID",  # flake8-tidy-imports
+  "UP",   # pyupgrade
+  "W",    # pycodestyle
+  "YTT",  # flake8-2020
+  # "A",    # flake8-builtins
+  # "ANN",  # flake8-annotations
+  # "ARG",  # flake8-unused-arguments
+  # "B",    # flake8-bugbear
+  # "BLE",  # flake8-blind-except
+  # "COM",  # flake8-commas
+  # "D",    # pydocstyle
+  # "DJ",   # flake8-django
+  # "EM",   # flake8-errmsg
+  # "ERA",  # eradicate
+  # "EXE",  # flake8-executable
+  # "FBT",  # flake8-boolean-trap
+  # "I",    # isort
+  # "INP",  # flake8-no-pep420
+  # "ISC",  # flake8-implicit-str-concat
+  # "N",    # pep8-naming
+  # "NPY",  # NumPy-specific rules
+  # "PD",   # pandas-vet
+  # "PGH",  # pygrep-hooks
+  # "PIE",  # flake8-pie
+  # "PT",   # flake8-pytest-style
+  # "PTH",  # flake8-use-pathlib
+  # "Q",    # flake8-quotes
+  # "RET",  # flake8-return
+  # "S",    # flake8-bandit
+  # "SIM",  # flake8-simplify
+  # "SLF",  # flake8-self
+  # "T20",  # flake8-print
+  # "TRY",  # tryceratops
+]
+ignore = [
+  "E721",
+  "PLC1901",
+  "PLR0402",
+  "PLR1714",
+  "PLR2004",
+  "PLR5501",
+  "PLW0603",
+  "PLW2901",
+  "PYI024",
+  "RUF005",
+  "RUF012",
+  "UP031",
+]
+extend-exclude = ["pylib/packaging"]
+line-length = 88
+target-version = "py37"
+
+[tool.ruff.mccabe]
+max-complexity = 101
+
+[tool.ruff.pylint]
+max-args = 11
+max-branches = 108
+max-returns = 10
+max-statements = 286
+
+[tool.setuptools]
+package-dir = {"" = "pylib"}
+packages = ["gyp", "gyp.generator"]
diff --git a/tools/gyp/requirements_dev.txt b/tools/gyp/requirements_dev.txt
deleted file mode 100644
index 28ecacab602938..00000000000000
--- a/tools/gyp/requirements_dev.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-flake8
-pytest
diff --git a/tools/gyp/setup.py b/tools/gyp/setup.py
deleted file mode 100644
index 1bb6908deaf8ce..00000000000000
--- a/tools/gyp/setup.py
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright (c) 2009 Google Inc. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-from os import path
-
-from setuptools import setup
-
-here = path.abspath(path.dirname(__file__))
-# Get the long description from the README file
-with open(path.join(here, "README.md")) as in_file:
-    long_description = in_file.read()
-
-setup(
-    name="gyp-next",
-    version="0.13.0",
-    description="A fork of the GYP build system for use in the Node.js projects",
-    long_description=long_description,
-    long_description_content_type="text/markdown",
-    author="Node.js contributors",
-    author_email="ryzokuken@disroot.org",
-    url="https://github.com/nodejs/gyp-next",
-    package_dir={"": "pylib"},
-    packages=["gyp", "gyp.generator"],
-    entry_points={"console_scripts": ["gyp=gyp:script_main"]},
-    python_requires=">=3.6",
-    classifiers=[
-        "Development Status :: 3 - Alpha",
-        "Environment :: Console",
-        "Intended Audience :: Developers",
-        "License :: OSI Approved :: BSD License",
-        "Natural Language :: English",
-        "Programming Language :: Python",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.6",
-        "Programming Language :: Python :: 3.7",
-        "Programming Language :: Python :: 3.8",
-        "Programming Language :: Python :: 3.9",
-    ],
-)
diff --git a/tools/gyp/tools/pretty_sln.py b/tools/gyp/tools/pretty_sln.py
index 6ca0cd12a7ba06..cf0638a23d635d 100755
--- a/tools/gyp/tools/pretty_sln.py
+++ b/tools/gyp/tools/pretty_sln.py
@@ -34,10 +34,10 @@ def BuildProject(project, built, projects, deps):
 
 def ParseSolution(solution_file):
     # All projects, their clsid and paths.
-    projects = dict()
+    projects = {}
 
     # A list of dependencies associated with a project.
-    dependencies = dict()
+    dependencies = {}
 
     # Regular expressions that matches the SLN format.
     # The first line of a project definition.
diff --git a/tools/gyp/tools/pretty_vcproj.py b/tools/gyp/tools/pretty_vcproj.py
index 00d32debda51f0..72c65d7fc495f9 100755
--- a/tools/gyp/tools/pretty_vcproj.py
+++ b/tools/gyp/tools/pretty_vcproj.py
@@ -21,7 +21,7 @@
 
 __author__ = "nsylvain (Nicolas Sylvain)"
 ARGUMENTS = None
-REPLACEMENTS = dict()
+REPLACEMENTS = {}
 
 
 def cmp(x, y):

From 91a0944e5fe9293eb216b58a5302488f70dae270 Mon Sep 17 00:00:00 2001
From: Ethan Arrowood 
Date: Tue, 31 Oct 2023 13:49:14 -0600
Subject: [PATCH 056/229] meta: add ethan.arrowood@vercel.com to mailmap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-URL: https://github.com/nodejs/node/pull/50491
Reviewed-By: Vinícius Lourenço Claro Cardoso 
Reviewed-By: Tobias Nießen 
Reviewed-By: Luigi Pinca 
---
 .mailmap | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.mailmap b/.mailmap
index eefb00db9b7afa..24ff039703b9c1 100644
--- a/.mailmap
+++ b/.mailmap
@@ -92,8 +92,8 @@ Caralyn Reisle 
 Charles  
 Charles Rudolph 
 Chemi Atlow 
-Chemi Atlow   
-Chemi Atlow   
+Chemi Atlow  
+Chemi Atlow  
 Chen Gang 
 Chen Gang  <13298548+MoonBall@users.noreply.github.com>
 Chengzhong Wu 
@@ -161,6 +161,7 @@ Erik Corry 
 Ernesto Salazar 
 Erwin W. Ramadhan 
 Ethan Arrowood  
+Ethan Arrowood  
 Eugene Obrezkov 
 Eugene Ostroukhov  
 Eugene Ostroukhov  

From c1a196c897ca4ea7640e729f558fd3ed945bf7f9 Mon Sep 17 00:00:00 2001
From: James Sumners <321201+jsumners@users.noreply.github.com>
Date: Tue, 31 Oct 2023 17:11:15 -0400
Subject: [PATCH 057/229] esm: add import.meta.dirname and import.meta.filename

PR-URL: https://github.com/nodejs/node/pull/48740
Reviewed-By: Matteo Collina 
Reviewed-By: Jiawen Geng 
Reviewed-By: Benjamin Gruenbaum 
Reviewed-By: Stephen Belanger 
Reviewed-By: James M Snell 
Reviewed-By: Geoffrey Booth 
---
 benchmark/fixtures/esm-dir-file.mjs           |  3 ++
 benchmark/fixtures/load-esm-dir-file.js       |  5 +++
 benchmark/misc/startup.js                     |  1 +
 doc/api/esm.md                                | 35 ++++++++++++++++++-
 .../modules/esm/initialize_import_meta.js     | 13 ++++++-
 test/es-module/test-esm-import-meta.mjs       | 16 ++++++++-
 .../test_cannot_run_js/entry_point.c          |  7 ++++
 7 files changed, 77 insertions(+), 3 deletions(-)
 create mode 100644 benchmark/fixtures/esm-dir-file.mjs
 create mode 100644 benchmark/fixtures/load-esm-dir-file.js
 create mode 100644 test/js-native-api/test_cannot_run_js/entry_point.c

diff --git a/benchmark/fixtures/esm-dir-file.mjs b/benchmark/fixtures/esm-dir-file.mjs
new file mode 100644
index 00000000000000..d5dafc5c46697e
--- /dev/null
+++ b/benchmark/fixtures/esm-dir-file.mjs
@@ -0,0 +1,3 @@
+import assert from 'assert';
+assert.ok(import.meta.dirname);
+assert.ok(import.meta.filename);
diff --git a/benchmark/fixtures/load-esm-dir-file.js b/benchmark/fixtures/load-esm-dir-file.js
new file mode 100644
index 00000000000000..a4115dd39e0489
--- /dev/null
+++ b/benchmark/fixtures/load-esm-dir-file.js
@@ -0,0 +1,5 @@
+(async function () {
+  for (let i = 0; i < 1000; i += 1) {
+    await import(`./esm-dir-file.mjs?i=${i}`);
+  }
+}());
diff --git a/benchmark/misc/startup.js b/benchmark/misc/startup.js
index 07c0701d128899..f55be1a7902e4f 100644
--- a/benchmark/misc/startup.js
+++ b/benchmark/misc/startup.js
@@ -9,6 +9,7 @@ const bench = common.createBenchmark(main, {
   script: [
     'benchmark/fixtures/require-builtins',
     'test/fixtures/semicolon',
+    'benchmark/fixtures/load-esm-dir-file',
   ],
   mode: ['process', 'worker'],
   count: [30],
diff --git a/doc/api/esm.md b/doc/api/esm.md
index b8579a58ed2bd5..83492ea15d0bbd 100644
--- a/doc/api/esm.md
+++ b/doc/api/esm.md
@@ -332,6 +332,35 @@ modules it can be used to load ES modules.
 The `import.meta` meta property is an `Object` that contains the following
 properties.
 
+### `import.meta.dirname`
+
+
+
+> Stability: 1.2 - Release candidate
+
+* {string} The directory name of the current module. This is the same as the
+  [`path.dirname()`][] of the [`import.meta.filename`][].
+
+> **Caveat**: only present on `file:` modules.
+
+### `import.meta.filename`
+
+
+
+> Stability: 1.2 - Release candidate
+
+* {string} The full absolute path and filename of the current module, with
+* symlinks resolved.
+* This is the same as the [`url.fileURLToPath()`][] of the
+* [`import.meta.url`][].
+
+> **Caveat** only local modules support this property. Modules not using the
+> `file:` protocol will not provide it.
+
 ### `import.meta.url`
 
 * {string} The absolute `file:` URL of the module.
@@ -526,7 +555,7 @@ If needed, a `require` function can be constructed within an ES module using
 These CommonJS variables are not available in ES modules.
 
 `__filename` and `__dirname` use cases can be replicated via
-[`import.meta.url`][].
+[`import.meta.filename`][] and [`import.meta.dirname`][].
 
 #### No Addon Loading
 
@@ -1107,13 +1136,17 @@ resolution for ESM specifiers is [commonjs-extension-resolution-loader][].
 [`data:` URLs]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs
 [`export`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/export
 [`import()`]: #import-expressions
+[`import.meta.dirname`]: #importmetadirname
+[`import.meta.filename`]: #importmetafilename
 [`import.meta.resolve`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/import.meta/resolve
 [`import.meta.url`]: #importmetaurl
 [`import`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/import
 [`module.createRequire()`]: module.md#modulecreaterequirefilename
 [`module.syncBuiltinESMExports()`]: module.md#modulesyncbuiltinesmexports
 [`package.json`]: packages.md#nodejs-packagejson-field-definitions
+[`path.dirname()`]: path.md#pathdirnamepath
 [`process.dlopen`]: process.md#processdlopenmodule-filename-flags
+[`url.fileURLToPath()`]: url.md#urlfileurltopathurl
 [cjs-module-lexer]: https://github.com/nodejs/cjs-module-lexer/tree/1.2.2
 [commonjs-extension-resolution-loader]: https://github.com/nodejs/loaders-test/tree/main/commonjs-extension-resolution-loader
 [custom https loader]: module.md#import-from-https
diff --git a/lib/internal/modules/esm/initialize_import_meta.js b/lib/internal/modules/esm/initialize_import_meta.js
index f55f60a5b7647a..818c99479cd068 100644
--- a/lib/internal/modules/esm/initialize_import_meta.js
+++ b/lib/internal/modules/esm/initialize_import_meta.js
@@ -1,6 +1,9 @@
 'use strict';
 
+const { StringPrototypeStartsWith } = primordials;
 const { getOptionValue } = require('internal/options');
+const { fileURLToPath } = require('internal/url');
+const { dirname } = require('path');
 const experimentalImportMetaResolve = getOptionValue('--experimental-import-meta-resolve');
 
 /**
@@ -45,12 +48,20 @@ function createImportMetaResolve(defaultParentURL, loader, allowParentURL) {
  * @param {object} meta
  * @param {{url: string}} context
  * @param {typeof import('./loader.js').ModuleLoader} loader Reference to the current module loader
- * @returns {{url: string, resolve?: Function}}
+ * @returns {{dirname?: string, filename?: string, url: string, resolve?: Function}}
  */
 function initializeImportMeta(meta, context, loader) {
   const { url } = context;
 
   // Alphabetical
+  if (StringPrototypeStartsWith(url, 'file:') === true) {
+    // These only make sense for locally loaded modules,
+    // i.e. network modules are not supported.
+    const filePath = fileURLToPath(url);
+    meta.dirname = dirname(filePath);
+    meta.filename = filePath;
+  }
+
   if (!loader || loader.allowImportMetaResolve) {
     meta.resolve = createImportMetaResolve(url, loader, experimentalImportMetaResolve);
   }
diff --git a/test/es-module/test-esm-import-meta.mjs b/test/es-module/test-esm-import-meta.mjs
index 4c5aa902d4a970..50d16a3438a851 100644
--- a/test/es-module/test-esm-import-meta.mjs
+++ b/test/es-module/test-esm-import-meta.mjs
@@ -3,7 +3,7 @@ import assert from 'assert';
 
 assert.strictEqual(Object.getPrototypeOf(import.meta), null);
 
-const keys = ['resolve', 'url'];
+const keys = ['dirname', 'filename', 'resolve', 'url'];
 assert.deepStrictEqual(Reflect.ownKeys(import.meta), keys);
 
 const descriptors = Object.getOwnPropertyDescriptors(import.meta);
@@ -18,3 +18,17 @@ for (const descriptor of Object.values(descriptors)) {
 
 const urlReg = /^file:\/\/\/.*\/test\/es-module\/test-esm-import-meta\.mjs$/;
 assert(import.meta.url.match(urlReg));
+
+// Match *nix paths: `/some/path/test/es-module`
+// Match Windows paths: `d:\\some\\path\\test\\es-module`
+const dirReg = /^(\/|\w:\\).*(\/|\\)test(\/|\\)es-module$/;
+assert.match(import.meta.dirname, dirReg);
+
+// Match *nix paths: `/some/path/test/es-module/test-esm-import-meta.mjs`
+// Match Windows paths: `d:\\some\\path\\test\\es-module\\test-esm-import-meta.js`
+const fileReg = /^(\/|\w:\\).*(\/|\\)test(\/|\\)es-module(\/|\\)test-esm-import-meta\.mjs$/;
+assert.match(import.meta.filename, fileReg);
+
+// Verify that `data:` imports do not behave like `file:` imports.
+import dataDirname from 'data:text/javascript,export default "dirname" in import.meta';
+assert.strictEqual(dataDirname, false);
diff --git a/test/js-native-api/test_cannot_run_js/entry_point.c b/test/js-native-api/test_cannot_run_js/entry_point.c
new file mode 100644
index 00000000000000..6b7b50a38c9535
--- /dev/null
+++ b/test/js-native-api/test_cannot_run_js/entry_point.c
@@ -0,0 +1,7 @@
+#include 
+
+EXTERN_C_START
+napi_value Init(napi_env env, napi_value exports);
+EXTERN_C_END
+
+NAPI_MODULE(NODE_GYP_MODULE_NAME, Init)

From 6012e3e781bac74e3d8c94f569f44a4239da3545 Mon Sep 17 00:00:00 2001
From: Robert Nagy 
Date: Thu, 26 Oct 2023 15:52:53 +0200
Subject: [PATCH 058/229] stream: fix Writable.destroy performance regression
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ref: https://github.com/nodejs/node/pull/50409
PR-URL: https://github.com/nodejs/node/pull/50478
Reviewed-By: Vinícius Lourenço Claro Cardoso 
Reviewed-By: Antoine du Hamel 
---
 lib/internal/streams/writable.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/internal/streams/writable.js b/lib/internal/streams/writable.js
index 17fc7bbbbf5b65..e55ddc1796cf6c 100644
--- a/lib/internal/streams/writable.js
+++ b/lib/internal/streams/writable.js
@@ -1105,7 +1105,7 @@ Writable.prototype.destroy = function(err, cb) {
   const state = this._writableState;
 
   // Invoke pending callbacks.
-  if ((state[kState] & (kBuffered | kOnFinished | kDestroyed)) !== kDestroyed) {
+  if ((state[kState] & (kBuffered | kOnFinished)) !== 0 && (state[kState] & kDestroyed) === 0) {
     process.nextTick(errorBuffer, state);
   }
 

From c6c0a74b542e102a6ab7fdc13ffa52e7415f5144 Mon Sep 17 00:00:00 2001
From: Guy Bedford 
Date: Wed, 25 Oct 2023 10:56:26 -0700
Subject: [PATCH 059/229] wasi: document security sandboxing status

PR-URL: https://github.com/nodejs/node/pull/50396
Reviewed-By: Michael Dawson 
Reviewed-By: James M Snell 
---
 doc/api/wasi.md | 45 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/doc/api/wasi.md b/doc/api/wasi.md
index f67187b7bcedb8..f590bd16fbbd2a 100644
--- a/doc/api/wasi.md
+++ b/doc/api/wasi.md
@@ -4,11 +4,16 @@
 
 > Stability: 1 - Experimental
 
+The `node:wasi` module does not currently provide the
+comprehensive file system security properties provided by some WASI runtimes.
+Full support for secure file system sandboxing may or may not be implemented in
+future. In the mean time, do not rely on it to run untrusted code. 
+
 
 
 The WASI API provides an implementation of the [WebAssembly System Interface][]
-specification. WASI gives sandboxed WebAssembly applications access to the
-underlying operating system via a collection of POSIX-like functions.
+specification. WASI gives WebAssembly applications access to the underlying
+operating system via a collection of POSIX-like functions.
 
 ```mjs
 import { readFile } from 'node:fs/promises';
@@ -20,7 +25,7 @@ const wasi = new WASI({
   args: argv,
   env,
   preopens: {
-    '/sandbox': '/some/real/path/that/wasm/can/access',
+    '/local': '/some/real/path/that/wasm/can/access',
   },
 });
 
@@ -44,7 +49,7 @@ const wasi = new WASI({
   args: argv,
   env,
   preopens: {
-    '/sandbox': '/some/real/path/that/wasm/can/access',
+    '/local': '/some/real/path/that/wasm/can/access',
   },
 });
 
@@ -97,6 +102,28 @@ Use [wabt](https://github.com/WebAssembly/wabt) to compile `.wat` to `.wasm`
 wat2wasm demo.wat
 ```
 
+## Security
+
+
+
+WASI provides a capabilities-based model through which applications are provided
+their own custom `env`, `preopens`, `stdin`, `stdout`, `stderr`, and `exit`
+capabilities.
+
+**The current Node.js threat model does not provide secure sandboxing as is
+present in some WASI runtimes.**
+
+While the capability features are supported, they do not form a security model
+in Node.js. For example, the file system sandboxing can be escaped with various
+techniques. The project is exploring whether these security guarantees could be
+added in future.
+
 ## Class: `WASI`
 
 
+
+A number of milliseconds the test execution will fail after. If unspecified,
+subtests inherit this value from their parent. The default value is `Infinity`.
+
 ### `--throw-deprecation`
 
  '\000' ASCII (0) || \0
       // ---> er
       // ---> n
-      bool IsEndNode() {
+      bool IsEndNode() const {
         if (children.size() == 0) {
           return true;
         }
@@ -126,8 +127,8 @@ class FSPermission final : public PermissionBase {
     RadixTree();
     ~RadixTree();
     void Insert(const std::string& s);
-    bool Lookup(const std::string_view& s) { return Lookup(s, false); }
-    bool Lookup(const std::string_view& s, bool when_empty_return);
+    bool Lookup(const std::string_view& s) const { return Lookup(s, false); }
+    bool Lookup(const std::string_view& s, bool when_empty_return) const;
 
    private:
     Node* root_node_;
diff --git a/src/permission/inspector_permission.cc b/src/permission/inspector_permission.cc
index 401d801ac0adb5..34e55db4bef590 100644
--- a/src/permission/inspector_permission.cc
+++ b/src/permission/inspector_permission.cc
@@ -14,7 +14,7 @@ void InspectorPermission::Apply(const std::vector& allow,
 }
 
 bool InspectorPermission::is_granted(PermissionScope perm,
-                                     const std::string_view& param) {
+                                     const std::string_view& param) const {
   return deny_all_ == false;
 }
 
diff --git a/src/permission/inspector_permission.h b/src/permission/inspector_permission.h
index e5c6d1b81677f5..fc8b357c4fcb15 100644
--- a/src/permission/inspector_permission.h
+++ b/src/permission/inspector_permission.h
@@ -15,7 +15,7 @@ class InspectorPermission final : public PermissionBase {
   void Apply(const std::vector& allow,
              PermissionScope scope) override;
   bool is_granted(PermissionScope perm,
-                  const std::string_view& param = "") override;
+                  const std::string_view& param = "") const override;
 
  private:
   bool deny_all_;
diff --git a/src/permission/permission_base.h b/src/permission/permission_base.h
index c2f377424f6fc5..b817efb88a76bb 100644
--- a/src/permission/permission_base.h
+++ b/src/permission/permission_base.h
@@ -42,7 +42,7 @@ class PermissionBase {
   virtual void Apply(const std::vector& allow,
                      PermissionScope scope) = 0;
   virtual bool is_granted(PermissionScope perm,
-                          const std::string_view& param = "") = 0;
+                          const std::string_view& param = "") const = 0;
 };
 
 }  // namespace permission
diff --git a/src/permission/worker_permission.cc b/src/permission/worker_permission.cc
index a18938e5fe1efd..78293589faa3bd 100644
--- a/src/permission/worker_permission.cc
+++ b/src/permission/worker_permission.cc
@@ -15,7 +15,7 @@ void WorkerPermission::Apply(const std::vector& allow,
 }
 
 bool WorkerPermission::is_granted(PermissionScope perm,
-                                  const std::string_view& param) {
+                                  const std::string_view& param) const {
   return deny_all_ == false;
 }
 
diff --git a/src/permission/worker_permission.h b/src/permission/worker_permission.h
index cdc224925c2291..99d6b039ee22c1 100644
--- a/src/permission/worker_permission.h
+++ b/src/permission/worker_permission.h
@@ -15,7 +15,7 @@ class WorkerPermission final : public PermissionBase {
   void Apply(const std::vector& allow,
              PermissionScope scope) override;
   bool is_granted(PermissionScope perm,
-                  const std::string_view& param = "") override;
+                  const std::string_view& param = "") const override;
 
  private:
   bool deny_all_;

From 3c79e3cdba8101285b8055eaa0eb7bea5bbe5a23 Mon Sep 17 00:00:00 2001
From: Jungku Lee 
Date: Fri, 17 Nov 2023 22:49:26 +0900
Subject: [PATCH 147/229] doc: update print results(detail) in
 `PerformanceEntry`

PR-URL: https://github.com/nodejs/node/pull/50723
Reviewed-By: Luigi Pinca 
Reviewed-By: Deokjin Kim 
---
 doc/api/perf_hooks.md | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/doc/api/perf_hooks.md b/doc/api/perf_hooks.md
index 8323b9ab03fcd0..040b8eaa52027b 100644
--- a/doc/api/perf_hooks.md
+++ b/doc/api/perf_hooks.md
@@ -1328,13 +1328,15 @@ const obs = new PerformanceObserver((perfObserverList, observer) => {
    *     name: 'test',
    *     entryType: 'mark',
    *     startTime: 81.465639,
-   *     duration: 0
+   *     duration: 0,
+   *     detail: null
    *   },
    *   PerformanceEntry {
    *     name: 'meow',
    *     entryType: 'mark',
    *     startTime: 81.860064,
-   *     duration: 0
+   *     duration: 0,
+   *     detail: null
    *   }
    * ]
    */
@@ -1378,7 +1380,8 @@ const obs = new PerformanceObserver((perfObserverList, observer) => {
    *     name: 'meow',
    *     entryType: 'mark',
    *     startTime: 98.545991,
-   *     duration: 0
+   *     duration: 0,
+   *     detail: null
    *   }
    * ]
    */
@@ -1391,7 +1394,8 @@ const obs = new PerformanceObserver((perfObserverList, observer) => {
    *     name: 'test',
    *     entryType: 'mark',
    *     startTime: 63.518931,
-   *     duration: 0
+   *     duration: 0,
+   *     detail: null
    *   }
    * ]
    */
@@ -1434,13 +1438,15 @@ const obs = new PerformanceObserver((perfObserverList, observer) => {
    *     name: 'test',
    *     entryType: 'mark',
    *     startTime: 55.897834,
-   *     duration: 0
+   *     duration: 0,
+   *     detail: null
    *   },
    *   PerformanceEntry {
    *     name: 'meow',
    *     entryType: 'mark',
    *     startTime: 56.350146,
-   *     duration: 0
+   *     duration: 0,
+   *     detail: null
    *   }
    * ]
    */

From 019efa8a5a789cff9767517e3b04f89a58dc67d1 Mon Sep 17 00:00:00 2001
From: Cheng Zhao 
Date: Sat, 18 Nov 2023 04:52:00 +0900
Subject: [PATCH 148/229] build: fix GN configuration for deps/base64

PR-URL: https://github.com/nodejs/node/pull/50696
Reviewed-By: Luigi Pinca 
Reviewed-By: Marco Ippolito 
Reviewed-By: Jiawen Geng 
---
 deps/base64/unofficial.gni | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/deps/base64/unofficial.gni b/deps/base64/unofficial.gni
index 8138d88798e1df..269c62d976d6ad 100644
--- a/deps/base64/unofficial.gni
+++ b/deps/base64/unofficial.gni
@@ -32,6 +32,7 @@ template("base64_gn_build") {
         "HAVE_SSE42=1",
         "HAVE_AVX=1",
         "HAVE_AVX2=1",
+        "HAVE_AVX512=1",
       ]
     }
     if (target_cpu == "arm") {
@@ -65,6 +66,7 @@ template("base64_gn_build") {
       ":base64_sse42",
       ":base64_avx",
       ":base64_avx2",
+      ":base64_avx512",
       ":base64_neon32",
       ":base64_neon64",
     ]
@@ -111,6 +113,7 @@ template("base64_gn_build") {
       }
     }
   }
+
   source_set("base64_avx2") {
     configs += [ ":base64_internal_config" ]
     sources = [ "base64/lib/arch/avx2/codec.c" ]
@@ -123,6 +126,21 @@ template("base64_gn_build") {
     }
   }
 
+  source_set("base64_avx512") {
+    configs += [ ":base64_internal_config" ]
+    sources = [ "base64/lib/arch/avx512/codec.c" ]
+    if (target_cpu == "x86" || target_cpu == "x64") {
+      if (is_clang || !is_win) {
+        cflags_c = [
+          "-mavx512vl",
+          "-mavx512vbmi",
+        ]
+      } else if (is_win) {
+        cflags_c = [ "/arch:AVX512" ]
+      }
+    }
+  }
+
   source_set("base64_neon32") {
     configs += [ ":base64_internal_config" ]
     sources = [ "base64/lib/arch/neon32/codec.c" ]

From 660e70e73bd7d0b68d6ba07ffc1ac2fabae053e6 Mon Sep 17 00:00:00 2001
From: Joyee Cheung 
Date: Fri, 17 Nov 2023 21:00:30 +0100
Subject: [PATCH 149/229] test: skip parallel/test-macos-app-sandbox if disk
 space < 120MB

It needs to copy the Node.js binary which is currently almost
100MB. To be safe, skip the test when the available disk space
is smaller than 120MB.

PR-URL: https://github.com/nodejs/node/pull/50764
Reviewed-By: Debadree Chatterjee 
Reviewed-By: Yagiz Nizipli 
Reviewed-By: Luigi Pinca 
---
 test/parallel/test-macos-app-sandbox.js | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/test/parallel/test-macos-app-sandbox.js b/test/parallel/test-macos-app-sandbox.js
index 91485b75983f84..60ad67b3db3790 100644
--- a/test/parallel/test-macos-app-sandbox.js
+++ b/test/parallel/test-macos-app-sandbox.js
@@ -17,6 +17,10 @@ const nodeBinary = process.execPath;
 
 tmpdir.refresh();
 
+if (!tmpdir.hasEnoughSpace(120 * 1024 * 1024)) {
+  common.skip('Available disk space < 120MB');
+}
+
 const appBundlePath = tmpdir.resolve('node_sandboxed.app');
 const appBundleContentPath = path.join(appBundlePath, 'Contents');
 const appExecutablePath = path.join(

From be54a2286940c6c8b4d2608efc28b778559a3248 Mon Sep 17 00:00:00 2001
From: Joyee Cheung 
Date: Fri, 17 Nov 2023 22:42:21 +0100
Subject: [PATCH 150/229] test: improve test-bootstrap-modules.js

Divide builtins into two lists depending on whether
they are loaded before pre-execution or at run time,
and give clearer suggestions about how to deal with them
based on the category they are in.

This helps preventing regressions like the one reported
in https://github.com/nodejs/node/issues/45662.

PR-URL: https://github.com/nodejs/node/pull/50708
Reviewed-By: Antoine du Hamel 
Reviewed-By: Richard Lau 
Reviewed-By: Yagiz Nizipli 
Reviewed-By: Chengzhong Wu 
---
 test/parallel/test-bootstrap-modules.js | 128 +++++++++++++++++++-----
 1 file changed, 103 insertions(+), 25 deletions(-)

diff --git a/test/parallel/test-bootstrap-modules.js b/test/parallel/test-bootstrap-modules.js
index e4561d746606ed..047e4fbeaa1e67 100644
--- a/test/parallel/test-bootstrap-modules.js
+++ b/test/parallel/test-bootstrap-modules.js
@@ -1,14 +1,27 @@
-// Flags: --expose-internals
 'use strict';
 
-// This list must be computed before we require any modules to
+// This list must be computed before we require any builtins to
 // to eliminate the noise.
-const actualModules = new Set(process.moduleLoadList.slice());
+const list = process.moduleLoadList.slice();
 
 const common = require('../common');
 const assert = require('assert');
+const { inspect } = require('util');
 
-const expectedModules = new Set([
+const preExecIndex =
+  list.findIndex((i) => i.includes('pre_execution'));
+const actual = {
+  beforePreExec: new Set(list.slice(0, preExecIndex)),
+  atRunTime: new Set(list.slice(preExecIndex)),
+};
+
+// Currently, we don't add additional builtins to worker snapshots.
+// So for worker snapshots we'll just concatenate the two. Once we
+// add more builtins to worker snapshots, we should also distinguish
+// the two stages for them.
+const expected = {};
+
+expected.beforePreExec = new Set([
   'Internal Binding builtins',
   'Internal Binding encoding_binding',
   'Internal Binding errors',
@@ -84,22 +97,25 @@ const expectedModules = new Set([
   'NativeModule internal/modules/package_json_reader',
   'Internal Binding module_wrap',
   'NativeModule internal/modules/cjs/loader',
-  'NativeModule internal/vm/module',
-  'NativeModule internal/modules/esm/utils',
+]);
+
+expected.atRunTime = new Set([
   'Internal Binding wasm_web_api',
   'Internal Binding worker',
   'NativeModule internal/modules/run_main',
   'NativeModule internal/net',
   'NativeModule internal/dns/utils',
   'NativeModule internal/process/pre_execution',
+  'NativeModule internal/vm/module',
+  'NativeModule internal/modules/esm/utils',
 ]);
 
 if (common.isMainThread) {
   [
     'NativeModule internal/idna',
     'NativeModule url',
-  ].forEach(expectedModules.add.bind(expectedModules));
-} else {
+  ].forEach(expected.beforePreExec.add.bind(expected.beforePreExec));
+} else {  // Worker.
   [
     'NativeModule diagnostics_channel',
     'NativeModule internal/abort_controller',
@@ -127,35 +143,97 @@ if (common.isMainThread) {
     'NativeModule stream/promises',
     'NativeModule string_decoder',
     'NativeModule worker_threads',
-  ].forEach(expectedModules.add.bind(expectedModules));
+  ].forEach(expected.atRunTime.add.bind(expected.atRunTime));
+  // For now we'll concatenate the two stages for workers. We prefer
+  // atRunTime here because that's what currently happens for these.
 }
 
 if (common.isWindows) {
   // On Windows fs needs SideEffectFreeRegExpPrototypeExec which uses vm.
-  expectedModules.add('NativeModule vm');
+  expected.atRunTime.add('NativeModule vm');
 }
 
 if (common.hasIntl) {
-  expectedModules.add('Internal Binding icu');
+  expected.beforePreExec.add('Internal Binding icu');
 }
 
 if (process.features.inspector) {
-  expectedModules.add('Internal Binding inspector');
-  expectedModules.add('NativeModule internal/inspector_async_hook');
-  expectedModules.add('NativeModule internal/util/inspector');
+  expected.beforePreExec.add('Internal Binding inspector');
+  expected.beforePreExec.add('NativeModule internal/util/inspector');
+  expected.atRunTime.add('NativeModule internal/inspector_async_hook');
 }
 
 const difference = (setA, setB) => {
   return new Set([...setA].filter((x) => !setB.has(x)));
 };
-const missingModules = difference(expectedModules, actualModules);
-const extraModules = difference(actualModules, expectedModules);
-const printSet = (s) => { return `${[...s].sort().join(',\n  ')}\n`; };
-
-assert.deepStrictEqual(actualModules, expectedModules,
-                       (missingModules.size > 0 ?
-                         'These modules were not loaded:\n  ' +
-                         printSet(missingModules) : '') +
-                       (extraModules.size > 0 ?
-                         'These modules were unexpectedly loaded:\n  ' +
-                         printSet(extraModules) : ''));
+
+// Accumulate all the errors and print them at the end instead of throwing
+// immediately which makes it harder to update the test.
+const errorLogs = [];
+function err(message) {
+  if (typeof message === 'string') {
+    errorLogs.push(message);
+  } else {
+    // Show the items in individual lines for easier copy-pasting.
+    errorLogs.push(inspect(message, { compact: false }));
+  }
+}
+
+if (common.isMainThread) {
+  const missing = difference(expected.beforePreExec, actual.beforePreExec);
+  const extra = difference(actual.beforePreExec, expected.beforePreExec);
+  if (missing.size !== 0) {
+    err('These builtins are now no longer loaded before pre-execution.');
+    err('If this is intentional, remove them from `expected.beforePreExec`.');
+    err('\n--- These could be removed from expected.beforePreExec ---');
+    err([...missing].sort());
+    err('');
+  }
+  if (extra.size !== 0) {
+    err('These builtins are now unexpectedly loaded before pre-execution.');
+    err('If this is intentional, add them to `expected.beforePreExec`.');
+    err('\n# Note: loading more builtins before pre-execution can lead to ' +
+        'startup performance regression or invalid snapshots.');
+    err('- Consider lazy loading builtins that are not used universally.');
+    err('- Make sure that the builtins do not access environment dependent ' +
+        'states e.g. command line arguments or environment variables ' +
+        'during loading.');
+    err('- When in doubt, ask @nodejs/startup.');
+    err('\n--- These could be added to expected.beforePreExec ---');
+    err([...extra].sort());
+    err('');
+  }
+}
+
+if (!common.isMainThread) {
+  // For workers, just merge beforePreExec into atRunTime for now.
+  // When we start adding modules to the worker snapshot, this branch
+  // can be removed and  we can just remove the common.isMainThread
+  // conditions.
+  expected.beforePreExec.forEach(expected.atRunTime.add.bind(expected.atRunTime));
+  actual.beforePreExec.forEach(actual.atRunTime.add.bind(actual.atRunTime));
+}
+
+{
+  const missing = difference(expected.atRunTime, actual.atRunTime);
+  const extra = difference(actual.atRunTime, expected.atRunTime);
+  if (missing.size !== 0) {
+    err('These builtins are now no longer loaded at run time.');
+    err('If this is intentional, remove them from `expected.atRunTime`.');
+    err('\n--- These could be removed from expected.atRunTime ---');
+    err([...missing].sort());
+    err('');
+  }
+  if (extra.size !== 0) {
+    err('These builtins are now unexpectedly loaded at run time.');
+    err('If this is intentional, add them to `expected.atRunTime`.');
+    err('\n# Note: loading more builtins at run time can lead to ' +
+        'startup performance regression.');
+    err('- Consider lazy loading builtins that are not used universally.');
+    err('\n--- These could be added to expected.atRunTime ---');
+    err([...extra].sort());
+    err('');
+  }
+}
+
+assert.strictEqual(errorLogs.length, 0, errorLogs.join('\n'));

From a7d8f6b5290aa60296cdb8b932e2341e7898efaa Mon Sep 17 00:00:00 2001
From: Joyee Cheung 
Date: Sat, 18 Nov 2023 00:59:26 +0100
Subject: [PATCH 151/229] perf_hooks: implement performance.now() with fast API
 calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-URL: https://github.com/nodejs/node/pull/50492
Reviewed-By: Vinícius Lourenço Claro Cardoso 
Reviewed-By: Yagiz Nizipli 
Reviewed-By: Bryan English 
Reviewed-By: Chengzhong Wu 
Reviewed-By: Benjamin Gruenbaum 
Reviewed-By: Stephen Belanger 
---
 benchmark/perf_hooks/now.js   | 23 +++++++++++++++++++++++
 lib/internal/perf/utils.js    |  7 +------
 src/node_external_reference.h |  3 +++
 src/node_perf.cc              | 21 +++++++++++++++++++++
 4 files changed, 48 insertions(+), 6 deletions(-)
 create mode 100644 benchmark/perf_hooks/now.js

diff --git a/benchmark/perf_hooks/now.js b/benchmark/perf_hooks/now.js
new file mode 100644
index 00000000000000..7132196c430125
--- /dev/null
+++ b/benchmark/perf_hooks/now.js
@@ -0,0 +1,23 @@
+'use strict';
+
+const assert = require('assert');
+const common = require('../common.js');
+
+const bench = common.createBenchmark(main, {
+  n: [1e6],
+});
+
+function main({ n }) {
+  const arr = [];
+  for (let i = 0; i < n; ++i) {
+    arr.push(performance.now());
+  }
+
+  bench.start();
+  for (let i = 0; i < n; i++) {
+    arr[i] = performance.now();
+  }
+  bench.end(n);
+
+  assert.strictEqual(arr.length, n);
+}
diff --git a/lib/internal/perf/utils.js b/lib/internal/perf/utils.js
index a0b7955c70481c..bbc1c996e318f8 100644
--- a/lib/internal/perf/utils.js
+++ b/lib/internal/perf/utils.js
@@ -5,6 +5,7 @@ const {
     NODE_PERFORMANCE_MILESTONE_TIME_ORIGIN,
   },
   milestones,
+  now,
 } = internalBinding('performance');
 
 function getTimeOrigin() {
@@ -13,12 +14,6 @@ function getTimeOrigin() {
   return milestones[NODE_PERFORMANCE_MILESTONE_TIME_ORIGIN] / 1e6;
 }
 
-// Returns the time relative to the process start time in milliseconds.
-function now() {
-  const hr = process.hrtime();
-  return (hr[0] * 1000 + hr[1] / 1e6) - getTimeOrigin();
-}
-
 // Returns the milestone relative to the process start time in milliseconds.
 function getMilestoneTimestamp(milestoneIdx) {
   const ns = milestones[milestoneIdx];
diff --git a/src/node_external_reference.h b/src/node_external_reference.h
index ae37094c8e117e..541b73499869e3 100644
--- a/src/node_external_reference.h
+++ b/src/node_external_reference.h
@@ -15,6 +15,8 @@ using CFunctionCallbackWithOneByteString =
 using CFunctionCallback = void (*)(v8::Local receiver);
 using CFunctionCallbackReturnDouble =
     double (*)(v8::Local receiver);
+using CFunctionCallbackValueReturnDouble =
+    double (*)(v8::Local receiver);
 using CFunctionCallbackWithInt64 = void (*)(v8::Local receiver,
                                             int64_t);
 using CFunctionCallbackWithBool = void (*)(v8::Local receiver,
@@ -38,6 +40,7 @@ class ExternalReferenceRegistry {
   V(CFunctionCallback)                                                         \
   V(CFunctionCallbackWithOneByteString)                                        \
   V(CFunctionCallbackReturnDouble)                                             \
+  V(CFunctionCallbackValueReturnDouble)                                        \
   V(CFunctionCallbackWithInt64)                                                \
   V(CFunctionCallbackWithBool)                                                 \
   V(CFunctionCallbackWithString)                                               \
diff --git a/src/node_perf.cc b/src/node_perf.cc
index 360cc8bf673073..603af57b2639b2 100644
--- a/src/node_perf.cc
+++ b/src/node_perf.cc
@@ -291,6 +291,22 @@ void MarkBootstrapComplete(const FunctionCallbackInfo& args) {
       performance::NODE_PERFORMANCE_MILESTONE_BOOTSTRAP_COMPLETE);
 }
 
+static double PerformanceNowImpl() {
+  return static_cast(uv_hrtime() - performance_process_start) /
+         NANOS_PER_MILLIS;
+}
+
+static double FastPerformanceNow(v8::Local receiver) {
+  return PerformanceNowImpl();
+}
+
+static void SlowPerformanceNow(const FunctionCallbackInfo& args) {
+  args.GetReturnValue().Set(PerformanceNowImpl());
+}
+
+static v8::CFunction fast_performance_now(
+    v8::CFunction::Make(FastPerformanceNow));
+
 static void CreatePerIsolateProperties(IsolateData* isolate_data,
                                        Local target) {
   Isolate* isolate = isolate_data->isolate();
@@ -311,6 +327,8 @@ static void CreatePerIsolateProperties(IsolateData* isolate_data,
   SetMethod(isolate, target, "getTimeOriginTimestamp", GetTimeOriginTimeStamp);
   SetMethod(isolate, target, "createELDHistogram", CreateELDHistogram);
   SetMethod(isolate, target, "markBootstrapComplete", MarkBootstrapComplete);
+  SetFastMethodNoSideEffect(
+      isolate, target, "now", SlowPerformanceNow, &fast_performance_now);
 }
 
 void CreatePerContextProperties(Local target,
@@ -376,6 +394,9 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
   registry->Register(GetTimeOriginTimeStamp);
   registry->Register(CreateELDHistogram);
   registry->Register(MarkBootstrapComplete);
+  registry->Register(SlowPerformanceNow);
+  registry->Register(FastPerformanceNow);
+  registry->Register(fast_performance_now.GetTypeInfo());
   HistogramBase::RegisterExternalReferences(registry);
   IntervalHistogram::RegisterExternalReferences(registry);
 }

From be6ad3f375b39e3f679295be80900009b78807d6 Mon Sep 17 00:00:00 2001
From: Joyee Cheung 
Date: Sat, 18 Nov 2023 01:46:42 +0100
Subject: [PATCH 152/229] benchmark: rewrite import.meta benchmark
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a ESM benchmark, rewrite it so that we are directly
benchmarking the ESM import.meta paths and using number of
loads for op/s calculation, instead of doing it in startup
benchmarks and nesting number of process/workers spawn
for op/s calculation.

PR-URL: https://github.com/nodejs/node/pull/50683
Reviewed-By: James M Snell 
Reviewed-By: Vinícius Lourenço Claro Cardoso 
---
 benchmark/esm/import-meta.js            | 32 +++++++++++++++++++++++++
 benchmark/fixtures/esm-dir-file.mjs     |  5 ++--
 benchmark/fixtures/load-esm-dir-file.js |  5 ----
 benchmark/misc/startup.js               |  1 -
 4 files changed, 34 insertions(+), 9 deletions(-)
 create mode 100644 benchmark/esm/import-meta.js
 delete mode 100644 benchmark/fixtures/load-esm-dir-file.js

diff --git a/benchmark/esm/import-meta.js b/benchmark/esm/import-meta.js
new file mode 100644
index 00000000000000..fd371cf60ed6a3
--- /dev/null
+++ b/benchmark/esm/import-meta.js
@@ -0,0 +1,32 @@
+'use strict';
+
+const path = require('path');
+const { pathToFileURL, fileURLToPath } = require('url');
+const common = require('../common');
+const assert = require('assert');
+const bench = common.createBenchmark(main, {
+  n: [1000],
+});
+
+const file = pathToFileURL(
+  path.resolve(__filename, '../../fixtures/esm-dir-file.mjs'),
+);
+async function load(array, n) {
+  for (let i = 0; i < n; i++) {
+    array[i] = await import(`${file}?i=${i}`);
+  }
+  return array;
+}
+
+function main({ n }) {
+  const array = [];
+  for (let i = 0; i < n; ++i) {
+    array.push({ dirname: '', filename: '', i: 0 });
+  }
+
+  bench.start();
+  load(array, n).then((arr) => {
+    bench.end(n);
+    assert.strictEqual(arr[n - 1].filename, fileURLToPath(file));
+  });
+}
diff --git a/benchmark/fixtures/esm-dir-file.mjs b/benchmark/fixtures/esm-dir-file.mjs
index d5dafc5c46697e..a1028d57afd56b 100644
--- a/benchmark/fixtures/esm-dir-file.mjs
+++ b/benchmark/fixtures/esm-dir-file.mjs
@@ -1,3 +1,2 @@
-import assert from 'assert';
-assert.ok(import.meta.dirname);
-assert.ok(import.meta.filename);
+export const dirname = import.meta.dirname;
+export const filename = import.meta.filename;
diff --git a/benchmark/fixtures/load-esm-dir-file.js b/benchmark/fixtures/load-esm-dir-file.js
deleted file mode 100644
index a4115dd39e0489..00000000000000
--- a/benchmark/fixtures/load-esm-dir-file.js
+++ /dev/null
@@ -1,5 +0,0 @@
-(async function () {
-  for (let i = 0; i < 1000; i += 1) {
-    await import(`./esm-dir-file.mjs?i=${i}`);
-  }
-}());
diff --git a/benchmark/misc/startup.js b/benchmark/misc/startup.js
index f55be1a7902e4f..07c0701d128899 100644
--- a/benchmark/misc/startup.js
+++ b/benchmark/misc/startup.js
@@ -9,7 +9,6 @@ const bench = common.createBenchmark(main, {
   script: [
     'benchmark/fixtures/require-builtins',
     'test/fixtures/semicolon',
-    'benchmark/fixtures/load-esm-dir-file',
   ],
   mode: ['process', 'worker'],
   count: [30],

From 7be1222c4a7f6b872ba49c559059d4f3a28fa499 Mon Sep 17 00:00:00 2001
From: "Node.js GitHub Bot" 
Date: Sat, 18 Nov 2023 14:47:21 +0100
Subject: [PATCH 153/229] deps: update simdutf to 4.0.4

PR-URL: https://github.com/nodejs/node/pull/50772
Reviewed-By: Yagiz Nizipli 
Reviewed-By: Luigi Pinca 
---
 deps/simdutf/simdutf.cpp                      | 17606 ++++++++++------
 deps/simdutf/simdutf.h                        |  1071 +-
 .../maintaining/maintaining-dependencies.md   |     6 +-
 3 files changed, 12040 insertions(+), 6643 deletions(-)

diff --git a/deps/simdutf/simdutf.cpp b/deps/simdutf/simdutf.cpp
index 70b461ab550b46..7a6e9a0efd190b 100644
--- a/deps/simdutf/simdutf.cpp
+++ b/deps/simdutf/simdutf.cpp
@@ -1,8 +1,6 @@
-/* auto-generated on 2023-10-08 13:48:09 -0400. Do not edit! */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf.cpp
+/* auto-generated on 2023-11-15 17:34:03 -0500. Do not edit! */
 /* begin file src/simdutf.cpp */
 #include "simdutf.h"
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=implementation.cpp
 /* begin file src/implementation.cpp */
 #include 
 #include 
@@ -26,7 +24,6 @@ std::string toBinaryString(T b) {
 
 // Implementations
 // The best choice should always come first!
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64.h
 /* begin file src/simdutf/arm64.h */
 #ifndef SIMDUTF_ARM64_H
 #define SIMDUTF_ARM64_H
@@ -53,7 +50,6 @@ namespace arm64 {
 } // namespace arm64
 } // namespace simdutf
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/implementation.h
 /* begin file src/simdutf/arm64/implementation.h */
 #ifndef SIMDUTF_ARM64_IMPLEMENTATION_H
 #define SIMDUTF_ARM64_IMPLEMENTATION_H
@@ -80,6 +76,13 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
   simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
   simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
+  simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
   simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
@@ -89,12 +92,21 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
   simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
   simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
@@ -122,6 +134,13 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
   simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
   simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;
+
 };
 
 } // namespace arm64
@@ -130,14 +149,12 @@ class implementation final : public simdutf::implementation {
 #endif // SIMDUTF_ARM64_IMPLEMENTATION_H
 /* end file src/simdutf/arm64/implementation.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/begin.h
 /* begin file src/simdutf/arm64/begin.h */
 // redefining SIMDUTF_IMPLEMENTATION to "arm64"
 // #define SIMDUTF_IMPLEMENTATION arm64
 /* end file src/simdutf/arm64/begin.h */
 
 // Declarations
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/intrinsics.h
 /* begin file src/simdutf/arm64/intrinsics.h */
 #ifndef SIMDUTF_ARM64_INTRINSICS_H
 #define SIMDUTF_ARM64_INTRINSICS_H
@@ -149,7 +166,6 @@ class implementation final : public simdutf::implementation {
 
 #endif //  SIMDUTF_ARM64_INTRINSICS_H
 /* end file src/simdutf/arm64/intrinsics.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/bitmanipulation.h
 /* begin file src/simdutf/arm64/bitmanipulation.h */
 #ifndef SIMDUTF_ARM64_BITMANIPULATION_H
 #define SIMDUTF_ARM64_BITMANIPULATION_H
@@ -169,7 +185,6 @@ simdutf_really_inline int count_ones(uint64_t input_num) {
 
 #endif // SIMDUTF_ARM64_BITMANIPULATION_H
 /* end file src/simdutf/arm64/bitmanipulation.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/simd.h
 /* begin file src/simdutf/arm64/simd.h */
 #ifndef SIMDUTF_ARM64_SIMD_H
 #define SIMDUTF_ARM64_SIMD_H
@@ -505,32 +520,68 @@ simdutf_really_inline int16x8_t make_int16x8_t(int16_t x1,  int16_t x2,  int16_t
     static simdutf_really_inline simd8 splat(int8_t _value) { return vmovq_n_s8(_value); }
     static simdutf_really_inline simd8 zero() { return vdupq_n_s8(0); }
     static simdutf_really_inline simd8 load(const int8_t values[16]) { return vld1q_s8(values); }
+
+    // Use ST2 instead of UXTL+UXTL2 to interleave zeroes. UXTL is actually a USHLL #0,
+    // and shifting in NEON is actually quite slow.
+    //
+    // While this needs the registers to be in a specific order, bigger cores can interleave
+    // these with no overhead, and it still performs decently on little cores.
+    //    movi  v1.3d, #0
+    //      mov   v0.16b, value[0]
+    //    st2   {v0.16b, v1.16b}, [ptr], #32
+    //      mov   v0.16b, value[1]
+    //    st2   {v0.16b, v1.16b}, [ptr], #32
+    //    ...
     template 
     simdutf_really_inline void store_ascii_as_utf16(char16_t * p) const {
-      uint16x8_t first = vmovl_u8(vget_low_u8 (vreinterpretq_u8_s8(this->value)));
-      uint16x8_t second = vmovl_high_u8(vreinterpretq_u8_s8(this->value));
-      if (!match_system(big_endian)) {
-        #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-        const uint8x16_t swap = make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-        #else
-        const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
-        #endif
-        first = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(first), swap));
-        second = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(second), swap));
-      }
-      vst1q_u16(reinterpret_cast(p), first);
-      vst1q_u16(reinterpret_cast(p + 8), second);
-    }
+      int8x16x2_t pair = match_system(big_endian)
+          ? int8x16x2_t{{this->value, vmovq_n_s8(0)}}
+          : int8x16x2_t{{vmovq_n_s8(0), this->value}};
+      vst2q_s8(reinterpret_cast(p), pair);
+    }
+
+    // currently unused
+    // Technically this could be done with ST4 like in store_ascii_as_utf16, but it is
+    // very much not worth it, as explicitly mentioned in the ARM Cortex-X1 Core Software
+    // Optimization Guide:
+    //   4.18 Complex ASIMD instructions
+    //     The bandwidth of [ST4 with element size less than 64b] is limited by decode
+    //     constraints and it is advisable to avoid them when high performing code is desired.
+    // Instead, it is better to use ZIP1+ZIP2 and two ST2.
     simdutf_really_inline void store_ascii_as_utf32(char32_t * p) const {
-      vst1q_u32(reinterpret_cast(p), vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8 (vreinterpretq_u8_s8(this->value))))));
-      vst1q_u32(reinterpret_cast(p + 4), vmovl_high_u16(vmovl_u8(vget_low_u8 (vreinterpretq_u8_s8(this->value)))));
-      vst1q_u32(reinterpret_cast(p + 8), vmovl_u16(vget_low_u16(vmovl_high_u8(vreinterpretq_u8_s8(this->value)))));
-      vst1q_u32(reinterpret_cast(p + 12), vmovl_high_u16(vmovl_high_u8(vreinterpretq_u8_s8(this->value))));
+      const uint16x8_t low = vreinterpretq_u16_s8(vzip1q_s8(this->value, vmovq_n_s8(0)));
+      const uint16x8_t high = vreinterpretq_u16_s8(vzip2q_s8(this->value, vmovq_n_s8(0)));
+      const uint16x8x2_t low_pair{{ low, vmovq_n_u16(0) }};
+      vst2q_u16(reinterpret_cast(p), low_pair);
+      const uint16x8x2_t high_pair{{ high, vmovq_n_u16(0) }};
+      vst2q_u16(reinterpret_cast(p + 8), high_pair);
+    }
+
+    // In places where the table can be reused, which is most uses in simdutf, it is worth it to do
+    // 4 table lookups, as there is no direct zero extension from u8 to u32.
+    simdutf_really_inline void store_ascii_as_utf32_tbl(char32_t * p) const {
+      const simd8 tb1{  0,255,255,255,  1,255,255,255,  2,255,255,255,  3,255,255,255 };
+      const simd8 tb2{  4,255,255,255,  5,255,255,255,  6,255,255,255,  7,255,255,255 };
+      const simd8 tb3{  8,255,255,255,  9,255,255,255, 10,255,255,255, 11,255,255,255 };
+      const simd8 tb4{ 12,255,255,255, 13,255,255,255, 14,255,255,255, 15,255,255,255 };
+
+      // encourage store pairing and interleaving
+      const auto shuf1 = this->apply_lookup_16_to(tb1);
+      const auto shuf2 = this->apply_lookup_16_to(tb2);
+      shuf1.store(reinterpret_cast(p));
+      shuf2.store(reinterpret_cast(p + 4));
+
+      const auto shuf3 = this->apply_lookup_16_to(tb3);
+      const auto shuf4 = this->apply_lookup_16_to(tb4);
+      shuf3.store(reinterpret_cast(p + 8));
+      shuf4.store(reinterpret_cast(p + 12));
     }
     // Conversion from/to SIMD register
     simdutf_really_inline simd8(const int8x16_t _value) : value{_value} {}
     simdutf_really_inline operator const int8x16_t&() const { return this->value; }
+#ifndef SIMDUTF_REGULAR_VISUAL_STUDIO
     simdutf_really_inline operator const uint8x16_t() const { return vreinterpretq_u8_s8(this->value); }
+#endif
     simdutf_really_inline operator int8x16_t&() { return this->value; }
 
     // Zero constructor
@@ -627,7 +678,7 @@ simdutf_really_inline int16x8_t make_int16x8_t(int16_t x1,  int16_t x2,  int16_t
     }
 
     template
-    simdutf_really_inline simd8 apply_lookup_16_to(const simd8 original) {
+    simdutf_really_inline simd8 apply_lookup_16_to(const simd8 original) const {
       return vqtbl1q_s8(*this, simd8(original));
     }
   };
@@ -678,10 +729,10 @@ simdutf_really_inline int16x8_t make_int16x8_t(int16_t x1,  int16_t x2,  int16_t
     }
 
     simdutf_really_inline void store_ascii_as_utf32(char32_t * ptr) const {
-      this->chunks[0].store_ascii_as_utf32(ptr+sizeof(simd8)*0);
-      this->chunks[1].store_ascii_as_utf32(ptr+sizeof(simd8)*1);
-      this->chunks[2].store_ascii_as_utf32(ptr+sizeof(simd8)*2);
-      this->chunks[3].store_ascii_as_utf32(ptr+sizeof(simd8)*3);
+      this->chunks[0].store_ascii_as_utf32_tbl(ptr+sizeof(simd8)*0);
+      this->chunks[1].store_ascii_as_utf32_tbl(ptr+sizeof(simd8)*1);
+      this->chunks[2].store_ascii_as_utf32_tbl(ptr+sizeof(simd8)*2);
+      this->chunks[3].store_ascii_as_utf32_tbl(ptr+sizeof(simd8)*3);
     }
 
     simdutf_really_inline uint64_t to_bitmask() const {
@@ -782,7 +833,6 @@ simdutf_really_inline int16x8_t make_int16x8_t(int16_t x1,  int16_t x2,  int16_t
       ).to_bitmask();
     }
   }; // struct simd8x64
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/simd16-inl.h
 /* begin file src/simdutf/arm64/simd16-inl.h */
 template
 struct simd16;
@@ -869,7 +919,7 @@ struct base16_numeric: base16 {
   simdutf_really_inline simd16& operator-=(const simd16 other) { *this = *this - other; return *static_cast*>(this); }
 };
 
-// Signed words
+// Signed code units
 template<>
 struct simd16 : base16_numeric {
   simdutf_really_inline simd16() : base16_numeric() {}
@@ -899,7 +949,7 @@ struct simd16 : base16_numeric {
 
 
 
-// Unsigned words
+// Unsigned code units
 template<>
 struct simd16: base16_numeric  {
   simdutf_really_inline simd16() : base16_numeric() {}
@@ -942,19 +992,14 @@ struct simd16: base16_numeric  {
   simdutf_really_inline simd16 operator&(const simd16 other) const { return vandq_u16(*this, other); }
   simdutf_really_inline simd16 operator^(const simd16 other) const { return veorq_u16(*this, other); }
 
-  // Pack with the unsigned saturation  two uint16_t words into single uint8_t vector
+  // Pack with the unsigned saturation  two uint16_t code units into single uint8_t vector
   static simdutf_really_inline simd8 pack(const simd16& v0, const simd16& v1) {
     return vqmovn_high_u16(vqmovn_u16(v0), v1);
   }
 
   // Change the endianness
   simdutf_really_inline simd16 swap_bytes() const {
-    #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-    const uint8x16_t swap = make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-    #else
-    const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
-    #endif
-    return vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(*this), swap));
+    return vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(*this)));
   }
 };
 simdutf_really_inline simd16::operator simd16() const { return this->value; }
@@ -1095,7 +1140,6 @@ simdutf_really_inline simd16::operator simd16() const { retur
 #endif // SIMDUTF_ARM64_SIMD_H
 /* end file src/simdutf/arm64/simd.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/end.h
 /* begin file src/simdutf/arm64/end.h */
 /* end file src/simdutf/arm64/end.h */
 
@@ -1103,7 +1147,6 @@ simdutf_really_inline simd16::operator simd16() const { retur
 
 #endif // SIMDUTF_ARM64_H
 /* end file src/simdutf/arm64.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake.h
 /* begin file src/simdutf/icelake.h */
 #ifndef SIMDUTF_ICELAKE_H
 #define SIMDUTF_ICELAKE_H
@@ -1145,7 +1188,7 @@ simdutf_really_inline simd16::operator simd16() const { retur
 #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE
 #define SIMDUTF_TARGET_ICELAKE
 #else
-#define SIMDUTF_TARGET_ICELAKE SIMDUTF_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,bmi2,pclmul,lzcnt,popcnt")
+#define SIMDUTF_TARGET_ICELAKE SIMDUTF_TARGET_REGION("avx512f,avx512dq,avx512cd,avx512bw,avx512vbmi,avx512vbmi2,avx512vl,avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512vpopcntdq")
 #endif
 
 namespace simdutf {
@@ -1158,7 +1201,6 @@ namespace icelake {
 //
 // These two need to be included outside SIMDUTF_TARGET_REGION
 //
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/intrinsics.h
 /* begin file src/simdutf/icelake/intrinsics.h */
 #ifndef SIMDUTF_ICELAKE_INTRINSICS_H
 #define SIMDUTF_ICELAKE_INTRINSICS_H
@@ -1227,6 +1269,8 @@ SIMDUTF_POP_DISABLE_WARNINGS
 #include 
 #include 
 #include 
+#include 
+#include 
 // unfortunately, we may not get _blsr_u64, but, thankfully, clang
 // has it as a macro.
 #ifndef _blsr_u64
@@ -1268,7 +1312,6 @@ inline __m512i _mm512_set_epi8(uint8_t a0, uint8_t a1, uint8_t a2, uint8_t a3, u
 
 #endif // SIMDUTF_HASWELL_INTRINSICS_H
 /* end file src/simdutf/icelake/intrinsics.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/implementation.h
 /* begin file src/simdutf/icelake/implementation.h */
 #ifndef SIMDUTF_ICELAKE_IMPLEMENTATION_H
 #define SIMDUTF_ICELAKE_IMPLEMENTATION_H
@@ -1286,7 +1329,7 @@ class implementation final : public simdutf::implementation {
   simdutf_really_inline implementation() : simdutf::implementation(
       "icelake",
       "Intel AVX512 (AVX-512BW, AVX-512CD, AVX-512VL, AVX-512VBMI2 extensions)",
-      internal::instruction_set::AVX2 | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 ) {}
+      internal::instruction_set::AVX2 | internal::instruction_set::BMI1 | internal::instruction_set::BMI2 | internal::instruction_set::AVX512BW | internal::instruction_set::AVX512CD | internal::instruction_set::AVX512VL | internal::instruction_set::AVX512VBMI2 | internal::instruction_set::AVX512VPOPCNTDQ ) {}
   simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept final;
   simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) const noexcept final;
   simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) const noexcept final;
@@ -1298,6 +1341,13 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
   simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
   simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
+  simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
   simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
@@ -1307,6 +1357,12 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
   simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
   simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
@@ -1316,6 +1372,9 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
@@ -1340,6 +1399,12 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
   simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
   simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;
 };
 
 } // namespace icelake
@@ -1351,7 +1416,6 @@ class implementation final : public simdutf::implementation {
 //
 // The rest need to be inside the region
 //
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/begin.h
 /* begin file src/simdutf/icelake/begin.h */
 // redefining SIMDUTF_IMPLEMENTATION to "icelake"
 // #define SIMDUTF_IMPLEMENTATION icelake
@@ -1367,7 +1431,6 @@ SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized)
 #endif // end of workaround
 /* end file src/simdutf/icelake/begin.h */
 // Declarations
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/bitmanipulation.h
 /* begin file src/simdutf/icelake/bitmanipulation.h */
 #ifndef SIMDUTF_ICELAKE_BITMANIPULATION_H
 #define SIMDUTF_ICELAKE_BITMANIPULATION_H
@@ -1393,7 +1456,6 @@ simdutf_really_inline long long int count_ones(uint64_t input_num) {
 
 #endif // SIMDUTF_ICELAKE_BITMANIPULATION_H
 /* end file src/simdutf/icelake/bitmanipulation.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/end.h
 /* begin file src/simdutf/icelake/end.h */
 #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE
 // nothing needed.
@@ -1412,7 +1474,6 @@ SIMDUTF_POP_DISABLE_WARNINGS
 #endif // SIMDUTF_IMPLEMENTATION_ICELAKE
 #endif // SIMDUTF_ICELAKE_H
 /* end file src/simdutf/icelake.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell.h
 /* begin file src/simdutf/haswell.h */
 #ifndef SIMDUTF_HASWELL_H
 #define SIMDUTF_HASWELL_H
@@ -1458,7 +1519,6 @@ namespace haswell {
 //
 // These two need to be included outside SIMDUTF_TARGET_REGION
 //
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/implementation.h
 /* begin file src/simdutf/haswell/implementation.h */
 #ifndef SIMDUTF_HASWELL_IMPLEMENTATION_H
 #define SIMDUTF_HASWELL_IMPLEMENTATION_H
@@ -1488,6 +1548,13 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
   simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
   simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
+  simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
   simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
@@ -1497,6 +1564,12 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
   simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
   simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
@@ -1506,6 +1579,9 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
@@ -1530,6 +1606,12 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
   simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
   simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;
 };
 
 } // namespace haswell
@@ -1537,7 +1619,6 @@ class implementation final : public simdutf::implementation {
 
 #endif // SIMDUTF_HASWELL_IMPLEMENTATION_H
 /* end file src/simdutf/haswell/implementation.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/intrinsics.h
 /* begin file src/simdutf/haswell/intrinsics.h */
 #ifndef SIMDUTF_HASWELL_INTRINSICS_H
 #define SIMDUTF_HASWELL_INTRINSICS_H
@@ -1606,7 +1687,6 @@ SIMDUTF_POP_DISABLE_WARNINGS
 //
 // The rest need to be inside the region
 //
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/begin.h
 /* begin file src/simdutf/haswell/begin.h */
 // redefining SIMDUTF_IMPLEMENTATION to "haswell"
 // #define SIMDUTF_IMPLEMENTATION haswell
@@ -1622,7 +1702,6 @@ SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized)
 #endif // end of workaround
 /* end file src/simdutf/haswell/begin.h */
 // Declarations
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/bitmanipulation.h
 /* begin file src/simdutf/haswell/bitmanipulation.h */
 #ifndef SIMDUTF_HASWELL_BITMANIPULATION_H
 #define SIMDUTF_HASWELL_BITMANIPULATION_H
@@ -1648,7 +1727,6 @@ simdutf_really_inline long long int count_ones(uint64_t input_num) {
 
 #endif // SIMDUTF_HASWELL_BITMANIPULATION_H
 /* end file src/simdutf/haswell/bitmanipulation.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/simd.h
 /* begin file src/simdutf/haswell/simd.h */
 #ifndef SIMDUTF_HASWELL_SIMD_H
 #define SIMDUTF_HASWELL_SIMD_H
@@ -2000,9 +2078,7 @@ namespace simd {
 
       return  simd8x64(
         (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
-        (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
-        (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
-        (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)
+        (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)
       ).to_bitmask();
     }
     simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
@@ -2044,7 +2120,6 @@ namespace simd {
     }
   }; // struct simd8x64
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/simd16-inl.h
 /* begin file src/simdutf/haswell/simd16-inl.h */
 #ifdef __GNUC__
 #if __GNUC__ < 8
@@ -2117,7 +2192,7 @@ struct base16_numeric: base16 {
   simdutf_really_inline simd16& operator-=(const simd16 other) { *this = *this - other; return *static_cast*>(this); }
 };
 
-// Signed words
+// Signed code units
 template<>
 struct simd16 : base16_numeric {
   simdutf_really_inline simd16() : base16_numeric() {}
@@ -2134,7 +2209,7 @@ struct simd16 : base16_numeric {
   simdutf_really_inline simd16 operator<(const simd16 other) const { return _mm256_cmpgt_epi16(other, *this); }
 };
 
-// Unsigned words
+// Unsigned code units
 template<>
 struct simd16: base16_numeric  {
   simdutf_really_inline simd16() : base16_numeric() {}
@@ -2188,7 +2263,7 @@ struct simd16: base16_numeric  {
     return _mm256_shuffle_epi8(*this, swap);
   }
 
-  // Pack with the unsigned saturation two uint16_t words into single uint8_t vector
+  // Pack with the unsigned saturation two uint16_t code units into single uint8_t vector
   static simdutf_really_inline simd8 pack(const simd16& v0, const simd16& v1) {
     // Note: the AVX2 variant of pack operates on 128-bit lanes, thus
     //       we have to shuffle lanes in order to produce bytes in the
@@ -2206,7 +2281,7 @@ struct simd16: base16_numeric  {
     const __m256i t0 = _mm256_set_m128i(lo_1, lo_0);
     const __m256i t1 = _mm256_set_m128i(hi_1, hi_0);
 
-    // pack words in linear order from v0 and v1
+    // pack code units in linear order from v0 and v1
     return _mm256_packus_epi16(t0, t1);
   }
 };
@@ -2291,9 +2366,7 @@ struct simd16: base16_numeric  {
 
       return  simd16x32(
         (this->chunks[0] <= mask_high) & (this->chunks[0] >= mask_low),
-        (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low),
-        (this->chunks[2] <= mask_high) & (this->chunks[2] >= mask_low),
-        (this->chunks[3] <= mask_high) & (this->chunks[3] >= mask_low)
+        (this->chunks[1] <= mask_high) & (this->chunks[1] >= mask_low)
       ).to_bitmask();
     }
     simdutf_really_inline uint64_t not_in_range(const T low, const T high) const {
@@ -2323,7 +2396,6 @@ struct simd16: base16_numeric  {
 #endif // SIMDUTF_HASWELL_SIMD_H
 /* end file src/simdutf/haswell/simd.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/end.h
 /* begin file src/simdutf/haswell/end.h */
 #if SIMDUTF_CAN_ALWAYS_RUN_HASWELL
 // nothing needed.
@@ -2340,7 +2412,6 @@ SIMDUTF_POP_DISABLE_WARNINGS
 #endif // SIMDUTF_IMPLEMENTATION_HASWELL
 #endif // SIMDUTF_HASWELL_COMMON_H
 /* end file src/simdutf/haswell.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere.h
 /* begin file src/simdutf/westmere.h */
 #ifndef SIMDUTF_WESTMERE_H
 #define SIMDUTF_WESTMERE_H
@@ -2381,7 +2452,6 @@ namespace westmere {
 //
 // These two need to be included outside SIMDUTF_TARGET_REGION
 //
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/implementation.h
 /* begin file src/simdutf/westmere/implementation.h */
 #ifndef SIMDUTF_WESTMERE_IMPLEMENTATION_H
 #define SIMDUTF_WESTMERE_IMPLEMENTATION_H
@@ -2409,6 +2479,13 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
   simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
   simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
+  simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
   simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
@@ -2418,6 +2495,12 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
   simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
   simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
@@ -2427,6 +2510,9 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
@@ -2451,6 +2537,12 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
   simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
   simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;
 };
 
 } // namespace westmere
@@ -2458,7 +2550,6 @@ class implementation final : public simdutf::implementation {
 
 #endif // SIMDUTF_WESTMERE_IMPLEMENTATION_H
 /* end file src/simdutf/westmere/implementation.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/intrinsics.h
 /* begin file src/simdutf/westmere/intrinsics.h */
 #ifndef SIMDUTF_WESTMERE_INTRINSICS_H
 #define SIMDUTF_WESTMERE_INTRINSICS_H
@@ -2507,7 +2598,6 @@ SIMDUTF_POP_DISABLE_WARNINGS
 //
 // The rest need to be inside the region
 //
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/begin.h
 /* begin file src/simdutf/westmere/begin.h */
 // redefining SIMDUTF_IMPLEMENTATION to "westmere"
 // #define SIMDUTF_IMPLEMENTATION westmere
@@ -2520,7 +2610,6 @@ SIMDUTF_TARGET_WESTMERE
 /* end file src/simdutf/westmere/begin.h */
 
 // Declarations
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/bitmanipulation.h
 /* begin file src/simdutf/westmere/bitmanipulation.h */
 #ifndef SIMDUTF_WESTMERE_BITMANIPULATION_H
 #define SIMDUTF_WESTMERE_BITMANIPULATION_H
@@ -2546,7 +2635,6 @@ simdutf_really_inline long long int count_ones(uint64_t input_num) {
 
 #endif // SIMDUTF_WESTMERE_BITMANIPULATION_H
 /* end file src/simdutf/westmere/bitmanipulation.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/simd.h
 /* begin file src/simdutf/westmere/simd.h */
 #ifndef SIMDUTF_WESTMERE_SIMD_H
 #define SIMDUTF_WESTMERE_SIMD_H
@@ -2894,10 +2982,10 @@ namespace simd {
     }
 
     simdutf_really_inline uint64_t to_bitmask() const {
-      uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() );
-      uint64_t r1 =          this->chunks[1].to_bitmask() ;
-      uint64_t r2 =          this->chunks[2].to_bitmask() ;
-      uint64_t r3 =          this->chunks[3].to_bitmask() ;
+      uint64_t r0 = uint32_t(this->chunks[0].to_bitmask());
+      uint64_t r1 =          this->chunks[1].to_bitmask();
+      uint64_t r2 =          this->chunks[2].to_bitmask();
+      uint64_t r3 =          this->chunks[3].to_bitmask();
       return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
     }
 
@@ -2990,7 +3078,6 @@ namespace simd {
     }
   }; // struct simd8x64
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/simd16-inl.h
 /* begin file src/simdutf/westmere/simd16-inl.h */
 template
 struct simd16;
@@ -3054,7 +3141,7 @@ struct base16_numeric: base16 {
   simdutf_really_inline simd16& operator-=(const simd16 other) { *this = *this - other; return *static_cast*>(this); }
 };
 
-// Signed words
+// Signed code units
 template<>
 struct simd16 : base16_numeric {
   simdutf_really_inline simd16() : base16_numeric() {}
@@ -3077,7 +3164,7 @@ struct simd16 : base16_numeric {
   simdutf_really_inline simd16 operator<(const simd16 other) const { return _mm_cmpgt_epi16(other, *this); }
 };
 
-// Unsigned words
+// Unsigned code units
 template<>
 struct simd16: base16_numeric  {
   simdutf_really_inline simd16() : base16_numeric() {}
@@ -3140,7 +3227,7 @@ struct simd16: base16_numeric  {
     return _mm_shuffle_epi8(*this, swap);
   }
 
-  // Pack with the unsigned saturation  two uint16_t words into single uint8_t vector
+  // Pack with the unsigned saturation  two uint16_t code units into single uint8_t vector
   static simdutf_really_inline simd8 pack(const simd16& v0, const simd16& v1) {
     return _mm_packus_epi16(v0, v1);
   }
@@ -3183,10 +3270,10 @@ template
     }
 
     simdutf_really_inline uint64_t to_bitmask() const {
-      uint64_t r0 = uint32_t(this->chunks[0].to_bitmask() );
-      uint64_t r1 =          this->chunks[1].to_bitmask() ;
-      uint64_t r2 =          this->chunks[2].to_bitmask() ;
-      uint64_t r3 =          this->chunks[3].to_bitmask() ;
+      uint64_t r0 = uint32_t(this->chunks[0].to_bitmask());
+      uint64_t r1 =          this->chunks[1].to_bitmask();
+      uint64_t r2 =          this->chunks[2].to_bitmask();
+      uint64_t r3 =          this->chunks[3].to_bitmask();
       return r0 | (r1 << 16) | (r2 << 32) | (r3 << 48);
     }
 
@@ -3267,7 +3354,6 @@ template
 #endif // SIMDUTF_WESTMERE_SIMD_INPUT_H
 /* end file src/simdutf/westmere/simd.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/end.h
 /* begin file src/simdutf/westmere/end.h */
 #if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE
 // nothing needed.
@@ -3280,7 +3366,6 @@ SIMDUTF_UNTARGET_REGION
 #endif // SIMDUTF_IMPLEMENTATION_WESTMERE
 #endif // SIMDUTF_WESTMERE_COMMON_H
 /* end file src/simdutf/westmere.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64.h
 /* begin file src/simdutf/ppc64.h */
 #ifndef SIMDUTF_PPC64_H
 #define SIMDUTF_PPC64_H
@@ -3307,7 +3392,6 @@ namespace ppc64 {
 } // namespace ppc64
 } // namespace simdutf
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/implementation.h
 /* begin file src/simdutf/ppc64/implementation.h */
 #ifndef SIMDUTF_PPC64_IMPLEMENTATION_H
 #define SIMDUTF_PPC64_IMPLEMENTATION_H
@@ -3386,14 +3470,12 @@ class implementation final : public simdutf::implementation {
 #endif // SIMDUTF_PPC64_IMPLEMENTATION_H
 /* end file src/simdutf/ppc64/implementation.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/begin.h
 /* begin file src/simdutf/ppc64/begin.h */
 // redefining SIMDUTF_IMPLEMENTATION to "ppc64"
 // #define SIMDUTF_IMPLEMENTATION ppc64
 /* end file src/simdutf/ppc64/begin.h */
 
 // Declarations
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/intrinsics.h
 /* begin file src/simdutf/ppc64/intrinsics.h */
 #ifndef SIMDUTF_PPC64_INTRINSICS_H
 #define SIMDUTF_PPC64_INTRINSICS_H
@@ -3414,7 +3496,6 @@ class implementation final : public simdutf::implementation {
 
 #endif //  SIMDUTF_PPC64_INTRINSICS_H
 /* end file src/simdutf/ppc64/intrinsics.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/bitmanipulation.h
 /* begin file src/simdutf/ppc64/bitmanipulation.h */
 #ifndef SIMDUTF_PPC64_BITMANIPULATION_H
 #define SIMDUTF_PPC64_BITMANIPULATION_H
@@ -3440,7 +3521,6 @@ simdutf_really_inline int count_ones(uint64_t input_num) {
 
 #endif // SIMDUTF_PPC64_BITMANIPULATION_H
 /* end file src/simdutf/ppc64/bitmanipulation.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/simd.h
 /* begin file src/simdutf/ppc64/simd.h */
 #ifndef SIMDUTF_PPC64_SIMD_H
 #define SIMDUTF_PPC64_SIMD_H
@@ -3932,7 +4012,6 @@ template  struct simd8x64 {
 #endif // SIMDUTF_PPC64_SIMD_INPUT_H
 /* end file src/simdutf/ppc64/simd.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/end.h
 /* begin file src/simdutf/ppc64/end.h */
 /* end file src/simdutf/ppc64/end.h */
 
@@ -3940,7 +4019,6 @@ template  struct simd8x64 {
 
 #endif // SIMDUTF_PPC64_H
 /* end file src/simdutf/ppc64.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback.h
 /* begin file src/simdutf/fallback.h */
 #ifndef SIMDUTF_FALLBACK_H
 #define SIMDUTF_FALLBACK_H
@@ -3969,7 +4047,6 @@ namespace fallback {
 } // namespace fallback
 } // namespace simdutf
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/implementation.h
 /* begin file src/simdutf/fallback/implementation.h */
 #ifndef SIMDUTF_FALLBACK_IMPLEMENTATION_H
 #define SIMDUTF_FALLBACK_IMPLEMENTATION_H
@@ -4000,6 +4077,13 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept final;
   simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) const noexcept final;
   simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
+  simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
   simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * buf, size_t len, char16_t* utf16_output) const noexcept final;
@@ -4009,6 +4093,12 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t convert_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
   simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * buf, size_t len, char32_t* utf32_output) const noexcept final;
   simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * buf, size_t len, char32_t* utf32_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) const noexcept final;
@@ -4018,6 +4108,9 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_buffer) const noexcept final;
+  simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
+  simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final;
   simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
   simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
   simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * buf, size_t len, char16_t* utf16_buffer) const noexcept final;
@@ -4042,7 +4135,12 @@ class implementation final : public simdutf::implementation {
   simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept;
   simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept;
   simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept;
-};
+  simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) const noexcept;
+  simdutf_warn_unused size_t latin1_length_from_utf32(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf32_length_from_latin1(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) const noexcept;
+  simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept;};
 
 } // namespace fallback
 } // namespace simdutf
@@ -4050,14 +4148,12 @@ class implementation final : public simdutf::implementation {
 #endif // SIMDUTF_FALLBACK_IMPLEMENTATION_H
 /* end file src/simdutf/fallback/implementation.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/begin.h
 /* begin file src/simdutf/fallback/begin.h */
 // redefining SIMDUTF_IMPLEMENTATION to "fallback"
 // #define SIMDUTF_IMPLEMENTATION fallback
 /* end file src/simdutf/fallback/begin.h */
 
 // Declarations
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/bitmanipulation.h
 /* begin file src/simdutf/fallback/bitmanipulation.h */
 #ifndef SIMDUTF_FALLBACK_BITMANIPULATION_H
 #define SIMDUTF_FALLBACK_BITMANIPULATION_H
@@ -4068,23 +4164,6 @@ namespace simdutf {
 namespace fallback {
 namespace {
 
-#if defined(_MSC_VER) && !defined(_M_ARM64) && !defined(_M_X64)
-static inline unsigned char _BitScanForward64(unsigned long* ret, uint64_t x) {
-  unsigned long x0 = (unsigned long)x, top, bottom;
-  _BitScanForward(&top, (unsigned long)(x >> 32));
-  _BitScanForward(&bottom, x0);
-  *ret = x0 ? bottom : 32 + top;
-  return x != 0;
-}
-static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
-  unsigned long x1 = (unsigned long)(x >> 32), top, bottom;
-  _BitScanReverse(&top, x1);
-  _BitScanReverse(&bottom, (unsigned long)x);
-  *ret = x1 ? top + 32 : bottom;
-  return x != 0;
-}
-#endif
-
 } // unnamed namespace
 } // namespace fallback
 } // namespace simdutf
@@ -4092,7 +4171,6 @@ static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
 #endif // SIMDUTF_FALLBACK_BITMANIPULATION_H
 /* end file src/simdutf/fallback/bitmanipulation.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/end.h
 /* begin file src/simdutf/fallback/end.h */
 /* end file src/simdutf/fallback/end.h */
 
@@ -4100,100 +4178,469 @@ static unsigned char _BitScanReverse64(unsigned long* ret, uint64_t x) {
 #endif // SIMDUTF_FALLBACK_H
 /* end file src/simdutf/fallback.h */
 
+/* begin file src/scalar/utf8.h */
+#ifndef SIMDUTF_UTF8_H
+#define SIMDUTF_UTF8_H
+
 namespace simdutf {
-bool implementation::supported_by_runtime_system() const {
-  uint32_t required_instruction_sets = this->required_instruction_sets();
-  uint32_t supported_instruction_sets = internal::detect_supported_architectures();
-  return ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets);
-}
+namespace scalar {
+namespace {
+namespace utf8 {
+#if SIMDUTF_IMPLEMENTATION_FALLBACK
+// only used by the fallback kernel.
+// credit: based on code from Google Fuchsia (Apache Licensed)
+inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept {
+  const uint8_t *data = reinterpret_cast(buf);
+  uint64_t pos = 0;
+  uint32_t code_point = 0;
+  while (pos < len) {
+    // check of the next 16 bytes are ascii.
+    uint64_t next_pos = pos + 16;
+    if (next_pos <= len) { // if it is safe to read 16 more bytes, check that they are ascii
+      uint64_t v1;
+      std::memcpy(&v1, data + pos, sizeof(uint64_t));
+      uint64_t v2;
+      std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
+      uint64_t v{v1 | v2};
+      if ((v & 0x8080808080808080) == 0) {
+        pos = next_pos;
+        continue;
+      }
+    }
+    unsigned char byte = data[pos];
 
-simdutf_warn_unused encoding_type implementation::autodetect_encoding(const char * input, size_t length) const noexcept {
-    // If there is a BOM, then we trust it.
-    auto bom_encoding = simdutf::BOM::check_bom(input, length);
-    if(bom_encoding != encoding_type::unspecified) { return bom_encoding; }
-    // UTF8 is common, it includes ASCII, and is commonly represented
-    // without a BOM, so if it fits, go with that. Note that it is still
-    // possible to get it wrong, we are only 'guessing'. If some has UTF-16
-    // data without a BOM, it could pass as UTF-8.
-    //
-    // An interesting twist might be to check for UTF-16 ASCII first (every
-    // other byte is zero).
-    if(validate_utf8(input, length)) { return encoding_type::UTF8; }
-    // The next most common encoding that might appear without BOM is probably
-    // UTF-16LE, so try that next.
-    if((length % 2) == 0) {
-      // important: we need to divide by two
-      if(validate_utf16le(reinterpret_cast(input), length/2)) { return encoding_type::UTF16_LE; }
+    while (byte < 0b10000000) {
+      if (++pos == len) { return true; }
+      byte = data[pos];
     }
-    if((length % 4) == 0) {
-      if(validate_utf32(reinterpret_cast(input), length/4)) { return encoding_type::UTF32_LE; }
+
+    if ((byte & 0b11100000) == 0b11000000) {
+      next_pos = pos + 2;
+      if (next_pos > len) { return false; }
+      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
+      // range check
+      code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
+      if ((code_point < 0x80) || (0x7ff < code_point)) { return false; }
+    } else if ((byte & 0b11110000) == 0b11100000) {
+      next_pos = pos + 3;
+      if (next_pos > len) { return false; }
+      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
+      if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; }
+      // range check
+      code_point = (byte & 0b00001111) << 12 |
+                   (data[pos + 1] & 0b00111111) << 6 |
+                   (data[pos + 2] & 0b00111111);
+      if ((code_point < 0x800) || (0xffff < code_point) ||
+          (0xd7ff < code_point && code_point < 0xe000)) {
+        return false;
+      }
+    } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000
+      next_pos = pos + 4;
+      if (next_pos > len) { return false; }
+      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
+      if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; }
+      if ((data[pos + 3] & 0b11000000) != 0b10000000) { return false; }
+      // range check
+      code_point =
+          (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
+          (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
+      if (code_point <= 0xffff || 0x10ffff < code_point) { return false; }
+    } else {
+      // we may have a continuation
+      return false;
     }
-    return encoding_type::unspecified;
+    pos = next_pos;
+  }
+  return true;
 }
-
-namespace internal {
-
-// Static array of known implementations. We're hoping these get baked into the executable
-// without requiring a static initializer.
-
-
-#if SIMDUTF_IMPLEMENTATION_ICELAKE
-const icelake::implementation icelake_singleton{};
-#endif
-#if SIMDUTF_IMPLEMENTATION_HASWELL
-const haswell::implementation haswell_singleton{};
-#endif
-#if SIMDUTF_IMPLEMENTATION_WESTMERE
-const westmere::implementation westmere_singleton{};
-#endif
-#if SIMDUTF_IMPLEMENTATION_ARM64
-const arm64::implementation arm64_singleton{};
-#endif
-#if SIMDUTF_IMPLEMENTATION_PPC64
-const ppc64::implementation ppc64_singleton{};
-#endif
-#if SIMDUTF_IMPLEMENTATION_FALLBACK
-const fallback::implementation fallback_singleton{};
 #endif
 
-/**
- * @private Detects best supported implementation on first use, and sets it
- */
-class detect_best_supported_implementation_on_first_use final : public implementation {
-public:
-  const std::string &name() const noexcept final { return set_best()->name(); }
-  const std::string &description() const noexcept final { return set_best()->description(); }
-  uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
+inline simdutf_warn_unused result validate_with_errors(const char *buf, size_t len) noexcept {
+  const uint8_t *data = reinterpret_cast(buf);
+  size_t pos = 0;
+  uint32_t code_point = 0;
+  while (pos < len) {
+    // check of the next 16 bytes are ascii.
+    size_t next_pos = pos + 16;
+    if (next_pos <= len) { // if it is safe to read 16 more bytes, check that they are ascii
+      uint64_t v1;
+      std::memcpy(&v1, data + pos, sizeof(uint64_t));
+      uint64_t v2;
+      std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
+      uint64_t v{v1 | v2};
+      if ((v & 0x8080808080808080) == 0) {
+        pos = next_pos;
+        continue;
+      }
+    }
+    unsigned char byte = data[pos];
 
-  simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept override {
-    return set_best()->detect_encodings(input, length);
-  }
+    while (byte < 0b10000000) {
+      if (++pos == len) { return result(error_code::SUCCESS, len); }
+      byte = data[pos];
+    }
 
-  simdutf_warn_unused bool validate_utf8(const char * buf, size_t len) const noexcept final override {
-    return set_best()->validate_utf8(buf, len);
+    if ((byte & 0b11100000) == 0b11000000) {
+      next_pos = pos + 2;
+      if (next_pos > len) { return result(error_code::TOO_SHORT, pos); }
+      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
+      // range check
+      code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
+      if ((code_point < 0x80) || (0x7ff < code_point)) { return result(error_code::OVERLONG, pos); }
+    } else if ((byte & 0b11110000) == 0b11100000) {
+      next_pos = pos + 3;
+      if (next_pos > len) { return result(error_code::TOO_SHORT, pos); }
+      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
+      if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
+      // range check
+      code_point = (byte & 0b00001111) << 12 |
+                   (data[pos + 1] & 0b00111111) << 6 |
+                   (data[pos + 2] & 0b00111111);
+      if ((code_point < 0x800) || (0xffff < code_point)) { return result(error_code::OVERLONG, pos);}
+      if (0xd7ff < code_point && code_point < 0xe000) { return result(error_code::SURROGATE, pos); }
+    } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000
+      next_pos = pos + 4;
+      if (next_pos > len) { return result(error_code::TOO_SHORT, pos); }
+      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
+      if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
+      if ((data[pos + 3] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
+      // range check
+      code_point =
+          (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
+          (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
+      if (code_point <= 0xffff) { return result(error_code::OVERLONG, pos); }
+      if (0x10ffff < code_point) { return result(error_code::TOO_LARGE, pos); }
+    } else {
+      // we either have too many continuation bytes or an invalid leading byte
+      if ((byte & 0b11000000) == 0b10000000) { return result(error_code::TOO_LONG, pos); }
+      else { return result(error_code::HEADER_BITS, pos); }
+    }
+    pos = next_pos;
   }
+  return result(error_code::SUCCESS, len);
+}
 
-  simdutf_warn_unused result validate_utf8_with_errors(const char * buf, size_t len) const noexcept final override {
-    return set_best()->validate_utf8_with_errors(buf, len);
+// Finds the previous leading byte starting backward from buf and validates with errors from there
+// Used to pinpoint the location of an error when an invalid chunk is detected
+// We assume that the stream starts with a leading byte, and to check that it is the case, we
+// ask that you pass a pointer to the start of the stream (start).
+inline simdutf_warn_unused result rewind_and_validate_with_errors(const char *start, const char *buf, size_t len) noexcept {
+    // First check that we start with a leading byte
+  if ((*start & 0b11000000) == 0b10000000) {
+    return result(error_code::TOO_LONG, 0);
   }
-
-  simdutf_warn_unused bool validate_ascii(const char * buf, size_t len) const noexcept final override {
-    return set_best()->validate_ascii(buf, len);
+  size_t extra_len{0};
+  // A leading byte cannot be further than 4 bytes away
+  for(int i = 0; i < 5; i++) {
+    unsigned char byte = *buf;
+    if ((byte & 0b11000000) != 0b10000000) {
+      break;
+    } else {
+      buf--;
+      extra_len++;
+    }
   }
 
-  simdutf_warn_unused result validate_ascii_with_errors(const char * buf, size_t len) const noexcept final override {
-    return set_best()->validate_ascii_with_errors(buf, len);
-  }
+  result res = validate_with_errors(buf, len + extra_len);
+  res.count -= extra_len;
+  return res;
+}
 
-  simdutf_warn_unused bool validate_utf16le(const char16_t * buf, size_t len) const noexcept final override {
-    return set_best()->validate_utf16le(buf, len);
-  }
+inline size_t count_code_points(const char* buf, size_t len) {
+    const int8_t * p = reinterpret_cast(buf);
+    size_t counter{0};
+    for(size_t i = 0; i < len; i++) {
+        // -65 is 0b10111111, anything larger in two-complement's should start a new code point.
+        if(p[i] > -65) { counter++; }
+    }
+    return counter;
+}
 
-  simdutf_warn_unused bool validate_utf16be(const char16_t * buf, size_t len) const noexcept final override {
-    return set_best()->validate_utf16be(buf, len);
-  }
+inline size_t utf16_length_from_utf8(const char* buf, size_t len) {
+    const int8_t * p = reinterpret_cast(buf);
+    size_t counter{0};
+    for(size_t i = 0; i < len; i++) {
+        if(p[i] > -65) { counter++; }
+        if(uint8_t(p[i]) >= 240) { counter++; }
+    }
+    return counter;
+}
 
-  simdutf_warn_unused result validate_utf16le_with_errors(const char16_t * buf, size_t len) const noexcept final override {
+inline size_t latin1_length_from_utf8(const char *buf, size_t len) {
+  const uint8_t * c = reinterpret_cast(buf);
+
+    size_t answer = len;
+    for(size_t i = 0; i < len; i++) {
+        if((c[i] & 0b11100000) == 0b11000000) { answer--; } // if we have a two-byte UTF8 character
+    }
+    return answer;
+}
+
+simdutf_warn_unused inline size_t trim_partial_utf8(const char *input, size_t length) {
+  if (length < 3) {
+    switch (length) {
+      case 2:
+        if (uint8_t(input[length-1]) >= 0xc0) { return length-1; } // 2-, 3- and 4-byte characters with only 1 byte left
+        if (uint8_t(input[length-2]) >= 0xe0) { return length-2; } // 3- and 4-byte characters with only 2 bytes left
+        return length;
+      case 1:
+        if (uint8_t(input[length-1]) >= 0xc0) { return length-1; } // 2-, 3- and 4-byte characters with only 1 byte left
+        return length;
+      case 0:
+        return length;
+    }
+  }
+  if (uint8_t(input[length-1]) >= 0xc0) { return length-1; } // 2-, 3- and 4-byte characters with only 1 byte left
+  if (uint8_t(input[length-2]) >= 0xe0) { return length-2; } // 3- and 4-byte characters with only 1 byte left
+  if (uint8_t(input[length-3]) >= 0xf0) { return length-3; } // 4-byte characters with only 3 bytes left
+  return length;
+}
+
+} // utf8 namespace
+} // unnamed namespace
+} // namespace scalar
+} // namespace simdutf
+
+#endif
+/* end file src/scalar/utf8.h */
+/* begin file src/scalar/utf16.h */
+#ifndef SIMDUTF_UTF16_H
+#define SIMDUTF_UTF16_H
+
+namespace simdutf {
+namespace scalar {
+namespace {
+namespace utf16 {
+
+inline simdutf_warn_unused uint16_t swap_bytes(const uint16_t word) {
+  return uint16_t((word >> 8) | (word << 8));
+}
+
+template 
+inline simdutf_warn_unused bool validate(const char16_t *buf, size_t len) noexcept {
+  const uint16_t *data = reinterpret_cast(buf);
+  uint64_t pos = 0;
+  while (pos < len) {
+    uint16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
+    if((word &0xF800) == 0xD800) {
+        if(pos + 1 >= len) { return false; }
+        uint16_t diff = uint16_t(word - 0xD800);
+        if(diff > 0x3FF) { return false; }
+        uint16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
+        uint16_t diff2 = uint16_t(next_word - 0xDC00);
+        if(diff2 > 0x3FF) { return false; }
+        pos += 2;
+    } else {
+        pos++;
+    }
+  }
+  return true;
+}
+
+template 
+inline simdutf_warn_unused result validate_with_errors(const char16_t *buf, size_t len) noexcept {
+  const uint16_t *data = reinterpret_cast(buf);
+  size_t pos = 0;
+  while (pos < len) {
+    uint16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
+    if((word & 0xF800) == 0xD800) {
+        if(pos + 1 >= len) { return result(error_code::SURROGATE, pos); }
+        uint16_t diff = uint16_t(word - 0xD800);
+        if(diff > 0x3FF) { return result(error_code::SURROGATE, pos); }
+        uint16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
+        uint16_t diff2 = uint16_t(next_word - 0xDC00);
+        if(diff2 > 0x3FF) { return result(error_code::SURROGATE, pos); }
+        pos += 2;
+    } else {
+        pos++;
+    }
+  }
+  return result(error_code::SUCCESS, pos);
+}
+
+template 
+inline size_t count_code_points(const char16_t* buf, size_t len) {
+  // We are not BOM aware.
+  const uint16_t * p = reinterpret_cast(buf);
+  size_t counter{0};
+  for(size_t i = 0; i < len; i++) {
+    uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
+    counter += ((word & 0xFC00) != 0xDC00);
+  }
+  return counter;
+}
+
+template 
+inline size_t utf8_length_from_utf16(const char16_t* buf, size_t len) {
+  // We are not BOM aware.
+  const uint16_t * p = reinterpret_cast(buf);
+  size_t counter{0};
+  for(size_t i = 0; i < len; i++) {
+    uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
+    counter++;                                      // ASCII
+    counter += static_cast(word > 0x7F);    // non-ASCII is at least 2 bytes, surrogates are 2*2 == 4 bytes
+    counter += static_cast((word > 0x7FF && word <= 0xD7FF) || (word >= 0xE000));   // three-byte
+  }
+  return counter;
+}
+
+template 
+inline size_t utf32_length_from_utf16(const char16_t* buf, size_t len) {
+  // We are not BOM aware.
+  const uint16_t * p = reinterpret_cast(buf);
+  size_t counter{0};
+  for(size_t i = 0; i < len; i++) {
+    uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
+    counter += ((word & 0xFC00) != 0xDC00);
+  }
+  return counter;
+}
+
+
+inline size_t latin1_length_from_utf16(size_t len) {
+  return len;
+}
+
+simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* out) {
+  const uint16_t * input = reinterpret_cast(in);
+  uint16_t * output = reinterpret_cast(out);
+  for (size_t i = 0; i < size; i++) {
+    *output++ = uint16_t(input[i] >> 8 | input[i] << 8);
+  }
+}
+
+
+template 
+simdutf_warn_unused inline size_t trim_partial_utf16(const char16_t* input, size_t length) {
+  if (length <= 1) {
+    return length;
+  }
+  uint16_t last_word = uint16_t(input[length-1]);
+  last_word = !match_system(big_endian) ? swap_bytes(last_word) : last_word;
+  length -= ((last_word & 0xFC00) == 0xD800);
+  return length;
+}
+
+} // utf16 namespace
+} // unnamed namespace
+} // namespace scalar
+} // namespace simdutf
+
+#endif
+/* end file src/scalar/utf16.h */
+
+namespace simdutf {
+bool implementation::supported_by_runtime_system() const {
+  uint32_t required_instruction_sets = this->required_instruction_sets();
+  uint32_t supported_instruction_sets = internal::detect_supported_architectures();
+  return ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets);
+}
+
+simdutf_warn_unused encoding_type implementation::autodetect_encoding(const char * input, size_t length) const noexcept {
+    // If there is a BOM, then we trust it.
+    auto bom_encoding = simdutf::BOM::check_bom(input, length);
+    if(bom_encoding != encoding_type::unspecified) { return bom_encoding; }
+    // UTF8 is common, it includes ASCII, and is commonly represented
+    // without a BOM, so if it fits, go with that. Note that it is still
+    // possible to get it wrong, we are only 'guessing'. If some has UTF-16
+    // data without a BOM, it could pass as UTF-8.
+    //
+    // An interesting twist might be to check for UTF-16 ASCII first (every
+    // other byte is zero).
+    if(validate_utf8(input, length)) { return encoding_type::UTF8; }
+    // The next most common encoding that might appear without BOM is probably
+    // UTF-16LE, so try that next.
+    if((length % 2) == 0) {
+      // important: we need to divide by two
+      if(validate_utf16le(reinterpret_cast(input), length/2)) { return encoding_type::UTF16_LE; }
+    }
+    if((length % 4) == 0) {
+      if(validate_utf32(reinterpret_cast(input), length/4)) { return encoding_type::UTF32_LE; }
+    }
+    return encoding_type::unspecified;
+}
+
+namespace internal {
+
+// Static array of known implementations. We're hoping these get baked into the executable
+// without requiring a static initializer.
+
+
+#if SIMDUTF_IMPLEMENTATION_ICELAKE
+static const icelake::implementation* get_icelake_singleton() {
+  static const icelake::implementation icelake_singleton{};
+  return &icelake_singleton;
+}
+#endif
+#if SIMDUTF_IMPLEMENTATION_HASWELL
+static const haswell::implementation* get_haswell_singleton() {
+  static const haswell::implementation haswell_singleton{};
+  return &haswell_singleton;
+}
+#endif
+#if SIMDUTF_IMPLEMENTATION_WESTMERE
+static const westmere::implementation* get_westmere_singleton() {
+  static const westmere::implementation westmere_singleton{};
+  return &westmere_singleton;
+}
+#endif
+#if SIMDUTF_IMPLEMENTATION_ARM64
+static const arm64::implementation* get_arm64_singleton() {
+  static const arm64::implementation arm64_singleton{};
+  return &arm64_singleton;
+}
+#endif
+#if SIMDUTF_IMPLEMENTATION_PPC64
+static const ppc64::implementation* get_ppc64_singleton() {
+  static const ppc64::implementation ppc64_singleton{};
+  return &ppc64_singleton;
+}
+#endif
+#if SIMDUTF_IMPLEMENTATION_FALLBACK
+static const fallback::implementation* get_fallback_singleton() {
+  static const fallback::implementation fallback_singleton{};
+  return &fallback_singleton;
+}
+#endif
+
+/**
+ * @private Detects best supported implementation on first use, and sets it
+ */
+class detect_best_supported_implementation_on_first_use final : public implementation {
+public:
+  const std::string &name() const noexcept final { return set_best()->name(); }
+  const std::string &description() const noexcept final { return set_best()->description(); }
+  uint32_t required_instruction_sets() const noexcept final { return set_best()->required_instruction_sets(); }
+
+  simdutf_warn_unused int detect_encodings(const char * input, size_t length) const noexcept override {
+    return set_best()->detect_encodings(input, length);
+  }
+
+  simdutf_warn_unused bool validate_utf8(const char * buf, size_t len) const noexcept final override {
+    return set_best()->validate_utf8(buf, len);
+  }
+
+  simdutf_warn_unused result validate_utf8_with_errors(const char * buf, size_t len) const noexcept final override {
+    return set_best()->validate_utf8_with_errors(buf, len);
+  }
+
+  simdutf_warn_unused bool validate_ascii(const char * buf, size_t len) const noexcept final override {
+    return set_best()->validate_ascii(buf, len);
+  }
+
+  simdutf_warn_unused result validate_ascii_with_errors(const char * buf, size_t len) const noexcept final override {
+    return set_best()->validate_ascii_with_errors(buf, len);
+  }
+
+  simdutf_warn_unused bool validate_utf16le(const char16_t * buf, size_t len) const noexcept final override {
+    return set_best()->validate_utf16le(buf, len);
+  }
+
+  simdutf_warn_unused bool validate_utf16be(const char16_t * buf, size_t len) const noexcept final override {
+    return set_best()->validate_utf16be(buf, len);
+  }
+
+  simdutf_warn_unused result validate_utf16le_with_errors(const char16_t * buf, size_t len) const noexcept final override {
     return set_best()->validate_utf16le_with_errors(buf, len);
   }
 
@@ -4209,6 +4656,34 @@ class detect_best_supported_implementation_on_first_use final : public implement
     return set_best()->validate_utf32_with_errors(buf, len);
   }
 
+  simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept final override {
+    return set_best()->convert_latin1_to_utf8(buf, len,utf8_output);
+  }
+
+  simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
+    return set_best()->convert_latin1_to_utf16le(buf, len, utf16_output);
+  }
+
+  simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
+    return set_best()->convert_latin1_to_utf16be(buf, len, utf16_output);
+  }
+
+  simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t * latin1_output) const noexcept final override {
+    return set_best()->convert_latin1_to_utf32(buf, len,latin1_output);
+  }
+
+  simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final override {
+    return set_best()->convert_utf8_to_latin1(buf, len,latin1_output);
+  }
+
+  simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept  final override {
+  return set_best()->convert_utf8_to_latin1_with_errors(buf, len, latin1_output);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) const noexcept final override {
+    return set_best()->convert_valid_utf8_to_latin1(buf, len,latin1_output);
+  }
+
   simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) const noexcept final override {
     return set_best()->convert_utf8_to_utf16le(buf, len, utf16_output);
   }
@@ -4245,6 +4720,30 @@ class detect_best_supported_implementation_on_first_use final : public implement
     return set_best()->convert_valid_utf8_to_utf32(buf, len, utf32_output);
   }
 
+  simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
+    return set_best()->convert_utf16le_to_latin1(buf, len, latin1_output);
+  }
+
+  simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
+    return set_best()->convert_utf16be_to_latin1(buf, len, latin1_output);
+  }
+
+  simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
+    return set_best()->convert_utf16le_to_latin1_with_errors(buf, len, latin1_output);
+  }
+
+  simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
+    return set_best()->convert_utf16be_to_latin1_with_errors(buf, len, latin1_output);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
+    return set_best()->convert_valid_utf16le_to_latin1(buf, len, latin1_output);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_output) const noexcept final override {
+    return set_best()->convert_valid_utf16be_to_latin1(buf, len, latin1_output);
+  }
+
   simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_output) const noexcept final override {
     return set_best()->convert_utf16le_to_utf8(buf, len, utf8_output);
   }
@@ -4269,6 +4768,18 @@ class detect_best_supported_implementation_on_first_use final : public implement
     return set_best()->convert_valid_utf16be_to_utf8(buf, len, utf8_output);
   }
 
+  simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final override {
+    return set_best()->convert_utf32_to_latin1(buf, len,latin1_output);
+  }
+
+  simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * buf, size_t len, char* latin1_output) const noexcept final override {
+    return set_best()->convert_utf32_to_latin1_with_errors(buf, len,latin1_output);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * buf, size_t len, char* latin1_output) const noexcept final override {
+    return set_best()->convert_utf32_to_latin1(buf, len,latin1_output);
+  }
+
   simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * buf, size_t len, char* utf8_output) const noexcept final override {
     return set_best()->convert_utf32_to_utf8(buf, len, utf8_output);
   }
@@ -4345,6 +4856,22 @@ class detect_best_supported_implementation_on_first_use final : public implement
     return set_best()->count_utf8(buf, len);
   }
 
+  simdutf_warn_unused size_t latin1_length_from_utf8(const char * buf, size_t len) const noexcept override {
+    return set_best()->latin1_length_from_utf8(buf, len);
+  }
+
+  simdutf_warn_unused size_t latin1_length_from_utf16(size_t len) const noexcept override {
+    return set_best()->latin1_length_from_utf16(len);
+  }
+
+  simdutf_warn_unused size_t latin1_length_from_utf32(size_t len) const noexcept override {
+    return set_best()->latin1_length_from_utf32(len);
+  }
+
+  simdutf_warn_unused size_t utf8_length_from_latin1(const char * buf, size_t len) const noexcept override {
+    return set_best()->utf8_length_from_latin1(buf, len);
+  }
+
   simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * buf, size_t len) const noexcept override {
     return set_best()->utf8_length_from_utf16le(buf, len);
   }
@@ -4353,6 +4880,14 @@ class detect_best_supported_implementation_on_first_use final : public implement
     return set_best()->utf8_length_from_utf16be(buf, len);
   }
 
+  simdutf_warn_unused size_t utf16_length_from_latin1(size_t len) const noexcept override {
+    return set_best()->utf16_length_from_latin1(len);
+  }
+
+  simdutf_warn_unused size_t utf32_length_from_latin1(size_t len) const noexcept override {
+    return set_best()->utf32_length_from_latin1(len);
+  }
+
   simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * buf, size_t len) const noexcept override {
     return set_best()->utf32_length_from_utf16le(buf, len);
   }
@@ -4383,27 +4918,29 @@ class detect_best_supported_implementation_on_first_use final : public implement
   const implementation *set_best() const noexcept;
 };
 
-
-const std::initializer_list available_implementation_pointers {
+static const std::initializer_list& get_available_implementation_pointers() {
+  static const std::initializer_list available_implementation_pointers {
 #if SIMDUTF_IMPLEMENTATION_ICELAKE
-  &icelake_singleton,
+    get_icelake_singleton(),
 #endif
 #if SIMDUTF_IMPLEMENTATION_HASWELL
-  &haswell_singleton,
+    get_haswell_singleton(),
 #endif
 #if SIMDUTF_IMPLEMENTATION_WESTMERE
-  &westmere_singleton,
+    get_westmere_singleton(),
 #endif
 #if SIMDUTF_IMPLEMENTATION_ARM64
-  &arm64_singleton,
+    get_arm64_singleton(),
 #endif
 #if SIMDUTF_IMPLEMENTATION_PPC64
-  &ppc64_singleton,
+    get_ppc64_singleton(),
 #endif
 #if SIMDUTF_IMPLEMENTATION_FALLBACK
-  &fallback_singleton,
+    get_fallback_singleton(),
 #endif
-}; // available_implementation_pointers
+  }; // available_implementation_pointers
+  return available_implementation_pointers;
+}
 
 // So we can return UNSUPPORTED_ARCHITECTURE from the parser when there is no support
 class unsupported_implementation final : public implementation {
@@ -4459,6 +4996,34 @@ class unsupported_implementation final : public implementation {
     return result(error_code::OTHER, 0);
   }
 
+  simdutf_warn_unused size_t convert_latin1_to_utf8(const char*, size_t, char*) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_latin1_to_utf16le(const char*, size_t, char16_t*) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_latin1_to_utf16be(const char*, size_t, char16_t*) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_latin1_to_utf32(const char*, size_t, char32_t*) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_utf8_to_latin1(const char*, size_t, char*) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char*, size_t, char*) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char*, size_t, char*) const noexcept final override {
+    return 0;
+  }
+
   simdutf_warn_unused size_t convert_utf8_to_utf16le(const char*, size_t, char16_t*) const noexcept final override {
     return 0;
   }
@@ -4495,6 +5060,30 @@ class unsupported_implementation final : public implementation {
     return 0;
   }
 
+  simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t*, size_t, char*) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t*, size_t, char*) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t*, size_t, char*) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t*, size_t, char*) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t*, size_t, char*) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t*, size_t, char*) const noexcept final override {
+    return 0;
+  }
+
   simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t*, size_t, char*) const noexcept final override {
     return 0;
   }
@@ -4519,6 +5108,18 @@ class unsupported_implementation final : public implementation {
     return 0;
   }
 
+  simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t *, size_t, char* ) const noexcept final override {
+    return 0;
+  }
+
+  simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t *, size_t, char* ) const noexcept final override {
+    return result(error_code::OTHER, 0);
+  }
+
+  simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t *, size_t, char* ) const noexcept final override {
+    return 0;
+  }
+
   simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t*, size_t, char*) const noexcept final override {
     return 0;
   }
@@ -4595,6 +5196,21 @@ class unsupported_implementation final : public implementation {
     return 0;
   }
 
+  simdutf_warn_unused size_t latin1_length_from_utf8(const char *, size_t) const noexcept override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t latin1_length_from_utf16(size_t) const noexcept override {
+    return 0;
+  }
+
+  simdutf_warn_unused size_t latin1_length_from_utf32(size_t) const noexcept override {
+    return 0;
+  }
+  simdutf_warn_unused size_t utf8_length_from_latin1(const char *, size_t) const noexcept override {
+    return 0;
+  }
+
   simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t *, size_t) const noexcept override {
     return 0;
   }
@@ -4611,10 +5227,16 @@ class unsupported_implementation final : public implementation {
     return 0;
   }
 
-  simdutf_warn_unused size_t utf16_length_from_utf8(const char *, size_t) const noexcept override {
+    simdutf_warn_unused size_t utf32_length_from_latin1(size_t) const noexcept override {
     return 0;
   }
 
+  simdutf_warn_unused size_t utf16_length_from_utf8(const char *, size_t) const noexcept override {
+    return 0;
+  }
+  simdutf_warn_unused size_t utf16_length_from_latin1(size_t) const noexcept override {
+    return 0;
+  }
   simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t *, size_t) const noexcept override {
     return 0;
   }
@@ -4633,18 +5255,18 @@ class unsupported_implementation final : public implementation {
 const unsupported_implementation unsupported_singleton{};
 
 size_t available_implementation_list::size() const noexcept {
-  return internal::available_implementation_pointers.size();
+  return internal::get_available_implementation_pointers().size();
 }
 const implementation * const *available_implementation_list::begin() const noexcept {
-  return internal::available_implementation_pointers.begin();
+  return internal::get_available_implementation_pointers().begin();
 }
 const implementation * const *available_implementation_list::end() const noexcept {
-  return internal::available_implementation_pointers.end();
+  return internal::get_available_implementation_pointers().end();
 }
 const implementation *available_implementation_list::detect_best_supported() const noexcept {
   // They are prelisted in priority order, so we just go down the list
   uint32_t supported_instruction_sets = internal::detect_supported_architectures();
-  for (const implementation *impl : internal::available_implementation_pointers) {
+  for (const implementation *impl : internal::get_available_implementation_pointers()) {
     uint32_t required_instruction_sets = impl->required_instruction_sets();
     if ((supported_instruction_sets & required_instruction_sets) == required_instruction_sets) { return impl; }
   }
@@ -4709,6 +5331,27 @@ simdutf_warn_unused size_t convert_utf8_to_utf16(const char * input, size_t leng
   return convert_utf8_to_utf16le(input, length, utf16_output);
   #endif
 }
+simdutf_warn_unused size_t convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) noexcept {
+  return get_active_implementation()->convert_latin1_to_utf8(buf, len,utf8_output);
+}
+simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * buf, size_t len, char16_t* utf16_output) noexcept {
+  return get_active_implementation()->convert_latin1_to_utf16le(buf, len, utf16_output);
+}
+simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * buf, size_t len, char16_t* utf16_output) noexcept{
+  return get_active_implementation()->convert_latin1_to_utf16be(buf, len, utf16_output);
+}
+simdutf_warn_unused size_t convert_latin1_to_utf32(const char * buf, size_t len, char32_t * latin1_output) noexcept {
+  return get_active_implementation()->convert_latin1_to_utf32(buf, len,latin1_output);
+}
+simdutf_warn_unused size_t convert_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) noexcept {
+  return get_active_implementation()->convert_utf8_to_latin1(buf, len,latin1_output);
+}
+simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) noexcept {
+  return get_active_implementation()->convert_utf8_to_latin1_with_errors(buf, len, latin1_output);
+}
+simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * buf, size_t len, char* latin1_output) noexcept {
+  return get_active_implementation()->convert_valid_utf8_to_latin1(buf, len,latin1_output);
+}
 simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * input, size_t length, char16_t* utf16_output) noexcept {
   return get_active_implementation()->convert_utf8_to_utf16le(input, length, utf16_output);
 }
@@ -4789,6 +5432,38 @@ simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t * buf, size_t le
   return convert_utf16le_to_utf8(buf, len, utf8_buffer);
   #endif
 }
+simdutf_warn_unused size_t convert_utf16_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_utf16be_to_latin1(buf, len, latin1_buffer);
+  #else
+  return convert_utf16le_to_latin1(buf, len, latin1_buffer);
+  #endif
+}
+simdutf_warn_unused size_t convert_latin1_to_utf16(const char * buf, size_t len, char16_t* utf16_output) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_latin1_to_utf16be(buf, len, utf16_output);
+  #else
+  return convert_latin1_to_utf16le(buf, len, utf16_output);
+  #endif
+}
+simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
+  return get_active_implementation()->convert_utf16be_to_latin1(buf, len, latin1_buffer);
+}
+simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
+  return get_active_implementation()->convert_utf16le_to_latin1(buf, len, latin1_buffer);
+}
+simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
+  return get_active_implementation()->convert_valid_utf16be_to_latin1(buf, len, latin1_buffer);
+}
+simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
+  return get_active_implementation()->convert_valid_utf16le_to_latin1(buf, len, latin1_buffer);
+}
+simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
+  return get_active_implementation()->convert_utf16le_to_latin1_with_errors(buf, len, latin1_buffer);
+}
+simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
+  return get_active_implementation()->convert_utf16be_to_latin1_with_errors(buf, len, latin1_buffer);
+}
 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
   return get_active_implementation()->convert_utf16le_to_utf8(buf, len, utf8_buffer);
 }
@@ -4802,6 +5477,13 @@ simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t * bu
   return convert_utf16le_to_utf8_with_errors(buf, len, utf8_buffer);
   #endif
 }
+simdutf_warn_unused result convert_utf16_to_latin1_with_errors(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_utf16be_to_latin1_with_errors(buf, len, latin1_buffer);
+  #else
+  return convert_utf16le_to_latin1_with_errors(buf, len, latin1_buffer);
+  #endif
+}
 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
   return get_active_implementation()->convert_utf16le_to_utf8_with_errors(buf, len, utf8_buffer);
 }
@@ -4815,6 +5497,13 @@ simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * buf, siz
   return convert_valid_utf16le_to_utf8(buf, len, utf8_buffer);
   #endif
 }
+simdutf_warn_unused size_t convert_valid_utf16_to_latin1(const char16_t * buf, size_t len, char* latin1_buffer) noexcept {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return convert_valid_utf16be_to_latin1(buf, len, latin1_buffer);
+  #else
+  return convert_valid_utf16le_to_latin1(buf, len, latin1_buffer);
+  #endif
+}
 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * buf, size_t len, char* utf8_buffer) noexcept {
   return get_active_implementation()->convert_valid_utf16le_to_utf8(buf, len, utf8_buffer);
 }
@@ -4837,6 +5526,9 @@ simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t * buf, size_t l
   return convert_utf32_to_utf16le(buf, len, utf16_buffer);
   #endif
 }
+simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_output) noexcept {
+  return get_active_implementation()->convert_utf32_to_latin1(input, length, latin1_output);
+}
 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * buf, size_t len, char16_t* utf16_buffer) noexcept {
   return get_active_implementation()->convert_utf32_to_utf16le(buf, len, utf16_buffer);
 }
@@ -4927,6 +5619,18 @@ simdutf_warn_unused size_t count_utf16be(const char16_t * input, size_t length)
 simdutf_warn_unused size_t count_utf8(const char * input, size_t length) noexcept {
   return get_active_implementation()->count_utf8(input, length);
 }
+simdutf_warn_unused size_t latin1_length_from_utf8(const char * buf, size_t len) noexcept {
+  return get_active_implementation()->latin1_length_from_utf8(buf, len);
+}
+simdutf_warn_unused size_t latin1_length_from_utf16(size_t len) noexcept {
+  return get_active_implementation()->latin1_length_from_utf16(len);
+}
+simdutf_warn_unused size_t latin1_length_from_utf32(size_t len) noexcept {
+  return get_active_implementation()->latin1_length_from_utf32(len);
+}
+simdutf_warn_unused size_t utf8_length_from_latin1(const char * buf, size_t len) noexcept {
+  return get_active_implementation()->utf8_length_from_latin1(buf, len);
+}
 simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t * input, size_t length) noexcept {
   #if SIMDUTF_IS_BIG_ENDIAN
   return utf8_length_from_utf16be(input, length);
@@ -4956,6 +5660,9 @@ simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, siz
 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) noexcept {
   return get_active_implementation()->utf16_length_from_utf8(input, length);
 }
+simdutf_warn_unused size_t utf16_length_from_latin1(size_t length) noexcept {
+  return get_active_implementation()->utf16_length_from_latin1(length);
+}
 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) noexcept {
   return get_active_implementation()->utf8_length_from_utf32(input, length);
 }
@@ -4971,17 +5678,34 @@ simdutf_warn_unused simdutf::encoding_type autodetect_encoding(const char * buf,
 simdutf_warn_unused int detect_encodings(const char * buf, size_t length) noexcept {
   return get_active_implementation()->detect_encodings(buf, length);
 }
-
 const implementation * builtin_implementation() {
   static const implementation * builtin_impl = get_available_implementations()[SIMDUTF_STRINGIFY(SIMDUTF_BUILTIN_IMPLEMENTATION)];
   return builtin_impl;
 }
 
+simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length) {
+  return scalar::utf8::trim_partial_utf8(input, length);
+}
+
+simdutf_warn_unused size_t trim_partial_utf16be(const char16_t* input, size_t length) {
+  return scalar::utf16::trim_partial_utf16(input, length);
+}
+
+simdutf_warn_unused size_t trim_partial_utf16le(const char16_t* input, size_t length) {
+  return scalar::utf16::trim_partial_utf16(input, length);
+}
+
+simdutf_warn_unused size_t trim_partial_utf16(const char16_t* input, size_t length) {
+  #if SIMDUTF_IS_BIG_ENDIAN
+  return trim_partial_utf16be(input, length);
+  #else
+  return trim_partial_utf16le(input, length);
+  #endif
+}
 
 } // namespace simdutf
 
 /* end file src/implementation.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=encoding_types.cpp
 /* begin file src/encoding_types.cpp */
 
 namespace simdutf {
@@ -5043,7 +5767,6 @@ encoding_type check_bom(const char* byte, size_t length) {
 }
 }
 /* end file src/encoding_types.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=error.cpp
 /* begin file src/error.cpp */
 namespace simdutf {
 
@@ -5055,7 +5778,6 @@ namespace simdutf {
 /* end file src/error.cpp */
 // The large tables should be included once and they
 // should not depend on a kernel.
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=tables/utf8_to_utf16_tables.h
 /* begin file src/tables/utf8_to_utf16_tables.h */
 #ifndef SIMDUTF_UTF8_TO_UTF16_TABLES_H
 #define SIMDUTF_UTF8_TO_UTF16_TABLES_H
@@ -9394,7 +10116,6 @@ const uint8_t utf8bigindex[4096][2] =
 
 #endif // SIMDUTF_UTF8_TO_UTF16_TABLES_H
 /* end file src/tables/utf8_to_utf16_tables.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=tables/utf16_to_utf8_tables.h
 /* begin file src/tables/utf16_to_utf8_tables.h */
 // file generated by scripts/sse_convert_utf16_to_utf8.py
 #ifndef SIMDUTF_UTF16_TO_UTF8_TABLES_H
@@ -9935,7 +10656,6 @@ namespace utf16_to_utf8 {
 // End of tables.
 
 // The scalar routines should be included once.
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/ascii.h
 /* begin file src/scalar/ascii.h */
 #ifndef SIMDUTF_ASCII_H
 #define SIMDUTF_ASCII_H
@@ -9950,7 +10670,7 @@ inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept {
     const uint8_t *data = reinterpret_cast(buf);
     uint64_t pos = 0;
     // process in blocks of 16 bytes when possible
-    for (;pos + 16 < len; pos += 16) {
+    for (;pos + 16 <= len; pos += 16) {
         uint64_t v1;
         std::memcpy(&v1, data + pos, sizeof(uint64_t));
         uint64_t v2;
@@ -9970,7 +10690,7 @@ inline simdutf_warn_unused result validate_with_errors(const char *buf, size_t l
     const uint8_t *data = reinterpret_cast(buf);
     size_t pos = 0;
     // process in blocks of 16 bytes when possible
-    for (;pos + 16 < len; pos += 16) {
+    for (;pos + 16 <= len; pos += 16) {
         uint64_t v1;
         std::memcpy(&v1, data + pos, sizeof(uint64_t));
         uint64_t v2;
@@ -9996,379 +10716,106 @@ inline simdutf_warn_unused result validate_with_errors(const char *buf, size_t l
 
 #endif
 /* end file src/scalar/ascii.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf8.h
-/* begin file src/scalar/utf8.h */
-#ifndef SIMDUTF_UTF8_H
-#define SIMDUTF_UTF8_H
+/* begin file src/scalar/utf32.h */
+#ifndef SIMDUTF_UTF32_H
+#define SIMDUTF_UTF32_H
 
 namespace simdutf {
 namespace scalar {
 namespace {
-namespace utf8 {
-#if SIMDUTF_IMPLEMENTATION_FALLBACK
-// only used by the fallback kernel.
-// credit: based on code from Google Fuchsia (Apache Licensed)
-inline simdutf_warn_unused bool validate(const char *buf, size_t len) noexcept {
-  const uint8_t *data = reinterpret_cast(buf);
-  uint64_t pos = 0;
-  uint32_t code_point = 0;
-  while (pos < len) {
-    // check of the next 8 bytes are ascii.
-    uint64_t next_pos = pos + 16;
-    if (next_pos <= len) { // if it is safe to read 8 more bytes, check that they are ascii
-      uint64_t v1;
-      std::memcpy(&v1, data + pos, sizeof(uint64_t));
-      uint64_t v2;
-      std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
-      uint64_t v{v1 | v2};
-      if ((v & 0x8080808080808080) == 0) {
-        pos = next_pos;
-        continue;
-      }
-    }
-    unsigned char byte = data[pos];
-
-    while (byte < 0b10000000) {
-      if (++pos == len) { return true; }
-      byte = data[pos];
-    }
+namespace utf32 {
 
-    if ((byte & 0b11100000) == 0b11000000) {
-      next_pos = pos + 2;
-      if (next_pos > len) { return false; }
-      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
-      // range check
-      code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
-      if ((code_point < 0x80) || (0x7ff < code_point)) { return false; }
-    } else if ((byte & 0b11110000) == 0b11100000) {
-      next_pos = pos + 3;
-      if (next_pos > len) { return false; }
-      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
-      if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; }
-      // range check
-      code_point = (byte & 0b00001111) << 12 |
-                   (data[pos + 1] & 0b00111111) << 6 |
-                   (data[pos + 2] & 0b00111111);
-      if ((code_point < 0x800) || (0xffff < code_point) ||
-          (0xd7ff < code_point && code_point < 0xe000)) {
+inline simdutf_warn_unused bool validate(const char32_t *buf, size_t len) noexcept {
+  const uint32_t *data = reinterpret_cast(buf);
+  uint64_t pos = 0;
+  for(;pos < len; pos++) {
+    uint32_t word = data[pos];
+    if(word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) {
         return false;
-      }
-    } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000
-      next_pos = pos + 4;
-      if (next_pos > len) { return false; }
-      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return false; }
-      if ((data[pos + 2] & 0b11000000) != 0b10000000) { return false; }
-      if ((data[pos + 3] & 0b11000000) != 0b10000000) { return false; }
-      // range check
-      code_point =
-          (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
-          (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
-      if (code_point <= 0xffff || 0x10ffff < code_point) { return false; }
-    } else {
-      // we may have a continuation
-      return false;
     }
-    pos = next_pos;
   }
   return true;
 }
-#endif
 
-inline simdutf_warn_unused result validate_with_errors(const char *buf, size_t len) noexcept {
-  const uint8_t *data = reinterpret_cast(buf);
+inline simdutf_warn_unused result validate_with_errors(const char32_t *buf, size_t len) noexcept {
+  const uint32_t *data = reinterpret_cast(buf);
   size_t pos = 0;
-  uint32_t code_point = 0;
-  while (pos < len) {
-    // check of the next 8 bytes are ascii.
-    size_t next_pos = pos + 16;
-    if (next_pos <= len) { // if it is safe to read 8 more bytes, check that they are ascii
-      uint64_t v1;
-      std::memcpy(&v1, data + pos, sizeof(uint64_t));
-      uint64_t v2;
-      std::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
-      uint64_t v{v1 | v2};
-      if ((v & 0x8080808080808080) == 0) {
-        pos = next_pos;
-        continue;
-      }
+  for(;pos < len; pos++) {
+    uint32_t word = data[pos];
+    if(word > 0x10FFFF) {
+        return result(error_code::TOO_LARGE, pos);
     }
-    unsigned char byte = data[pos];
-
-    while (byte < 0b10000000) {
-      if (++pos == len) { return result(error_code::SUCCESS, len); }
-      byte = data[pos];
+    if(word >= 0xD800 && word <= 0xDFFF) {
+        return result(error_code::SURROGATE, pos);
     }
+  }
+  return result(error_code::SUCCESS, pos);
+}
 
-    if ((byte & 0b11100000) == 0b11000000) {
-      next_pos = pos + 2;
-      if (next_pos > len) { return result(error_code::TOO_SHORT, pos); }
-      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
-      // range check
-      code_point = (byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111);
-      if ((code_point < 0x80) || (0x7ff < code_point)) { return result(error_code::OVERLONG, pos); }
-    } else if ((byte & 0b11110000) == 0b11100000) {
-      next_pos = pos + 3;
-      if (next_pos > len) { return result(error_code::TOO_SHORT, pos); }
-      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
-      if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
-      // range check
-      code_point = (byte & 0b00001111) << 12 |
-                   (data[pos + 1] & 0b00111111) << 6 |
-                   (data[pos + 2] & 0b00111111);
-      if ((code_point < 0x800) || (0xffff < code_point)) { return result(error_code::OVERLONG, pos);}
-      if (0xd7ff < code_point && code_point < 0xe000) { return result(error_code::SURROGATE, pos); }
-    } else if ((byte & 0b11111000) == 0b11110000) { // 0b11110000
-      next_pos = pos + 4;
-      if (next_pos > len) { return result(error_code::TOO_SHORT, pos); }
-      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
-      if ((data[pos + 2] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
-      if ((data[pos + 3] & 0b11000000) != 0b10000000) { return result(error_code::TOO_SHORT, pos); }
-      // range check
-      code_point =
-          (byte & 0b00000111) << 18 | (data[pos + 1] & 0b00111111) << 12 |
-          (data[pos + 2] & 0b00111111) << 6 | (data[pos + 3] & 0b00111111);
-      if (code_point <= 0xffff) { return result(error_code::OVERLONG, pos); }
-      if (0x10ffff < code_point) { return result(error_code::TOO_LARGE, pos); }
-    } else {
-      // we either have too many continuation bytes or an invalid leading byte
-      if ((byte & 0b11000000) == 0b10000000) { return result(error_code::TOO_LONG, pos); }
-      else { return result(error_code::HEADER_BITS, pos); }
-    }
-    pos = next_pos;
-  }
-  return result(error_code::SUCCESS, len);
-}
-
-// Finds the previous leading byte and validates with errors from there
-// Used to pinpoint the location of an error when an invalid chunk is detected
-inline simdutf_warn_unused result rewind_and_validate_with_errors(const char *start, const char *buf, size_t len) noexcept {
-  // First check that we start with a leading byte
-  if ((*start & 0b11000000) == 0b10000000) {
-    return result(error_code::TOO_LONG, 0);
-  }
-  size_t extra_len{0};
-  // A leading byte cannot be further than 4 bytes away
-  for(int i = 0; i < 5; i++) {
-    unsigned char byte = *buf;
-    if ((byte & 0b11000000) != 0b10000000) {
-      break;
-    } else {
-      buf--;
-      extra_len++;
-    }
-  }
-
-  result res = validate_with_errors(buf, len + extra_len);
-  res.count -= extra_len;
-  return res;
-}
-
-inline size_t count_code_points(const char* buf, size_t len) {
-    const int8_t * p = reinterpret_cast(buf);
-    size_t counter{0};
-    for(size_t i = 0; i < len; i++) {
-        // -65 is 0b10111111, anything larger in two-complement's should start a new code point.
-        if(p[i] > -65) { counter++; }
-    }
-    return counter;
-}
-
-inline size_t utf16_length_from_utf8(const char* buf, size_t len) {
-    const int8_t * p = reinterpret_cast(buf);
-    size_t counter{0};
-    for(size_t i = 0; i < len; i++) {
-        if(p[i] > -65) { counter++; }
-        if(uint8_t(p[i]) >= 240) { counter++; }
-    }
-    return counter;
-}
-
-} // utf8 namespace
-} // unnamed namespace
-} // namespace scalar
-} // namespace simdutf
-
-#endif
-/* end file src/scalar/utf8.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf16.h
-/* begin file src/scalar/utf16.h */
-#ifndef SIMDUTF_UTF16_H
-#define SIMDUTF_UTF16_H
-
-namespace simdutf {
-namespace scalar {
-namespace {
-namespace utf16 {
-
-inline simdutf_warn_unused uint16_t swap_bytes(const uint16_t word) {
-  return uint16_t((word >> 8) | (word << 8));
-}
-
-template 
-inline simdutf_warn_unused bool validate(const char16_t *buf, size_t len) noexcept {
-  const uint16_t *data = reinterpret_cast(buf);
-  uint64_t pos = 0;
-  while (pos < len) {
-    uint16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
-    if((word &0xF800) == 0xD800) {
-        if(pos + 1 >= len) { return false; }
-        uint16_t diff = uint16_t(word - 0xD800);
-        if(diff > 0x3FF) { return false; }
-        uint16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
-        uint16_t diff2 = uint16_t(next_word - 0xDC00);
-        if(diff2 > 0x3FF) { return false; }
-        pos += 2;
-    } else {
-        pos++;
-    }
-  }
-  return true;
-}
-
-template 
-inline simdutf_warn_unused result validate_with_errors(const char16_t *buf, size_t len) noexcept {
-  const uint16_t *data = reinterpret_cast(buf);
-  size_t pos = 0;
-  while (pos < len) {
-    uint16_t word = !match_system(big_endian) ? swap_bytes(data[pos]) : data[pos];
-    if((word & 0xF800) == 0xD800) {
-        if(pos + 1 >= len) { return result(error_code::SURROGATE, pos); }
-        uint16_t diff = uint16_t(word - 0xD800);
-        if(diff > 0x3FF) { return result(error_code::SURROGATE, pos); }
-        uint16_t next_word = !match_system(big_endian) ? swap_bytes(data[pos + 1]) : data[pos + 1];
-        uint16_t diff2 = uint16_t(next_word - 0xDC00);
-        if(diff2 > 0x3FF) { return result(error_code::SURROGATE, pos); }
-        pos += 2;
-    } else {
-        pos++;
-    }
-  }
-  return result(error_code::SUCCESS, pos);
-}
-
-template 
-inline size_t count_code_points(const char16_t* buf, size_t len) {
+inline size_t utf8_length_from_utf32(const char32_t* buf, size_t len) {
   // We are not BOM aware.
-  const uint16_t * p = reinterpret_cast(buf);
+  const uint32_t * p = reinterpret_cast(buf);
   size_t counter{0};
   for(size_t i = 0; i < len; i++) {
-    uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
-    counter += ((word & 0xFC00) != 0xDC00);
+    // credit: @ttsugriy  for the vectorizable approach
+    counter++;                                      // ASCII
+    counter += static_cast(p[i] > 0x7F);    // two-byte
+    counter += static_cast(p[i] > 0x7FF);   // three-byte
+    counter += static_cast(p[i] > 0xFFFF);  // four-bytes
   }
   return counter;
 }
 
-template 
-inline size_t utf8_length_from_utf16(const char16_t* buf, size_t len) {
+inline size_t utf16_length_from_utf32(const char32_t* buf, size_t len) {
   // We are not BOM aware.
-  const uint16_t * p = reinterpret_cast(buf);
+  const uint32_t * p = reinterpret_cast(buf);
   size_t counter{0};
   for(size_t i = 0; i < len; i++) {
-    uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
-    /** ASCII **/
-    if(word <= 0x7F) { counter++; }
-    /** two-byte **/
-    else if (word <= 0x7FF) { counter += 2; }
-    /** three-byte **/
-    else if((word <= 0xD7FF) || (word >= 0xE000)) { counter += 3; }
-    /** surrogates -- 4 bytes **/
-    else { counter += 2; }
+    counter++;                                      // non-surrogate word
+    counter += static_cast(p[i] > 0xFFFF);  // surrogate pair
   }
   return counter;
 }
 
-template 
-inline size_t utf32_length_from_utf16(const char16_t* buf, size_t len) {
+inline size_t latin1_length_from_utf32(size_t len) {
   // We are not BOM aware.
-  const uint16_t * p = reinterpret_cast(buf);
-  size_t counter{0};
-  for(size_t i = 0; i < len; i++) {
-    uint16_t word = !match_system(big_endian) ? swap_bytes(p[i]) : p[i];
-    counter += ((word & 0xFC00) != 0xDC00);
-  }
-  return counter;
+  return len; // a utf32 codepoint will always represent 1 latin1 character
 }
 
-simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* out) {
-  const uint16_t * input = reinterpret_cast(in);
-  uint16_t * output = reinterpret_cast(out);
-  for (size_t i = 0; i < size; i++) {
-    *output++ = uint16_t(input[i] >> 8 | input[i] << 8);
-  }
-}
 
-} // utf16 namespace
+
+} // utf32 namespace
 } // unnamed namespace
 } // namespace scalar
 } // namespace simdutf
 
 #endif
-/* end file src/scalar/utf16.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf32.h
-/* begin file src/scalar/utf32.h */
-#ifndef SIMDUTF_UTF32_H
-#define SIMDUTF_UTF32_H
+/* end file src/scalar/utf32.h */
+/* begin file src/scalar/latin1.h */
+#ifndef SIMDUTF_LATIN1_H
+#define SIMDUTF_LATIN1_H
 
 namespace simdutf {
 namespace scalar {
 namespace {
-namespace utf32 {
-
-inline simdutf_warn_unused bool validate(const char32_t *buf, size_t len) noexcept {
-  const uint32_t *data = reinterpret_cast(buf);
-  uint64_t pos = 0;
-  for(;pos < len; pos++) {
-    uint32_t word = data[pos];
-    if(word > 0x10FFFF || (word >= 0xD800 && word <= 0xDFFF)) {
-        return false;
-    }
-  }
-  return true;
-}
+namespace latin1 {
 
-inline simdutf_warn_unused result validate_with_errors(const char32_t *buf, size_t len) noexcept {
-  const uint32_t *data = reinterpret_cast(buf);
-  size_t pos = 0;
-  for(;pos < len; pos++) {
-    uint32_t word = data[pos];
-    if(word > 0x10FFFF) {
-        return result(error_code::TOO_LARGE, pos);
-    }
-    if(word >= 0xD800 && word <= 0xDFFF) {
-        return result(error_code::SURROGATE, pos);
-    }
-  }
-  return result(error_code::SUCCESS, pos);
+inline size_t utf32_length_from_latin1(size_t len) {
+  // We are not BOM aware.
+  return len; // a utf32 unit will always represent 1 latin1 character
 }
 
-inline size_t utf8_length_from_utf32(const char32_t* buf, size_t len) {
-  // We are not BOM aware.
-  const uint32_t * p = reinterpret_cast(buf);
-  size_t counter{0};
-  for(size_t i = 0; i < len; i++) {
-    /** ASCII **/
-    if(p[i] <= 0x7F) { counter++; }
-    /** two-byte **/
-    else if(p[i] <= 0x7FF) { counter += 2; }
-    /** three-byte **/
-    else if(p[i] <= 0xFFFF) { counter += 3; }
-    /** four-bytes **/
-    else { counter += 4; }
+inline size_t utf8_length_from_latin1(const char *buf, size_t len) {
+  const uint8_t * c = reinterpret_cast(buf);
+  size_t answer = 0;
+  for(size_t i = 0; i>7)) { answer++; }
   }
-  return counter;
+  return answer + len;
 }
 
-inline size_t utf16_length_from_utf32(const char32_t* buf, size_t len) {
-  // We are not BOM aware.
-  const uint32_t * p = reinterpret_cast(buf);
-  size_t counter{0};
-  for(size_t i = 0; i < len; i++) {
-    /** non-surrogate word **/
-    if(p[i] <= 0xFFFF) { counter++; }
-    /** surrogate pair **/
-    else { counter += 2; }
-  }
-  return counter;
+inline size_t utf16_length_from_latin1(size_t len) {
+  return len;
 }
 
 } // utf32 namespace
@@ -10377,9 +10824,8 @@ inline size_t utf16_length_from_utf32(const char32_t* buf, size_t len) {
 } // namespace simdutf
 
 #endif
-/* end file src/scalar/utf32.h */
+/* end file src/scalar/latin1.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf32_to_utf8/valid_utf32_to_utf8.h
 /* begin file src/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */
 #ifndef SIMDUTF_VALID_UTF32_TO_UTF8_H
 #define SIMDUTF_VALID_UTF32_TO_UTF8_H
@@ -10446,7 +10892,6 @@ inline size_t convert_valid(const char32_t* buf, size_t len, char* utf8_output)
 
 #endif
 /* end file src/scalar/utf32_to_utf8/valid_utf32_to_utf8.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf32_to_utf8/utf32_to_utf8.h
 /* begin file src/scalar/utf32_to_utf8/utf32_to_utf8.h */
 #ifndef SIMDUTF_UTF32_TO_UTF8_H
 #define SIMDUTF_UTF32_TO_UTF8_H
@@ -10562,7 +11007,6 @@ inline result convert_with_errors(const char32_t* buf, size_t len, char* utf8_ou
 #endif
 /* end file src/scalar/utf32_to_utf8/utf32_to_utf8.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf32_to_utf16/valid_utf32_to_utf16.h
 /* begin file src/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */
 #ifndef SIMDUTF_VALID_UTF32_TO_UTF16_H
 #define SIMDUTF_VALID_UTF32_TO_UTF16_H
@@ -10607,7 +11051,6 @@ inline size_t convert_valid(const char32_t* buf, size_t len, char16_t* utf16_out
 
 #endif
 /* end file src/scalar/utf32_to_utf16/valid_utf32_to_utf16.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf32_to_utf16/utf32_to_utf16.h
 /* begin file src/scalar/utf32_to_utf16/utf32_to_utf16.h */
 #ifndef SIMDUTF_UTF32_TO_UTF16_H
 #define SIMDUTF_UTF32_TO_UTF16_H
@@ -10683,7 +11126,6 @@ inline result convert_with_errors(const char32_t* buf, size_t len, char16_t* utf
 #endif
 /* end file src/scalar/utf32_to_utf16/utf32_to_utf16.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf16_to_utf8/valid_utf16_to_utf8.h
 /* begin file src/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */
 #ifndef SIMDUTF_VALID_UTF16_TO_UTF8_H
 #define SIMDUTF_VALID_UTF16_TO_UTF8_H
@@ -10703,7 +11145,7 @@ inline size_t convert_valid(const char16_t* buf, size_t len, char* utf8_output)
     if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
       uint64_t v;
       ::memcpy(&v, data + pos, sizeof(uint64_t));
-      if (!match_system(big_endian)) v = (v >> 8) | (v << (64 - 8));
+      if (!match_system(big_endian)) { v = (v >> 8) | (v << (64 - 8)); }
       if ((v & 0xFF80FF80FF80FF80) == 0) {
         size_t final_pos = pos + 4;
         while(pos < final_pos) {
@@ -10758,7 +11200,6 @@ inline size_t convert_valid(const char16_t* buf, size_t len, char* utf8_output)
 
 #endif
 /* end file src/scalar/utf16_to_utf8/valid_utf16_to_utf8.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf16_to_utf8/utf16_to_utf8.h
 /* begin file src/scalar/utf16_to_utf8/utf16_to_utf8.h */
 #ifndef SIMDUTF_UTF16_TO_UTF8_H
 #define SIMDUTF_UTF16_TO_UTF8_H
@@ -10774,11 +11215,11 @@ inline size_t convert(const char16_t* buf, size_t len, char* utf8_output) {
   size_t pos = 0;
   char* start{utf8_output};
   while (pos < len) {
-    // try to convert the next block of 8 ASCII characters
+    // try to convert the next block of 8 bytes
     if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
       uint64_t v;
       ::memcpy(&v, data + pos, sizeof(uint64_t));
-      if (!match_system(big_endian)) v = (v >> 8) | (v << (64 - 8));
+      if (!match_system(big_endian)) { v = (v >> 8) | (v << (64 - 8)); }
       if ((v & 0xFF80FF80FF80FF80) == 0) {
         size_t final_pos = pos + 4;
         while(pos < final_pos) {
@@ -10833,7 +11274,7 @@ inline result convert_with_errors(const char16_t* buf, size_t len, char* utf8_ou
   size_t pos = 0;
   char* start{utf8_output};
   while (pos < len) {
-    // try to convert the next block of 8 ASCII characters
+    // try to convert the next block of 8 bytes
     if (pos + 4 <= len) { // if it is safe to read 8 more bytes, check that they are ascii
       uint64_t v;
       ::memcpy(&v, data + pos, sizeof(uint64_t));
@@ -10894,7 +11335,6 @@ inline result convert_with_errors(const char16_t* buf, size_t len, char* utf8_ou
 #endif
 /* end file src/scalar/utf16_to_utf8/utf16_to_utf8.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf16_to_utf32/valid_utf16_to_utf32.h
 /* begin file src/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */
 #ifndef SIMDUTF_VALID_UTF16_TO_UTF32_H
 #define SIMDUTF_VALID_UTF16_TO_UTF32_H
@@ -10936,7 +11376,6 @@ inline size_t convert_valid(const char16_t* buf, size_t len, char32_t* utf32_out
 
 #endif
 /* end file src/scalar/utf16_to_utf32/valid_utf16_to_utf32.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf16_to_utf32/utf16_to_utf32.h
 /* begin file src/scalar/utf16_to_utf32/utf16_to_utf32.h */
 #ifndef SIMDUTF_UTF16_TO_UTF32_H
 #define SIMDUTF_UTF16_TO_UTF32_H
@@ -11008,7 +11447,6 @@ inline result convert_with_errors(const char16_t* buf, size_t len, char32_t* utf
 #endif
 /* end file src/scalar/utf16_to_utf32/utf16_to_utf32.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf8_to_utf16/valid_utf8_to_utf16.h
 /* begin file src/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */
 #ifndef SIMDUTF_VALID_UTF8_TO_UTF16_H
 #define SIMDUTF_VALID_UTF8_TO_UTF16_H
@@ -11093,7 +11531,6 @@ inline size_t convert_valid(const char* buf, size_t len, char16_t* utf16_output)
 
 #endif
 /* end file src/scalar/utf8_to_utf16/valid_utf8_to_utf16.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf8_to_utf16/utf8_to_utf16.h
 /* begin file src/scalar/utf8_to_utf16/utf8_to_utf16.h */
 #ifndef SIMDUTF_UTF8_TO_UTF16_H
 #define SIMDUTF_UTF8_TO_UTF16_H
@@ -11343,7 +11780,6 @@ inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf
 #endif
 /* end file src/scalar/utf8_to_utf16/utf8_to_utf16.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf8_to_utf32/valid_utf8_to_utf32.h
 /* begin file src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */
 #ifndef SIMDUTF_VALID_UTF8_TO_UTF32_H
 #define SIMDUTF_VALID_UTF8_TO_UTF32_H
@@ -11409,7 +11845,6 @@ inline size_t convert_valid(const char* buf, size_t len, char32_t* utf32_output)
 
 #endif
 /* end file src/scalar/utf8_to_utf32/valid_utf8_to_utf32.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=scalar/utf8_to_utf32/utf8_to_utf32.h
 /* begin file src/scalar/utf8_to_utf32/utf8_to_utf32.h */
 #ifndef SIMDUTF_UTF8_TO_UTF32_H
 #define SIMDUTF_UTF8_TO_UTF32_H
@@ -11621,4345 +12056,5997 @@ inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf
 
 #endif
 /* end file src/scalar/utf8_to_utf32/utf8_to_utf32.h */
-//
-
-
-SIMDUTF_PUSH_DISABLE_WARNINGS
-SIMDUTF_DISABLE_UNDESIRED_WARNINGS
 
+/* begin file src/scalar/latin1_to_utf8/latin1_to_utf8.h */
+#ifndef SIMDUTF_LATIN1_TO_UTF8_H
+#define SIMDUTF_LATIN1_TO_UTF8_H
 
-#if SIMDUTF_IMPLEMENTATION_ARM64
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/implementation.cpp
-/* begin file src/arm64/implementation.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/begin.h
-/* begin file src/simdutf/arm64/begin.h */
-// redefining SIMDUTF_IMPLEMENTATION to "arm64"
-// #define SIMDUTF_IMPLEMENTATION arm64
-/* end file src/simdutf/arm64/begin.h */
 namespace simdutf {
-namespace arm64 {
+namespace scalar {
 namespace {
-#ifndef SIMDUTF_ARM64_H
-#error "arm64.h must be included"
-#endif
-using namespace simd;
+namespace latin1_to_utf8 {
 
-simdutf_really_inline bool is_ascii(const simd8x64& input) {
-    simd8 bits = input.reduce_or();
-    return bits.max_val() < 0b10000000u;
-}
+inline size_t convert(const char* buf, size_t len, char* utf8_output) {
+  const unsigned char *data = reinterpret_cast(buf);
+  size_t pos = 0;
+  char* start{utf8_output};
+  while (pos < len) {
+    // try to convert the next block of 16 ASCII bytes
+    if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
+      uint64_t v1;
+      ::memcpy(&v1, data + pos, sizeof(uint64_t));
+      uint64_t v2;
+      ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
+      uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 1000 1000, so it makes sense to concatenate everything
+      if ((v & 0x8080808080808080) == 0) { // if NONE of these are set, e.g. all of them are zero, then everything is ASCII
+        size_t final_pos = pos + 16;
+        while(pos < final_pos) {
+          *utf8_output++ = char(buf[pos]);
+          pos++;
+        }
+        continue;
+      }
+    }
 
-simdutf_unused simdutf_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) {
-    simd8 is_second_byte = prev1 >= uint8_t(0b11000000u);
-    simd8 is_third_byte  = prev2 >= uint8_t(0b11100000u);
-    simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u);
-    // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller is using ^ as well.
-    // This will work fine because we only have to report errors for cases with 0-1 lead bytes.
-    // Multiple lead bytes implies 2 overlapping multibyte characters, and if that happens, there is
-    // guaranteed to be at least *one* lead byte that is part of only 1 other multibyte character.
-    // The error will be detected there.
-    return is_second_byte ^ is_third_byte ^ is_fourth_byte;
+    unsigned char byte = data[pos];
+    if((byte & 0x80) == 0) { // if ASCII
+      // will generate one UTF-8 bytes
+      *utf8_output++ = char(byte);
+      pos++;
+    } else {
+      // will generate two UTF-8 bytes
+      *utf8_output++ = char((byte>>6) | 0b11000000);
+      *utf8_output++ = char((byte & 0b111111) | 0b10000000);
+      pos++;
+    }
+  }
+  return utf8_output - start;
 }
 
-simdutf_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) {
-    simd8 is_third_byte  = prev2 >= uint8_t(0b11100000u);
-    simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u);
-    return is_third_byte ^ is_fourth_byte;
-}
+} // latin1_to_utf8 namespace
+} // unnamed namespace
+} // namespace scalar
+} // namespace simdutf
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_detect_encodings.cpp
-/* begin file src/arm64/arm_detect_encodings.cpp */
-template
-// len is known to be a multiple of 2 when this is called
-int arm_detect_encodings(const char * buf, size_t len) {
-    const char* start = buf;
-    const char* end = buf + len;
+#endif
+/* end file src/scalar/latin1_to_utf8/latin1_to_utf8.h */
+/* begin file src/scalar/latin1_to_utf16/latin1_to_utf16.h */
+#ifndef SIMDUTF_LATIN1_TO_UTF16_H
+#define SIMDUTF_LATIN1_TO_UTF16_H
 
-    bool is_utf8 = true;
-    bool is_utf16 = true;
-    bool is_utf32 = true;
+namespace simdutf {
+namespace scalar {
+namespace {
+namespace latin1_to_utf16 {
 
-    int out = 0;
+template 
+inline size_t convert(const char* buf, size_t len, char16_t* utf16_output) {
+  const uint8_t* data = reinterpret_cast(buf);
+  size_t pos = 0;
+  char16_t* start{ utf16_output };
 
-    const auto v_d8 = simd8::splat(0xd8);
-    const auto v_f8 = simd8::splat(0xf8);
+  while (pos < len) {
+    uint16_t word = uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point
+    *utf16_output++ = char16_t(match_system(big_endian) ? word : utf16::swap_bytes(word));
+    pos++;
+  }
 
-    uint32x4_t currentmax = vmovq_n_u32(0x0);
+  return utf16_output - start;
+}
 
-    checker check{};
+template 
+inline result convert_with_errors(const char* buf, size_t len, char16_t* utf16_output) {
+  const uint8_t* data = reinterpret_cast(buf);
+  size_t pos = 0;
+  char16_t* start{ utf16_output };
 
-    while(buf + 64 <= end) {
-        uint16x8_t in = vld1q_u16(reinterpret_cast(buf));
-        uint16x8_t secondin = vld1q_u16(reinterpret_cast(buf) + simd16::SIZE / sizeof(char16_t));
-        uint16x8_t thirdin = vld1q_u16(reinterpret_cast(buf) + 2*simd16::SIZE / sizeof(char16_t));
-        uint16x8_t fourthin = vld1q_u16(reinterpret_cast(buf) + 3*simd16::SIZE / sizeof(char16_t));
+  while (pos < len) {
+    uint16_t word = uint16_t(data[pos]); // extend Latin-1 char to 16-bit Unicode code point
+    *utf16_output++ = char16_t(match_system(big_endian) ? word : utf16::swap_bytes(word));
+    pos++;
+  }
 
-        const auto u0 = simd16(in);
-        const auto u1 = simd16(secondin);
-        const auto u2 = simd16(thirdin);
-        const auto u3 = simd16(fourthin);
+  return result(error_code::SUCCESS, utf16_output - start);
+}
 
-        const auto v0 = u0.shr<8>();
-        const auto v1 = u1.shr<8>();
-        const auto v2 = u2.shr<8>();
-        const auto v3 = u3.shr<8>();
+} // latin1_to_utf16 namespace
+} // unnamed namespace
+} // namespace scalar
+} // namespace simdutf
 
-        const auto in16 = simd16::pack(v0, v1);
-        const auto nextin16 = simd16::pack(v2, v3);
+#endif
+/* end file src/scalar/latin1_to_utf16/latin1_to_utf16.h */
+/* begin file src/scalar/latin1_to_utf32/latin1_to_utf32.h */
+#ifndef SIMDUTF_LATIN1_TO_UTF32_H
+#define SIMDUTF_LATIN1_TO_UTF32_H
 
-        const uint64_t surrogates_wordmask0 = ((in16 & v_f8) == v_d8).to_bitmask64();
-        const uint64_t surrogates_wordmask1 = ((nextin16 & v_f8) == v_d8).to_bitmask64();
+namespace simdutf {
+namespace scalar {
+namespace {
+namespace latin1_to_utf32 {
 
-        // Check for surrogates
-        if (surrogates_wordmask0 != 0 || surrogates_wordmask1 != 0) {
-            // Cannot be UTF8
-            is_utf8 = false;
-            // Can still be either UTF-16LE or UTF-32 depending on the positions of the surrogates
-            // To be valid UTF-32, a surrogate cannot be in the two most significant bytes of any 32-bit word.
-            // On the other hand, to be valid UTF-16LE, at least one surrogate must be in the two most significant
-            // bytes of a 32-bit word since they always come in pairs in UTF-16LE.
-            // Note that we always proceed in multiple of 4 before this point so there is no offset in 32-bit words.
 
-            if (((surrogates_wordmask0 | surrogates_wordmask1) & 0xf0f0f0f0f0f0f0f0) != 0) {
-                is_utf32 = false;
-                // Code from arm_validate_utf16le.cpp
-                // Not efficient, we do not process surrogates_wordmask1
-                const char16_t * input = reinterpret_cast(buf);
-                const char16_t* end16 = reinterpret_cast(start) + len/2;
+inline size_t convert(const char *buf, size_t len, char32_t *utf32_output) {
+  const unsigned char *data = reinterpret_cast(buf);
+  char32_t* start{utf32_output};
+  for (size_t i = 0; i < len; i++) {
+    *utf32_output++ = (char32_t)data[i];
+  }
+  return utf32_output - start;
+}
 
-                const auto v_fc = simd8::splat(0xfc);
-                const auto v_dc = simd8::splat(0xdc);
+inline result convert_with_errors(const char32_t *buf, size_t len, char32_t *utf32_output) {
+  const uint32_t *data = reinterpret_cast(buf);
+  char32_t* start{utf32_output};
+  for (size_t i = 0; i < len; i++) {
+    *utf32_output++ = (char32_t)data[i];
+  }
+  return result(error_code::SUCCESS, utf32_output - start);
+}
 
-                const uint64_t V0 = ~surrogates_wordmask0;
+} // latin1_to_utf32 namespace
+} // unnamed namespace
+} // namespace scalar
+} // namespace simdutf
 
-                const auto vH0 = ((in16 & v_fc) ==  v_dc);
-                const uint64_t H0 = vH0.to_bitmask64();
+#endif
+/* end file src/scalar/latin1_to_utf32/latin1_to_utf32.h */
 
-                const uint64_t L0 = ~H0 & surrogates_wordmask0;
+/* begin file src/scalar/utf8_to_latin1/utf8_to_latin1.h */
+#ifndef SIMDUTF_UTF8_TO_LATIN1_H
+#define SIMDUTF_UTF8_TO_LATIN1_H
+#include 
 
-                const uint64_t a0 = L0 & (H0 >> 4);
+namespace simdutf {
+namespace scalar {
+namespace {
+namespace utf8_to_latin1 {
 
-                const uint64_t b0 = a0 << 4;
+inline size_t convert(const char* buf, size_t len, char* latin_output) {
+ const uint8_t *data = reinterpret_cast(buf);
+  size_t pos = 0;
+  char* start{latin_output};
 
-                const uint64_t c0 = V0 | a0 | b0;
-                if (c0 == ~0ull) {
-                    input += 16;
-                } else if (c0 == 0xfffffffffffffffull) {
-                    input += 15;
-                } else {
-                    is_utf16 = false;
-                    break;
-                }
+  while (pos < len) {
+    // try to convert the next block of 16 ASCII bytes
+    if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
+      uint64_t v1;
+      ::memcpy(&v1, data + pos, sizeof(uint64_t));
+      uint64_t v2;
+      ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
+      uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 1000 1000 .... etc
+      if ((v & 0x8080808080808080) == 0) { // if NONE of these are set, e.g. all of them are zero, then everything is ASCII
+        size_t final_pos = pos + 16;
+        while(pos < final_pos) {
+          *latin_output++ = char(buf[pos]);
+          pos++;
+        }
+        continue;
+      }
+    }
 
-                while (input + 16 < end16) {
-                    const auto in0 = simd16(input);
-                    const auto in1 = simd16(input + simd16::SIZE / sizeof(char16_t));
-                    const auto t0 = in0.shr<8>();
-                    const auto t1 = in1.shr<8>();
-                    const simd8 in_16 = simd16::pack(t0, t1);
+    // suppose it is not an all ASCII byte sequence
+    uint8_t leading_byte = data[pos]; // leading byte
+    if (leading_byte < 0b10000000) {
+      // converting one ASCII byte !!!
+      *latin_output++ = char(leading_byte);
+      pos++;
+    } else if ((leading_byte & 0b11100000) == 0b11000000) { // the first three bits indicate:
+      // We have a two-byte UTF-8
+      if(pos + 1 >= len) {
+        return 0;
+      } // minimal bound checking
+      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } // checks if the next byte is a valid continuation byte in UTF-8. A valid continuation byte starts with 10.
+      // range check -
+      uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); // assembles the Unicode code point from the two bytes. It does this by discarding the leading 110 and 10 bits from the two bytes, shifting the remaining bits of the first byte, and then combining the results with a bitwise OR operation.
+      if (code_point < 0x80 || 0xFF < code_point) {
+        return 0; // We only care about the range 129-255 which is Non-ASCII latin1 characters. A code_point beneath 0x80 is invalid as it's already covered by bytes whose leading bit is zero. 
+      }
+      *latin_output++ = char(code_point);
+      pos += 2;
+    } else {
+      return 0;
+    }
+  }
+  return latin_output - start;
+}
 
-                    const uint64_t surrogates_wordmask = ((in_16 & v_f8) == v_d8).to_bitmask64();
-                    if(surrogates_wordmask == 0) {
-                        input += 16;
-                    } else {
-                        const uint64_t V = ~surrogates_wordmask;
+inline result convert_with_errors(const char* buf, size_t len, char* latin_output) {
+ const uint8_t *data = reinterpret_cast(buf);
+  size_t pos = 0;
+  char* start{latin_output};
 
-                        const auto vH = ((in_16 & v_fc) ==  v_dc);
-                        const uint64_t H = vH.to_bitmask64();
+  while (pos < len) {
+    // try to convert the next block of 16 ASCII bytes
+    if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
+      uint64_t v1;
+      ::memcpy(&v1, data + pos, sizeof(uint64_t));
+      uint64_t v2;
+      ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
+      uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 1000 1000...etc
+      if ((v & 0x8080808080808080) == 0) { // if NONE of these are set, e.g. all of them are zero, then everything is ASCII
+        size_t final_pos = pos + 16;
+        while(pos < final_pos) {
+          *latin_output++ = char(buf[pos]);
+          pos++;
+        }
+        continue;
+      }
+    }
+    // suppose it is not an all ASCII byte sequence
+    uint8_t leading_byte = data[pos]; // leading byte
+    if (leading_byte < 0b10000000) {
+      // converting one ASCII byte !!!
+      *latin_output++ = char(leading_byte);
+      pos++;
+    } else if ((leading_byte & 0b11100000) == 0b11000000) { // the first three bits indicate:
+      // We have a two-byte UTF-8
+      if(pos + 1 >= len) {
+        return result(error_code::TOO_SHORT, pos); } // minimal bound checking
+      if ((data[pos + 1] & 0b11000000) != 0b10000000) {
+        return result(error_code::TOO_SHORT, pos); } // checks if the next byte is a valid continuation byte in UTF-8. A valid continuation byte starts with 10.
+      // range check -
+      uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); // assembles the Unicode code point from the two bytes. It does this by discarding the leading 110 and 10 bits from the two bytes, shifting the remaining bits of the first byte, and then combining the results with a bitwise OR operation.
+      if (code_point < 0x80) {
+        return result(error_code::OVERLONG, pos);
+      }
+      if (0xFF < code_point) {
+        return result(error_code::TOO_LARGE, pos);
+      } // We only care about the range 129-255 which is Non-ASCII latin1 characters
+      *latin_output++ = char(code_point);
+      pos += 2;
+    } else if ((leading_byte & 0b11110000) == 0b11100000) {
+      // We have a three-byte UTF-8
+      return result(error_code::TOO_LARGE, pos);
+    } else if ((leading_byte & 0b11111000) == 0b11110000) { // 0b11110000
+      // we have a 4-byte UTF-8 word.
+      return result(error_code::TOO_LARGE, pos);
+    } else {
+      // we either have too many continuation bytes or an invalid leading byte
+      if ((leading_byte & 0b11000000) == 0b10000000) {
+        return result(error_code::TOO_LONG, pos);
+      }
 
-                        const uint64_t L = ~H & surrogates_wordmask;
+      return result(error_code::HEADER_BITS, pos);
 
-                        const uint64_t a = L & (H >> 4);
+    }
+  }
+  return result(error_code::SUCCESS, latin_output - start);
+}
 
-                        const uint64_t b = a << 4;
 
-                        const uint64_t c = V | a | b;
-                        if (c == ~0ull) {
-                            input += 16;
-                        } else if (c == 0xfffffffffffffffull) {
-                            input += 15;
-                        } else {
-                            is_utf16 = false;
-                            break;
-                        }
-                    }
-                }
-            } else {
-                is_utf16 = false;
-                // Check for UTF-32
-                if (len % 4 == 0) {
-                    const char32_t * input = reinterpret_cast(buf);
-                    const char32_t* end32 = reinterpret_cast(start) + len/4;
+inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char* latin1_output) {
+  size_t extra_len{0};
+  // We potentially need to go back in time and find a leading byte.
+  // In theory '3' would be sufficient, but sometimes the error can go back quite far.
+  size_t how_far_back = prior_bytes;
+  // size_t how_far_back = 3; // 3 bytes in the past + current position
+  // if(how_far_back >= prior_bytes) { how_far_back = prior_bytes; }
+  bool found_leading_bytes{false};
+  // important: it is i <= how_far_back and not 'i < how_far_back'.
+  for(size_t i = 0; i <= how_far_back; i++) {
+    unsigned char byte = buf[0-i];
+    found_leading_bytes = ((byte & 0b11000000) != 0b10000000);
+    if(found_leading_bytes) {
+      buf -= i;
+      extra_len = i;
+      break;
+    }
+  }
+  //
+  // It is possible for this function to return a negative count in its result.
+  // C++ Standard Section 18.1 defines size_t is in  which is described in C Standard as .
+  // C Standard Section 4.1.5 defines size_t as an unsigned integral type of the result of the sizeof operator
+  //
+  // An unsigned type will simply wrap round arithmetically (well defined).
+  //
+  if(!found_leading_bytes) {
+    // If how_far_back == 3, we may have four consecutive continuation bytes!!!
+    // [....] [continuation] [continuation] [continuation] | [buf is continuation]
+    // Or we possibly have a stream that does not start with a leading byte.
+    return result(error_code::TOO_LONG, 0-how_far_back);
+  }
+  result res = convert_with_errors(buf, len + extra_len, latin1_output);
+  if (res.error) {
+    res.count -= extra_len;
+  }
+  return res;
+}
 
-                    // Must start checking for surrogates
-                    uint32x4_t currentoffsetmax = vmovq_n_u32(0x0);
-                    const uint32x4_t offset = vmovq_n_u32(0xffff2000);
-                    const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff);
 
-                    const uint32x4_t in32 =  vreinterpretq_u32_u16(in);
-                    const uint32x4_t secondin32 =  vreinterpretq_u32_u16(secondin);
-                    const uint32x4_t thirdin32 =  vreinterpretq_u32_u16(thirdin);
-                    const uint32x4_t fourthin32 =  vreinterpretq_u32_u16(fourthin);
+} // utf8_to_latin1 namespace
+} // unnamed namespace
+} // namespace scalar
+} // namespace simdutf
 
-                    currentmax = vmaxq_u32(in32,currentmax);
-                    currentmax = vmaxq_u32(secondin32,currentmax);
-                    currentmax = vmaxq_u32(thirdin32,currentmax);
-                    currentmax = vmaxq_u32(fourthin32,currentmax);
-
-                    currentoffsetmax = vmaxq_u32(vaddq_u32(in32, offset), currentoffsetmax);
-                    currentoffsetmax = vmaxq_u32(vaddq_u32(secondin32, offset), currentoffsetmax);
-                    currentoffsetmax = vmaxq_u32(vaddq_u32(thirdin32, offset), currentoffsetmax);
-                    currentoffsetmax = vmaxq_u32(vaddq_u32(fourthin32, offset), currentoffsetmax);
-
-                    while (input + 4 < end32) {
-                        const uint32x4_t in_32 = vld1q_u32(reinterpret_cast(input));
-                        currentmax = vmaxq_u32(in_32,currentmax);
-                        currentoffsetmax = vmaxq_u32(vaddq_u32(in_32, offset), currentoffsetmax);
-                        input += 4;
-                    }
-
-                    uint32x4_t forbidden_words = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), standardoffsetmax);
-                    if(vmaxvq_u32(forbidden_words) != 0) {
-                        is_utf32 = false;
-                    }
-                } else {
-                    is_utf32 = false;
-                }
-            }
-            break;
-        }
-        // If no surrogate, validate under other encodings as well
-
-        // UTF-32 validation
-        currentmax = vmaxq_u32(vreinterpretq_u32_u16(in),currentmax);
-        currentmax = vmaxq_u32(vreinterpretq_u32_u16(secondin),currentmax);
-        currentmax = vmaxq_u32(vreinterpretq_u32_u16(thirdin),currentmax);
-        currentmax = vmaxq_u32(vreinterpretq_u32_u16(fourthin),currentmax);
-
-        // UTF-8 validation
-        // Relies on ../generic/utf8_validation/utf8_lookup4_algorithm.h
-        simd::simd8x64 in8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(secondin), vreinterpretq_u8_u16(thirdin), vreinterpretq_u8_u16(fourthin));
-        check.check_next_input(in8);
-
-        buf += 64;
-    }
+#endif
+/* end file src/scalar/utf8_to_latin1/utf8_to_latin1.h */
+/* begin file src/scalar/utf16_to_latin1/utf16_to_latin1.h */
+#ifndef SIMDUTF_UTF16_TO_LATIN1_H
+#define SIMDUTF_UTF16_TO_LATIN1_H
 
-    // Check which encodings are possible
+namespace simdutf {
+namespace scalar {
+namespace {
+namespace utf16_to_latin1 {
 
-    if (is_utf8) {
-        if (static_cast(buf - start) != len) {
-            uint8_t block[64]{};
-            std::memset(block, 0x20, 64);
-            std::memcpy(block, buf, len - (buf - start));
-            simd::simd8x64 in(block);
-            check.check_next_input(in);
-        }
-        if (!check.errors()) {
-            out |= simdutf::encoding_type::UTF8;
-        }
-    }
+#include   // for std::memcpy
 
-    if (is_utf16 && scalar::utf16::validate(reinterpret_cast(buf), (len - (buf - start))/2)) {
-        out |= simdutf::encoding_type::UTF16_LE;
-    }
+template 
+inline size_t convert(const char16_t* buf, size_t len, char* latin_output) {
+  const uint16_t *data = reinterpret_cast(buf);
+  size_t pos = 0;
+  std::vector temp_output(len);
+  char* current_write = temp_output.data();
+  uint16_t word = 0;
+  uint16_t too_large = 0;
 
-    if (is_utf32 && (len % 4 == 0)) {
-        const uint32x4_t standardmax = vmovq_n_u32(0x10ffff);
-        uint32x4_t is_zero = veorq_u32(vmaxq_u32(currentmax, standardmax), standardmax);
-        if (vmaxvq_u32(is_zero) == 0 && scalar::utf32::validate(reinterpret_cast(buf), (len - (buf - start))/4)) {
-            out |= simdutf::encoding_type::UTF32_LE;
-        }
-    }
+  while (pos < len) {
+    word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
+    too_large |= word;
+    *current_write++ = char(word & 0xFF);
+    pos++;
+  }
+  if((too_large & 0xFF00) != 0) { return 0; }
 
-    return out;
+  // Only copy to latin_output if there were no errors
+  std::memcpy(latin_output, temp_output.data(), len);
+  
+  return current_write - temp_output.data();
 }
-/* end file src/arm64/arm_detect_encodings.cpp */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_validate_utf16.cpp
-/* begin file src/arm64/arm_validate_utf16.cpp */
 template 
-const char16_t* arm_validate_utf16(const char16_t* input, size_t size) {
-    const char16_t* end = input + size;
-    const auto v_d8 = simd8::splat(0xd8);
-    const auto v_f8 = simd8::splat(0xf8);
-    const auto v_fc = simd8::splat(0xfc);
-    const auto v_dc = simd8::splat(0xdc);
-    while (input + 16 < end) {
-        // 0. Load data: since the validation takes into account only higher
-        //    byte of each word, we compress the two vectors into one which
-        //    consists only the higher bytes.
-        auto in0 = simd16(input);
-        auto in1 = simd16(input + simd16::SIZE / sizeof(char16_t));
-        if (!match_system(big_endian)) {
-            #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-            const uint8x16_t swap = make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-            #else
-            const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
-            #endif
-            in0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in0), swap));
-            in1 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in1), swap));
-        }
-        const auto t0 = in0.shr<8>();
-        const auto t1 = in1.shr<8>();
-        const simd8 in = simd16::pack(t0, t1);
-        // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy).
-        const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64();
-        if(surrogates_wordmask == 0) {
-            input += 16;
-        } else {
-            // 2. We have some surrogates that have to be distinguished:
-            //    - low  surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
-            //    - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
-            //
-            //    Fact: high surrogate has 11th bit set (3rd bit in the higher word)
-
-            // V - non-surrogate words
-            //     V = not surrogates_wordmask
-            const uint64_t V = ~surrogates_wordmask;
+inline result convert_with_errors(const char16_t* buf, size_t len, char* latin_output) {
+ const uint16_t *data = reinterpret_cast(buf);
+  size_t pos = 0;
+  char* start{latin_output};
+  uint16_t word;
 
-            // H - word-mask for high surrogates: the six highest bits are 0b1101'11
-            const auto vH = ((in & v_fc) ==  v_dc);
-            const uint64_t H = vH.to_bitmask64();
+  while (pos < len) {
+    if (pos + 16 <= len) { // if it is safe to read 32 more bytes, check that they are Latin1
+      uint64_t v1, v2, v3, v4;
+      ::memcpy(&v1, data + pos, sizeof(uint64_t));
+      ::memcpy(&v2, data + pos + 4, sizeof(uint64_t));
+      ::memcpy(&v3, data + pos + 8, sizeof(uint64_t));
+      ::memcpy(&v4, data + pos  + 12, sizeof(uint64_t));
 
-            // L - word mask for low surrogates
-            //     L = not H and surrogates_wordmask
-            const uint64_t L = ~H & surrogates_wordmask;
+      if (!match_system(big_endian)) { v1 = (v1 >> 8) | (v1 << (64 - 8)); }
+      if (!match_system(big_endian)) { v2 = (v2 >> 8) | (v2 << (64 - 8)); }
+      if (!match_system(big_endian)) { v3 = (v3 >> 8) | (v3 << (64 - 8)); }
+      if (!match_system(big_endian)) { v4 = (v1 >> 8) | (v4 << (64 - 8)); }
 
-            const uint64_t a = L & (H >> 4); // A low surrogate must be followed by high one.
-                              // (A low surrogate placed in the 7th register's word
-                              // is an exception we handle.)
-            const uint64_t b = a << 4; // Just mark that the opposite fact is hold,
-                          // thanks to that we have only two masks for valid case.
-            const uint64_t c = V | a | b;      // Combine all the masks into the final one.
-            if (c == ~0ull) {
-                // The whole input register contains valid UTF-16, i.e.,
-                // either single words or proper surrogate pairs.
-                input += 16;
-            } else if (c == 0xfffffffffffffffull) {
-                // The 15 lower words of the input register contains valid UTF-16.
-                // The 15th word may be either a low or high surrogate. It the next
-                // iteration we 1) check if the low surrogate is followed by a high
-                // one, 2) reject sole high surrogate.
-                input += 15;
-            } else {
-                return nullptr;
-            }
+      if (((v1 | v2 | v3 | v4) & 0xFF00FF00FF00FF00) == 0) {
+        size_t final_pos = pos + 16;
+        while(pos < final_pos) {
+          *latin_output++ = !match_system(big_endian) ? char(utf16::swap_bytes(data[pos])) : char(data[pos]);
+          pos++;
         }
+        continue;
+      }
     }
-    return input;
+    word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
+    if((word & 0xFF00 ) == 0) {
+        *latin_output++ = char(word & 0xFF);
+        pos++;
+    } else { return result(error_code::TOO_LARGE, pos); }
+  }
+  return result(error_code::SUCCESS,latin_output - start);
 }
 
 
-template 
-const result arm_validate_utf16_with_errors(const char16_t* input, size_t size) {
-    const char16_t* start = input;
-    const char16_t* end = input + size;
-
-    const auto v_d8 = simd8::splat(0xd8);
-    const auto v_f8 = simd8::splat(0xf8);
-    const auto v_fc = simd8::splat(0xfc);
-    const auto v_dc = simd8::splat(0xdc);
-    while (input + 16 < end) {
-        // 0. Load data: since the validation takes into account only higher
-        //    byte of each word, we compress the two vectors into one which
-        //    consists only the higher bytes.
-        auto in0 = simd16(input);
-        auto in1 = simd16(input + simd16::SIZE / sizeof(char16_t));
+} // utf16_to_latin1 namespace
+} // unnamed namespace
+} // namespace scalar
+} // namespace simdutf
 
-        if (!match_system(big_endian)) {
-            #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-            const uint8x16_t swap = make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-            #else
-            const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
-            #endif
-            in0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in0), swap));
-            in1 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in1), swap));
-        }
-        const auto t0 = in0.shr<8>();
-        const auto t1 = in1.shr<8>();
-        const simd8 in = simd16::pack(t0, t1);
-        // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy).
-        const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64();
-        if(surrogates_wordmask == 0) {
-            input += 16;
-        } else {
-            // 2. We have some surrogates that have to be distinguished:
-            //    - low  surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
-            //    - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
-            //
-            //    Fact: high surrogate has 11th bit set (3rd bit in the higher word)
+#endif
+/* end file src/scalar/utf16_to_latin1/utf16_to_latin1.h */
+/* begin file src/scalar/utf32_to_latin1/utf32_to_latin1.h */
+#ifndef SIMDUTF_UTF32_TO_LATIN1_H
+#define SIMDUTF_UTF32_TO_LATIN1_H
 
-            // V - non-surrogate words
-            //     V = not surrogates_wordmask
-            const uint64_t V = ~surrogates_wordmask;
+namespace simdutf {
+namespace scalar {
+namespace {
+namespace utf32_to_latin1 {
 
-            // H - word-mask for high surrogates: the six highest bits are 0b1101'11
-            const auto vH = ((in & v_fc) ==  v_dc);
-            const uint64_t H = vH.to_bitmask64();
+inline size_t convert(const char32_t *buf, size_t len, char *latin1_output) {
+  const uint32_t *data = reinterpret_cast(buf);
+  char* start = latin1_output;
+  uint32_t utf32_char;
+  size_t pos = 0;
+  uint32_t too_large = 0;
 
-            // L - word mask for low surrogates
-            //     L = not H and surrogates_wordmask
-            const uint64_t L = ~H & surrogates_wordmask;
+  while (pos < len) {
+    utf32_char = (uint32_t)data[pos];
+    too_large |= utf32_char;
+    *latin1_output++ = (char)(utf32_char & 0xFF);
+    pos++;
+  }
+  if((too_large & 0xFFFFFF00) != 0) { return 0; }
+  return latin1_output - start;
+}
 
-            const uint64_t a = L & (H >> 4); // A low surrogate must be followed by high one.
-                              // (A low surrogate placed in the 7th register's word
-                              // is an exception we handle.)
-            const uint64_t b = a << 4; // Just mark that the opposite fact is hold,
-                          // thanks to that we have only two masks for valid case.
-            const uint64_t c = V | a | b;      // Combine all the masks into the final one.
-            if (c == ~0ull) {
-                // The whole input register contains valid UTF-16, i.e.,
-                // either single words or proper surrogate pairs.
-                input += 16;
-            } else if (c == 0xfffffffffffffffull) {
-                // The 15 lower words of the input register contains valid UTF-16.
-                // The 15th word may be either a low or high surrogate. It the next
-                // iteration we 1) check if the low surrogate is followed by a high
-                // one, 2) reject sole high surrogate.
-                input += 15;
-            } else {
-                return result(error_code::SURROGATE, input - start);
-            }
-        }
+inline result convert_with_errors(const char32_t *buf, size_t len, char *latin1_output) {
+  const uint32_t *data = reinterpret_cast(buf);
+  char* start{latin1_output};
+  size_t pos = 0;
+  while (pos < len) {
+    if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that they are Latin1
+      uint64_t v;
+      ::memcpy(&v, data + pos, sizeof(uint64_t));
+      if ((v & 0xFFFFFF00FFFFFF00) == 0) {
+        *latin1_output++ = char(buf[pos]);
+        *latin1_output++ = char(buf[pos+1]);
+        pos += 2;
+        continue;
+      }
     }
-    return result(error_code::SUCCESS, input - start);
+    uint32_t utf32_char = data[pos];
+    if ((utf32_char & 0xFFFFFF00) == 0) { // Check if the character can be represented in Latin-1
+      *latin1_output++ = (char)(utf32_char & 0xFF);
+      pos++;
+    } else { return result(error_code::TOO_LARGE, pos); };
+  }
+  return result(error_code::SUCCESS, latin1_output - start);
 }
-/* end file src/arm64/arm_validate_utf16.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_validate_utf32le.cpp
-/* begin file src/arm64/arm_validate_utf32le.cpp */
-
-const char32_t* arm_validate_utf32le(const char32_t* input, size_t size) {
-    const char32_t* end = input + size;
 
-    const uint32x4_t standardmax = vmovq_n_u32(0x10ffff);
-    const uint32x4_t offset = vmovq_n_u32(0xffff2000);
-    const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff);
-    uint32x4_t currentmax = vmovq_n_u32(0x0);
-    uint32x4_t currentoffsetmax = vmovq_n_u32(0x0);
+} // utf32_to_latin1 namespace
+} // unnamed namespace
+} // namespace scalar
+} // namespace simdutf
 
-    while (input + 4 < end) {
-        const uint32x4_t in = vld1q_u32(reinterpret_cast(input));
-        currentmax = vmaxq_u32(in,currentmax);
-        currentoffsetmax = vmaxq_u32(vaddq_u32(in, offset), currentoffsetmax);
-        input += 4;
-    }
+#endif
+/* end file src/scalar/utf32_to_latin1/utf32_to_latin1.h */
 
-    uint32x4_t is_zero = veorq_u32(vmaxq_u32(currentmax, standardmax), standardmax);
-    if(vmaxvq_u32(is_zero) != 0) {
-        return nullptr;
-    }
+/* begin file src/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */
+#ifndef SIMDUTF_VALID_UTF8_TO_LATIN1_H
+#define SIMDUTF_VALID_UTF8_TO_LATIN1_H
 
-    is_zero = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), standardoffsetmax);
-    if(vmaxvq_u32(is_zero) != 0) {
-        return nullptr;
-    }
+namespace simdutf {
+namespace scalar {
+namespace {
+namespace utf8_to_latin1 {
 
-    return input;
-}
+inline size_t convert_valid(const char* buf, size_t len, char* latin_output) {
+ const uint8_t *data = reinterpret_cast(buf);
 
+  size_t pos = 0;
+  char* start{latin_output};
 
-const result arm_validate_utf32le_with_errors(const char32_t* input, size_t size) {
-    const char32_t* start = input;
-    const char32_t* end = input + size;
-
-    const uint32x4_t standardmax = vmovq_n_u32(0x10ffff);
-    const uint32x4_t offset = vmovq_n_u32(0xffff2000);
-    const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff);
-    uint32x4_t currentmax = vmovq_n_u32(0x0);
-    uint32x4_t currentoffsetmax = vmovq_n_u32(0x0);
-
-    while (input + 4 < end) {
-        const uint32x4_t in = vld1q_u32(reinterpret_cast(input));
-        currentmax = vmaxq_u32(in,currentmax);
-        currentoffsetmax = vmaxq_u32(vaddq_u32(in, offset), currentoffsetmax);
-
-        uint32x4_t is_zero = veorq_u32(vmaxq_u32(currentmax, standardmax), standardmax);
-        if(vmaxvq_u32(is_zero) != 0) {
-            return result(error_code::TOO_LARGE, input - start);
-        }
-
-        is_zero = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), standardoffsetmax);
-        if(vmaxvq_u32(is_zero) != 0) {
-            return result(error_code::SURROGATE, input - start);
+  while (pos < len) {
+    // try to convert the next block of 16 ASCII bytes
+    if (pos + 16 <= len) { // if it is safe to read 16 more bytes, check that they are ascii
+      uint64_t v1;
+      ::memcpy(&v1, data + pos, sizeof(uint64_t));
+      uint64_t v2;
+      ::memcpy(&v2, data + pos + sizeof(uint64_t), sizeof(uint64_t));
+      uint64_t v{v1 | v2}; // We are only interested in these bits: 1000 1000 1000 1000, so it makes sense to concatenate everything
+      if ((v & 0x8080808080808080) == 0) { // if NONE of these are set, e.g. all of them are zero, then everything is ASCII
+        size_t final_pos = pos + 16;
+        while(pos < final_pos) {
+          *latin_output++ = char(buf[pos]);
+          pos++;
         }
-
-        input += 4;
+        continue;
+      }
     }
 
-    return result(error_code::SUCCESS, input - start);
-}
-/* end file src/arm64/arm_validate_utf32le.cpp */
-
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf8_to_utf16.cpp
-/* begin file src/arm64/arm_convert_utf8_to_utf16.cpp */
-// Convert up to 12 bytes from utf8 to utf16 using a mask indicating the
-// end of the code points. Only the least significant 12 bits of the mask
-// are accessed.
-// It returns how many bytes were consumed (up to 12).
-template 
-size_t convert_masked_utf8_to_utf16(const char *input,
-                           uint64_t utf8_end_of_code_point_mask,
-                           char16_t *&utf16_output) {
-  // we use an approach where we try to process up to 12 input bytes.
-  // Why 12 input bytes and not 16? Because we are concerned with the size of
-  // the lookup tables. Also 12 is nicely divisible by two and three.
-  //
-  #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-  const uint8x16_t swap = make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-  #else
-  const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
-  #endif
-  uint8x16_t in = vld1q_u8(reinterpret_cast(input));
-  const uint16_t input_utf8_end_of_code_point_mask =
-      utf8_end_of_code_point_mask & 0xfff;
-  //
-  // Optimization note: our main path below is load-latency dependent. Thus it is maybe
-  // beneficial to have fast paths that depend on branch prediction but have less latency.
-  // This results in more instructions but, potentially, also higher speeds.
-  //
-  // We first try a few fast paths.
-  if((utf8_end_of_code_point_mask & 0xffff) == 0xffff) {
-    // We process in chunks of 16 bytes
-    uint16x8_t ascii_first = vmovl_u8(vget_low_u8 (in));
-    uint16x8_t ascii_second = vmovl_high_u8(in);
-    if (!match_system(big_endian)) {
-      ascii_first = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(ascii_first), swap));
-      ascii_second = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(ascii_second), swap));
+    // suppose it is not an all ASCII byte sequence
+    uint8_t leading_byte = data[pos]; // leading byte
+    if (leading_byte < 0b10000000) {
+      // converting one ASCII byte !!!
+      *latin_output++ = char(leading_byte);
+      pos++;
+    } else if ((leading_byte & 0b11100000) == 0b11000000) { // the first three bits indicate:
+      // We have a two-byte UTF-8
+      if(pos + 1 >= len) { break; } // minimal bound checking
+      if ((data[pos + 1] & 0b11000000) != 0b10000000) { return 0; } // checks if the next byte is a valid continuation byte in UTF-8. A valid continuation byte starts with 10.
+      // range check -
+      uint32_t code_point = (leading_byte & 0b00011111) << 6 | (data[pos + 1] & 0b00111111); // assembles the Unicode code point from the two bytes. It does this by discarding the leading 110 and 10 bits from the two bytes, shifting the remaining bits of the first byte, and then combining the results with a bitwise OR operation.
+      *latin_output++ = char(code_point);
+      pos += 2;
+    } else {
+      // we may have a continuation but we do not do error checking
+      return 0;
     }
-    vst1q_u16(reinterpret_cast(utf16_output), ascii_first);
-    vst1q_u16(reinterpret_cast(utf16_output) + 8, ascii_second);
-    utf16_output += 16; // We wrote 16 16-bit characters.
-    return 16; // We consumed 16 bytes.
-  }
-  if((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa) {
-    // We want to take 8 2-byte UTF-8 words and turn them into 8 2-byte UTF-16 words.
-    // There is probably a more efficient sequence, but the following might do.
-    uint8x16_t perm = vqtbl1q_u8(in, swap);
-    uint8x16_t ascii = vandq_u8(perm, vreinterpretq_u8_u16(vmovq_n_u16(0x7f)));
-    uint8x16_t highbyte = vandq_u8(perm, vreinterpretq_u8_u16(vmovq_n_u16(0x1f00)));
-    uint8x16_t composed = vorrq_u8(ascii, vreinterpretq_u8_u16(vshrq_n_u16(vreinterpretq_u16_u8(highbyte), 2)));
-    if (!match_system(big_endian)) composed = vqtbl1q_u8(composed, swap);
-    vst1q_u8(reinterpret_cast(utf16_output), composed);
-    utf16_output += 8; // We wrote 16 bytes, 8 code points.
-    return 16;
   }
-  if(input_utf8_end_of_code_point_mask == 0x924) {
-    // We want to take 4 3-byte UTF-8 words and turn them into 4 2-byte UTF-16 words.
-    // There is probably a more efficient sequence, but the following might do.
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-    const uint8x16_t sh = make_uint8x16_t(2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 11, 10, 9, 255);
-#else
-    const uint8x16_t sh = {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 11, 10, 9, 255};
+  return latin_output - start;
+}
+
+} // utf8_to_latin1 namespace
+} // unnamed namespace
+} // namespace scalar
+} // namespace simdutf
+
 #endif
-    uint8x16_t perm = vqtbl1q_u8(in, sh);
-    uint8x16_t ascii =
-        vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x7f))); // 7 or 6 bits
-    uint8x16_t middlebyte =
-        vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x3f00))); // 5 or 6 bits
-    uint8x16_t middlebyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(middlebyte), 2));
-    uint32x4_t highbyte =
-        vreinterpretq_u32_u8(vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x0f0000)))); // 4 bits
-    uint32x4_t highbyte_shifted = vshrq_n_u32(highbyte, 4);
-    uint32x4_t composed =
-        vorrq_u32(vorrq_u32(vreinterpretq_u32_u8(ascii), vreinterpretq_u32_u8(middlebyte_shifted)), highbyte_shifted);
-    uint16x8_t composed_repacked = vmovn_high_u32(vmovn_u32(composed), composed);
-    if (!match_system(big_endian)) composed_repacked = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(composed_repacked), swap));
-    vst1q_u16(reinterpret_cast(utf16_output), composed_repacked);
-    utf16_output += 4;
-    return 12;
-  }
-  /// We do not have a fast path available, so we fallback.
+/* end file src/scalar/utf8_to_latin1/valid_utf8_to_latin1.h */
+/* begin file src/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */
+#ifndef SIMDUTF_VALID_UTF16_TO_LATIN1_H
+#define SIMDUTF_VALID_UTF16_TO_LATIN1_H
 
-  const uint8_t idx =
-      simdutf::tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0];
-  const uint8_t consumed =
-      simdutf::tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
+namespace simdutf {
+namespace scalar {
+namespace {
+namespace utf16_to_latin1 {
 
+template 
+inline size_t convert_valid(const char16_t* buf, size_t len, char* latin_output) {
+ const uint16_t *data = reinterpret_cast(buf);
+  size_t pos = 0;
+  char* start{latin_output};
+  uint16_t word = 0;
 
-  if (idx < 64) {
-    // SIX (6) input code-words
-    // this is a relatively easy scenario
-    // we process SIX (6) input code-words. The max length in bytes of six code
-    // words spanning between 1 and 2 bytes each is 12 bytes.
-    uint8x16_t sh = vld1q_u8(reinterpret_cast(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
-    uint8x16_t perm = vqtbl1q_u8(in, sh);
-    uint8x16_t ascii = vandq_u8(perm, vreinterpretq_u8_u16(vmovq_n_u16(0x7f)));
-    uint8x16_t highbyte = vandq_u8(perm, vreinterpretq_u8_u16(vmovq_n_u16(0x1f00)));
-    uint8x16_t composed = vorrq_u8(ascii, vreinterpretq_u8_u16(vshrq_n_u16(vreinterpretq_u16_u8(highbyte), 2)));
-    if (!match_system(big_endian)) composed = vqtbl1q_u8(composed, swap);
-    vst1q_u8(reinterpret_cast(utf16_output), composed);
-    utf16_output += 6; // We wrote 12 bytes, 6 code points.
-  } else if (idx < 145) {
-    // FOUR (4) input code-words
-    uint8x16_t sh = vld1q_u8(reinterpret_cast(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
-    uint8x16_t perm = vqtbl1q_u8(in, sh);
-    uint8x16_t ascii =
-        vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x7f))); // 7 or 6 bits
-    uint8x16_t middlebyte =
-        vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x3f00))); // 5 or 6 bits
-    uint8x16_t middlebyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(middlebyte), 2));
-    uint32x4_t highbyte =
-        vreinterpretq_u32_u8(vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x0f0000)))); // 4 bits
-    uint32x4_t highbyte_shifted = vshrq_n_u32(highbyte, 4);
-    uint32x4_t composed =
-        vorrq_u32(vorrq_u32(vreinterpretq_u32_u8(ascii), vreinterpretq_u32_u8(middlebyte_shifted)), highbyte_shifted);
-    uint16x8_t composed_repacked = vmovn_high_u32(vmovn_u32(composed), composed);
-    if (!match_system(big_endian)) composed_repacked = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(composed_repacked), swap));
-    vst1q_u16(reinterpret_cast(utf16_output), composed_repacked);
-    utf16_output += 4;
-  } else if (idx < 209) {
-    // TWO (2) input code-words
-    //////////////
-    // There might be garbage inputs where a leading byte mascarades as a four-byte
-    // leading byte (by being followed by 3 continuation byte), but is not greater than
-    // 0xf0. This could trigger a buffer overflow if we only counted leading
-    // bytes of the form 0xf0 as generating surrogate pairs, without further UTF-8 validation.
-    // Thus we must be careful to ensure that only leading bytes at least as large as 0xf0 generate surrogate pairs.
-    // We do as at the cost of an extra mask.
-    /////////////
-    uint8x16_t sh = vld1q_u8(reinterpret_cast(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
-    uint8x16_t perm = vqtbl1q_u8(in, sh);
-    uint8x16_t ascii = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x7f)));
-    uint8x16_t middlebyte = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x3f00)));
-    uint8x16_t middlebyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(middlebyte), 2));
-    uint8x16_t middlehighbyte = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x3f0000)));
-    // correct for spurious high bit
-    uint8x16_t correct =
-        vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x400000)))), 1));
-    middlehighbyte = veorq_u8(correct, middlehighbyte);
-    uint8x16_t middlehighbyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(middlehighbyte), 4));
-    // We deliberately carry the leading four bits if they are present, we remove
-    // them later when computing hightenbits.
-    uint8x16_t highbyte = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0xff000000)));
-    uint8x16_t highbyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(highbyte), 6));
-    // When we need to generate a surrogate pair (leading byte > 0xF0), then
-    // the corresponding 32-bit value in 'composed'  will be greater than
-    // > (0xff00000>>6) or > 0x3c00000. This can be used later to identify the
-    // location of the surrogate pairs.
-    uint8x16_t composed =
-        vorrq_u8(vorrq_u8(ascii, middlebyte_shifted),
-                     vorrq_u8(highbyte_shifted, middlehighbyte_shifted));
-    uint32x4_t composedminus =
-        vsubq_u32(vreinterpretq_u32_u8(composed), vmovq_n_u32(0x10000));
-    uint32x4_t lowtenbits =
-        vandq_u32(composedminus, vmovq_n_u32(0x3ff));
-    // Notice the 0x3ff mask:
-    uint32x4_t hightenbits = vandq_u32(vshrq_n_u32(composedminus, 10), vmovq_n_u32(0x3ff));
-    uint32x4_t lowtenbitsadd =
-        vaddq_u32(lowtenbits, vmovq_n_u32(0xDC00));
-    uint32x4_t hightenbitsadd =
-        vaddq_u32(hightenbits, vmovq_n_u32(0xD800));
-    uint32x4_t lowtenbitsaddshifted = vshlq_n_u32(lowtenbitsadd, 16);
-    uint32x4_t surrogates =
-        vorrq_u32(hightenbitsadd, lowtenbitsaddshifted);
-    uint32_t basic_buffer[4];
-    uint32_t basic_buffer_swap[4];
-    if (!match_system(big_endian)) {
-      vst1q_u32(basic_buffer_swap, vreinterpretq_u32_u8(vqtbl1q_u8(composed, swap)));
-      surrogates = vreinterpretq_u32_u8(vqtbl1q_u8(vreinterpretq_u8_u32(surrogates), swap));
-    }
-    vst1q_u32(basic_buffer, vreinterpretq_u32_u8(composed));
-    uint32_t surrogate_buffer[4];
-    vst1q_u32(surrogate_buffer, surrogates);
-    for (size_t i = 0; i < 3; i++) {
-      if(basic_buffer[i] > 0x3c00000) {
-        utf16_output[0] = uint16_t(surrogate_buffer[i] & 0xffff);
-        utf16_output[1] = uint16_t(surrogate_buffer[i] >> 16);
-        utf16_output += 2;
-      } else {
-        utf16_output[0] = !match_system(big_endian) ? uint16_t(basic_buffer_swap[i]) : uint16_t(basic_buffer[i]);
-        utf16_output++;
-      }
-    }
-  } else {
-    // here we know that there is an error but we do not handle errors
+  while (pos < len) {
+    word = !match_system(big_endian) ? utf16::swap_bytes(data[pos]) : data[pos];
+    *latin_output++ = char(word);
+    pos++;
   }
-  return consumed;
+
+  return latin_output - start;
 }
-/* end file src/arm64/arm_convert_utf8_to_utf16.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf8_to_utf32.cpp
-/* begin file src/arm64/arm_convert_utf8_to_utf32.cpp */
-// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the
-// end of the code points. Only the least significant 12 bits of the mask
-// are accessed.
-// It returns how many bytes were consumed (up to 12).
-size_t convert_masked_utf8_to_utf32(const char *input,
-                           uint64_t utf8_end_of_code_point_mask,
-                           char32_t *&utf32_out) {
-  // we use an approach where we try to process up to 12 input bytes.
-  // Why 12 input bytes and not 16? Because we are concerned with the size of
-  // the lookup tables. Also 12 is nicely divisible by two and three.
-  //
-  uint32_t*& utf32_output = reinterpret_cast(utf32_out);
-  uint8x16_t in = vld1q_u8(reinterpret_cast(input));
-  const uint16_t input_utf8_end_of_code_point_mask =
-      utf8_end_of_code_point_mask & 0xFFF;
-  //
-  // Optimization note: our main path below is load-latency dependent. Thus it is maybe
-  // beneficial to have fast paths that depend on branch prediction but have less latency.
-  // This results in more instructions but, potentially, also higher speeds.
-  //
-  // We first try a few fast paths.
-  if((utf8_end_of_code_point_mask & 0xffff) == 0xffff) {
-    // We process in chunks of 16 bytes
-    vst1q_u32(utf32_output, vmovl_u16(vget_low_u16(vmovl_u8(vget_low_u8 (in)))));
-    vst1q_u32(utf32_output + 4, vmovl_high_u16(vmovl_u8(vget_low_u8 (in))));
-    vst1q_u32(utf32_output + 8, vmovl_u16(vget_low_u16(vmovl_high_u8(in))));
-    vst1q_u32(utf32_output + 12, vmovl_high_u16(vmovl_high_u8(in)));
-    utf32_output += 16; // We wrote 16 16-bit characters.
-    return 16; // We consumed 16 bytes.
-  }
-  if((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa) {
-    // We want to take 8 2-byte UTF-8 words and turn them into 8 4-byte UTF-32 words.
-    // There is probably a more efficient sequence, but the following might do.
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-    const uint8x16_t sh = make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-#else
-    //const uint8x16_t sh = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
-    const uint8x16_t sh = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
-#endif
-    uint8x16_t perm = vqtbl1q_u8(in, sh);
-    uint8x16_t ascii = vandq_u8(perm, vreinterpretq_u8_u16(vmovq_n_u16(0x7f)));
-    uint8x16_t highbyte = vandq_u8(perm, vreinterpretq_u8_u16(vmovq_n_u16(0x1f00)));
-    uint8x16_t composed = vorrq_u8(ascii, vreinterpretq_u8_u16(vshrq_n_u16(vreinterpretq_u16_u8(highbyte), 2)));
-    vst1q_u32(utf32_output,  vmovl_u16(vget_low_u16(vreinterpretq_u16_u8(composed))));
-    vst1q_u32(utf32_output+4,  vmovl_high_u16(vreinterpretq_u16_u8(composed)));
-    utf32_output += 8; // We wrote 32 bytes, 8 code points.
-    return 16;
-  }
-  if(input_utf8_end_of_code_point_mask == 0x924) {
-    // We want to take 4 3-byte UTF-8 words and turn them into 4 4-byte UTF-32 words.
-    // There is probably a more efficient sequence, but the following might do.
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-    const uint8x16_t sh = make_uint8x16_t(2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 11, 10, 9, 255);
-#else
-    const uint8x16_t sh = {2, 1, 0, 255, 5, 4, 3, 255, 8, 7, 6, 255, 11, 10, 9, 255};
+
+} // utf16_to_latin1 namespace
+} // unnamed namespace
+} // namespace scalar
+} // namespace simdutf
+
 #endif
-    uint8x16_t perm = vqtbl1q_u8(in, sh);
-    uint8x16_t ascii =
-        vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x7f))); // 7 or 6 bits
-    uint8x16_t middlebyte =
-        vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x3f00))); // 5 or 6 bits
-    uint8x16_t middlebyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(middlebyte), 2));
-    uint32x4_t highbyte =
-        vreinterpretq_u32_u8(vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x0f0000)))); // 4 bits
-    uint32x4_t highbyte_shifted = vshrq_n_u32(highbyte, 4);
-    uint32x4_t composed =
-        vorrq_u32(vorrq_u32(vreinterpretq_u32_u8(ascii), vreinterpretq_u32_u8(middlebyte_shifted)), highbyte_shifted);
-    vst1q_u32(utf32_output, composed);
-    utf32_output += 4;
-    return 12;
-  }
-  /// We do not have a fast path available, so we fallback.
+/* end file src/scalar/utf16_to_latin1/valid_utf16_to_latin1.h */
+/* begin file src/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */
+#ifndef SIMDUTF_VALID_UTF32_TO_LATIN1_H
+#define SIMDUTF_VALID_UTF32_TO_LATIN1_H
 
-  const uint8_t idx =
-      simdutf::tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0];
-  const uint8_t consumed =
-      simdutf::tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
+namespace simdutf {
+namespace scalar {
+namespace {
+namespace utf32_to_latin1 {
 
+inline size_t convert_valid(const char32_t *buf, size_t len, char *latin1_output) {
+  const uint32_t *data = reinterpret_cast(buf);
+  char* start = latin1_output;
+  uint32_t utf32_char;
+  size_t pos = 0;
 
-  if (idx < 64) {
-    // SIX (6) input code-words
-    // this is a relatively easy scenario
-    // we process SIX (6) input code-words. The max length in bytes of six code
-    // words spanning between 1 and 2 bytes each is 12 bytes.
-    uint8x16_t sh = vld1q_u8(reinterpret_cast(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
-    uint8x16_t perm = vqtbl1q_u8(in, sh);
-    uint8x16_t ascii = vandq_u8(perm, vreinterpretq_u8_u16(vmovq_n_u16(0x7f)));
-    uint8x16_t highbyte = vandq_u8(perm, vreinterpretq_u8_u16(vmovq_n_u16(0x1f00)));
-    uint8x16_t composed = vorrq_u8(ascii, vreinterpretq_u8_u16(vshrq_n_u16(vreinterpretq_u16_u8(highbyte), 2)));
-    vst1q_u32(utf32_output,  vmovl_u16(vget_low_u16(vreinterpretq_u16_u8(composed))));
-    vst1q_u32(utf32_output+4,  vmovl_high_u16(vreinterpretq_u16_u8(composed)));
-    utf32_output += 6; // We wrote 12 bytes, 6 code points.
-  } else if (idx < 145) {
-    // FOUR (4) input code-words
-    uint8x16_t sh = vld1q_u8(reinterpret_cast(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
-    uint8x16_t perm = vqtbl1q_u8(in, sh);
-    uint8x16_t ascii =
-        vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x7f))); // 7 or 6 bits
-    uint8x16_t middlebyte =
-        vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x3f00))); // 5 or 6 bits
-    uint8x16_t middlebyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(middlebyte), 2));
-    uint32x4_t highbyte =
-        vreinterpretq_u32_u8(vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x0f0000)))); // 4 bits
-    uint32x4_t highbyte_shifted = vshrq_n_u32(highbyte, 4);
-    uint32x4_t composed =
-        vorrq_u32(vorrq_u32(vreinterpretq_u32_u8(ascii), vreinterpretq_u32_u8(middlebyte_shifted)), highbyte_shifted);
-    vst1q_u32(utf32_output, composed);
-    utf32_output += 4;
-  } else if (idx < 209) {
-    // TWO (2) input code-words
-    uint8x16_t sh = vld1q_u8(reinterpret_cast(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
-    uint8x16_t perm = vqtbl1q_u8(in, sh);
-    uint8x16_t ascii = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x7f)));
-    uint8x16_t middlebyte = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x3f00)));
-    uint8x16_t middlebyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(middlebyte), 2));
-    uint8x16_t middlehighbyte = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x3f0000)));
-    // correct for spurious high bit
-    uint8x16_t correct =
-        vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x400000)))), 1));
-    middlehighbyte = veorq_u8(correct, middlehighbyte);
-    uint8x16_t middlehighbyte_shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(middlehighbyte), 4));
-    uint8x16_t highbyte = vandq_u8(perm, vreinterpretq_u8_u32(vmovq_n_u32(0x07000000)));
-    uint8x16_t highbyte_shifted =vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(highbyte), 6));
-    uint8x16_t composed =
-        vorrq_u8(vorrq_u8(ascii, middlebyte_shifted),
-                     vorrq_u8(highbyte_shifted, middlehighbyte_shifted));
-    vst1q_u32(utf32_output, vreinterpretq_u32_u8(composed));
-    utf32_output += 3;
-  } else {
-    // here we know that there is an error but we do not handle errors
+  while (pos < len) {
+  utf32_char = (uint32_t)data[pos];
+
+  if (pos + 2 <= len) { // if it is safe to read 8 more bytes, check that they are Latin1
+      uint64_t v;
+      ::memcpy(&v, data + pos, sizeof(uint64_t));
+      if ((v & 0xFFFFFF00FFFFFF00) == 0) {
+      *latin1_output++ = char(buf[pos]);
+      *latin1_output++ = char(buf[pos+1]);
+      pos += 2;
+      continue;
+    }
   }
-  return consumed;
-}
-/* end file src/arm64/arm_convert_utf8_to_utf32.cpp */
+  *latin1_output++ = (char)(utf32_char & 0xFF);
+  pos++;
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf16_to_utf8.cpp
-/* begin file src/arm64/arm_convert_utf16_to_utf8.cpp */
-/*
-    The vectorized algorithm works on single SSE register i.e., it
-    loads eight 16-bit words.
+  }
+  return latin1_output - start;
+}
 
-    We consider three cases:
-    1. an input register contains no surrogates and each value
-       is in range 0x0000 .. 0x07ff.
-    2. an input register contains no surrogates and values are
-       is in range 0x0000 .. 0xffff.
-    3. an input register contains surrogates --- i.e. codepoints
-       can have 16 or 32 bits.
 
-    Ad 1.
+} // utf32_to_latin1 namespace
+} // unnamed namespace
+} // namespace scalar
+} // namespace simdutf
 
-    When values are less than 0x0800, it means that a 16-bit words
-    can be converted into: 1) single UTF8 byte (when it's an ASCII
-    char) or 2) two UTF8 bytes.
+#endif
+/* end file src/scalar/utf32_to_latin1/valid_utf32_to_latin1.h */
 
-    For this case we do only some shuffle to obtain these 2-byte
-    codes and finally compress the whole SSE register with a single
-    shuffle.
 
-    We need 256-entry lookup table to get a compression pattern
-    and the number of output bytes in the compressed vector register.
-    Each entry occupies 17 bytes.
 
-    Ad 2.
-
-    When values fit in 16-bit words, but are above 0x07ff, then
-    a single word may produce one, two or three UTF8 bytes.
-
-    We prepare data for all these three cases in two registers.
-    The first register contains lower two UTF8 bytes (used in all
-    cases), while the second one contains just the third byte for
-    the three-UTF8-bytes case.
-
-    Finally these two registers are interleaved forming eight-element
-    array of 32-bit values. The array spans two SSE registers.
-    The bytes from the registers are compressed using two shuffles.
-
-    We need 256-entry lookup table to get a compression pattern
-    and the number of output bytes in the compressed vector register.
-    Each entry occupies 17 bytes.
+SIMDUTF_PUSH_DISABLE_WARNINGS
+SIMDUTF_DISABLE_UNDESIRED_WARNINGS
 
 
-    To summarize:
-    - We need two 256-entry tables that have 8704 bytes in total.
-*/
-/*
-  Returns a pair: the first unprocessed byte from buf and utf8_output
-  A scalar routing should carry on the conversion of the tail.
-*/
-template 
-std::pair arm_convert_utf16_to_utf8(const char16_t* buf, size_t len, char* utf8_out) {
-  uint8_t * utf8_output = reinterpret_cast(utf8_out);
-  const char16_t* end = buf + len;
+#if SIMDUTF_IMPLEMENTATION_ARM64
+/* begin file src/arm64/implementation.cpp */
+/* begin file src/simdutf/arm64/begin.h */
+// redefining SIMDUTF_IMPLEMENTATION to "arm64"
+// #define SIMDUTF_IMPLEMENTATION arm64
+/* end file src/simdutf/arm64/begin.h */
+namespace simdutf {
+namespace arm64 {
+namespace {
+#ifndef SIMDUTF_ARM64_H
+#error "arm64.h must be included"
+#endif
+using namespace simd;
 
-  const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
-  const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
-  const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
+simdutf_really_inline bool is_ascii(const simd8x64& input) {
+    simd8 bits = input.reduce_or();
+    return bits.max_val() < 0b10000000u;
+}
 
-  while (buf + 16 <= end) {
-    uint16x8_t in = vld1q_u16(reinterpret_cast(buf));
-    if (!match_system(big_endian)) {
-      #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-      const uint8x16_t swap = make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-      #else
-      const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
-      #endif
-      in = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), swap));
-    }
-    if(vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!!
-        // It is common enough that we have sequences of 16 consecutive ASCII characters.
-        uint16x8_t nextin = vld1q_u16(reinterpret_cast(buf) + 8);
-        if (!match_system(big_endian)) {
-          #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-          const uint8x16_t swap = make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-          #else
-          const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
-          #endif
-          nextin = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(nextin), swap));
-        }
-        if(vmaxvq_u16(nextin) > 0x7F) {
-          // 1. pack the bytes
-          // obviously suboptimal.
-          uint8x8_t utf8_packed = vmovn_u16(in);
-          // 2. store (8 bytes)
-          vst1_u8(utf8_output, utf8_packed);
-          // 3. adjust pointers
-          buf += 8;
-          utf8_output += 8;
-          in = nextin;
-        } else {
-          // 1. pack the bytes
-          // obviously suboptimal.
-          uint8x16_t utf8_packed = vmovn_high_u16(vmovn_u16(in), nextin);
-          // 2. store (16 bytes)
-          vst1q_u8(utf8_output, utf8_packed);
-          // 3. adjust pointers
-          buf += 16;
-          utf8_output += 16;
-          continue; // we are done for this round!
-        }
-    }
+simdutf_unused simdutf_really_inline simd8 must_be_continuation(const simd8 prev1, const simd8 prev2, const simd8 prev3) {
+    simd8 is_second_byte = prev1 >= uint8_t(0b11000000u);
+    simd8 is_third_byte  = prev2 >= uint8_t(0b11100000u);
+    simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u);
+    // Use ^ instead of | for is_*_byte, because ^ is commutative, and the caller is using ^ as well.
+    // This will work fine because we only have to report errors for cases with 0-1 lead bytes.
+    // Multiple lead bytes implies 2 overlapping multibyte characters, and if that happens, there is
+    // guaranteed to be at least *one* lead byte that is part of only 1 other multibyte character.
+    // The error will be detected there.
+    return is_second_byte ^ is_third_byte ^ is_fourth_byte;
+}
 
-    if (vmaxvq_u16(in) <= 0x7FF) {
-          // 1. prepare 2-byte values
-          // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
-          // expected output   : [110a|aaaa|10bb|bbbb] x 8
-          const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
-          const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
+simdutf_really_inline simd8 must_be_2_3_continuation(const simd8 prev2, const simd8 prev3) {
+    simd8 is_third_byte  = prev2 >= uint8_t(0b11100000u);
+    simd8 is_fourth_byte = prev3 >= uint8_t(0b11110000u);
+    return is_third_byte ^ is_fourth_byte;
+}
 
-          // t0 = [000a|aaaa|bbbb|bb00]
-          const uint16x8_t t0 = vshlq_n_u16(in, 2);
-          // t1 = [000a|aaaa|0000|0000]
-          const uint16x8_t t1 = vandq_u16(t0, v_1f00);
-          // t2 = [0000|0000|00bb|bbbb]
-          const uint16x8_t t2 = vandq_u16(in, v_003f);
-          // t3 = [000a|aaaa|00bb|bbbb]
-          const uint16x8_t t3 = vorrq_u16(t1, t2);
-          // t4 = [110a|aaaa|10bb|bbbb]
-          const uint16x8_t t4 = vorrq_u16(t3, v_c080);
-          // 2. merge ASCII and 2-byte codewords
-          const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
-          const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
-          const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4));
-          // 3. prepare bitmask for 8-bit lookup
+// common functions for utf8 conversions
+simdutf_really_inline uint16x4_t convert_utf8_3_byte_to_utf16(uint8x16_t in) {
+  // Low half contains  10cccccc|1110aaaa
+  // High half contains 10bbbbbb|10bbbbbb
 #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-          const uint16x8_t mask = make_uint16x8_t(0x0001, 0x0004,
-                                    0x0010, 0x0040,
-                                    0x0002, 0x0008,
-                                    0x0020, 0x0080);
+  const uint8x16_t sh = make_uint8x16_t(0, 2, 3, 5, 6, 8, 9, 11, 1, 1, 4, 4, 7, 7, 10, 10);
 #else
-          const uint16x8_t mask = { 0x0001, 0x0004,
-                                    0x0010, 0x0040,
-                                    0x0002, 0x0008,
-                                    0x0020, 0x0080 };
+  const uint8x16_t sh = {0, 2, 3, 5, 6, 8, 9, 11, 1, 1, 4, 4, 7, 7, 10, 10};
 #endif
-          uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
-          // 4. pack the bytes
-          const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
-          const uint8x16_t shuffle = vld1q_u8(row + 1);
-          const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
+  uint8x16_t perm = vqtbl1q_u8(in, sh);
+  // Split into half vectors.
+  // 10cccccc|1110aaaa
+  uint8x8_t perm_low = vget_low_u8(perm); // no-op
+  // 10bbbbbb|10bbbbbb
+  uint8x8_t perm_high = vget_high_u8(perm);
+  // xxxxxxxx 10bbbbbb
+  uint16x4_t mid = vreinterpret_u16_u8(perm_high); // no-op
+  // xxxxxxxx 1110aaaa
+  uint16x4_t high = vreinterpret_u16_u8(perm_low); // no-op
+  // Assemble with shift left insert.
+  // xxxxxxaa aabbbbbb
+  uint16x4_t mid_high = vsli_n_u16(mid, high, 6);
+  // (perm_low << 8) | (perm_low >> 8)
+  // xxxxxxxx 10cccccc
+  uint16x4_t low = vreinterpret_u16_u8(vrev16_u8(perm_low));
+  // Shift left insert into the low bits
+  // aaaabbbb bbcccccc
+  uint16x4_t composed = vsli_n_u16(low, mid_high, 6);
+  return composed;
+}
+
+simdutf_really_inline uint16x8_t convert_utf8_2_byte_to_utf16(uint8x16_t in) {
+  // Converts 6 2 byte UTF-8 characters to 6 UTF-16 characters.
+  // Technically this calculates 8, but 6 does better and happens more often
+  // (The languages which use these codepoints use ASCII spaces so 8 would need to be
+  // in the middle of a very long word).
+
+  // 10bbbbbb 110aaaaa
+  uint16x8_t upper = vreinterpretq_u16_u8(in);
+  // (in << 8) | (in >> 8)
+  // 110aaaaa 10bbbbbb
+  uint16x8_t lower = vreinterpretq_u16_u8(vrev16q_u8(in));
+  // 00000000 000aaaaa
+  uint16x8_t upper_masked = vandq_u16(upper, vmovq_n_u16(0x1F));
+  // Assemble with shift left insert.
+  // 00000aaa aabbbbbb
+  uint16x8_t composed = vsliq_n_u16(lower, upper_masked, 6);
+  return composed;
+}
+
+simdutf_really_inline uint16x8_t convert_utf8_1_to_2_byte_to_utf16(uint8x16_t in, size_t shufutf8_idx) {
+  // Converts 6 1-2 byte UTF-8 characters to 6 UTF-16 characters.
+  // This is a relatively easy scenario
+  // we process SIX (6) input code-code units. The max length in bytes of six code
+  // code units spanning between 1 and 2 bytes each is 12 bytes.
+  uint8x16_t sh = vld1q_u8(reinterpret_cast(simdutf::tables::utf8_to_utf16::shufutf8[shufutf8_idx]));
+  // Shuffle
+  // 1 byte: 00000000 0bbbbbbb
+  // 2 byte: 110aaaaa 10bbbbbb
+  uint16x8_t perm = vreinterpretq_u16_u8(vqtbl1q_u8(in, sh));
+  // Mask
+  // 1 byte: 00000000 0bbbbbbb
+  // 2 byte: 00000000 00bbbbbb
+  uint16x8_t ascii = vandq_u16(perm, vmovq_n_u16(0x7f)); // 6 or 7 bits
+  // 1 byte: 00000000 00000000
+  // 2 byte: 000aaaaa 00000000
+  uint16x8_t highbyte = vandq_u16(perm, vmovq_n_u16(0x1f00)); // 5 bits
+  // Combine with a shift right accumulate
+  // 1 byte: 00000000 0bbbbbbb
+  // 2 byte: 00000aaa aabbbbbb
+  uint16x8_t composed = vsraq_n_u16(ascii, highbyte, 2);
+  return composed;
+}
 
-          // 5. store bytes
-          vst1q_u8(utf8_output, utf8_packed);
+/* begin file src/arm64/arm_detect_encodings.cpp */
+template
+// len is known to be a multiple of 2 when this is called
+int arm_detect_encodings(const char * buf, size_t len) {
+    const char* start = buf;
+    const char* end = buf + len;
 
-          // 6. adjust pointers
-          buf += 8;
-          utf8_output += row[0];
-          continue;
+    bool is_utf8 = true;
+    bool is_utf16 = true;
+    bool is_utf32 = true;
 
-    }
-    const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
-    // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
-    // it is likely an uncommon occurrence.
-      if (vmaxvq_u16(surrogates_bytemask) == 0) {
-      // case: words from register produce either 1, 2 or 3 UTF-8 bytes
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-        const uint16x8_t dup_even = make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
-                                     0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
-#else
-        const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
-                                     0x0808, 0x0a0a, 0x0c0c, 0x0e0e};
-#endif
-        /* In this branch we handle three cases:
-           1. [0000|0000|0ccc|cccc] => [0ccc|cccc]                           - single UFT-8 byte
-           2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
-           3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
+    int out = 0;
 
-          We expand the input word (16-bit) into two words (32-bit), thus
-          we have room for four bytes. However, we need five distinct bit
-          layouts. Note that the last byte in cases #2 and #3 is the same.
+    const auto v_d8 = simd8::splat(0xd8);
+    const auto v_f8 = simd8::splat(0xf8);
 
-          We precompute byte 1 for case #1 and the common byte for cases #2 & #3
-          in register t2.
+    uint32x4_t currentmax = vmovq_n_u32(0x0);
 
-          We precompute byte 1 for case #3 and -- **conditionally** -- precompute
-          either byte 1 for case #2 or byte 2 for case #3. Note that they
-          differ by exactly one bit.
+    checker check{};
 
-          Finally from these two words we build proper UTF-8 sequence, taking
-          into account the case (i.e, the number of bytes to write).
-        */
-        /**
-         * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce:
-         * t2 => [0ccc|cccc] [10cc|cccc]
-         * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb])
-         */
-#define simdutf_vec(x) vmovq_n_u16(static_cast(x))
-        // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc]
-        const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even)));
-        // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc]
-        const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
-        // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc]
-        const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
+    while(buf + 64 <= end) {
+        uint16x8_t in = vld1q_u16(reinterpret_cast(buf));
+        uint16x8_t secondin = vld1q_u16(reinterpret_cast(buf) + simd16::SIZE / sizeof(char16_t));
+        uint16x8_t thirdin = vld1q_u16(reinterpret_cast(buf) + 2*simd16::SIZE / sizeof(char16_t));
+        uint16x8_t fourthin = vld1q_u16(reinterpret_cast(buf) + 3*simd16::SIZE / sizeof(char16_t));
 
-        // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa]
-        const uint16x8_t s0 = vshrq_n_u16(in, 12);
-        // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000]
-        const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000));
-        // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000]
-        const uint16x8_t s1s = vshlq_n_u16(s1, 2);
-        // [00bb|bbbb|0000|aaaa]
-        const uint16x8_t s2 = vorrq_u16(s0, s1s);
-        // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa]
-        const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
-        const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
-        const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff);
-        const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
-        const uint16x8_t s4 = veorq_u16(s3, m0);
-#undef simdutf_vec
+        const auto u0 = simd16(in);
+        const auto u1 = simd16(secondin);
+        const auto u2 = simd16(thirdin);
+        const auto u3 = simd16(fourthin);
 
-        // 4. expand words 16-bit => 32-bit
-        const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4));
-        const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4));
+        const auto v0 = u0.shr<8>();
+        const auto v1 = u1.shr<8>();
+        const auto v2 = u2.shr<8>();
+        const auto v3 = u3.shr<8>();
 
-        // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
-        const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
-        const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-        const uint16x8_t onemask = make_uint16x8_t(0x0001, 0x0004,
-                                    0x0010, 0x0040,
-                                    0x0100, 0x0400,
-                                    0x1000, 0x4000 );
-        const uint16x8_t twomask = make_uint16x8_t(0x0002, 0x0008,
-                                    0x0020, 0x0080,
-                                    0x0200, 0x0800,
-                                    0x2000, 0x8000 );
-#else
-        const uint16x8_t onemask = { 0x0001, 0x0004,
-                                    0x0010, 0x0040,
-                                    0x0100, 0x0400,
-                                    0x1000, 0x4000 };
-        const uint16x8_t twomask = { 0x0002, 0x0008,
-                                    0x0020, 0x0080,
-                                    0x0200, 0x0800,
-                                    0x2000, 0x8000 };
-#endif
-        const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
-        const uint16_t mask = vaddvq_u16(combined);
-        // The following fast path may or may not be beneficial.
-        /*if(mask == 0) {
-          // We only have three-byte words. Use fast path.
-          const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0};
-          const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle);
-          const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle);
-          vst1q_u8(utf8_output, utf8_0);
-          utf8_output += 12;
-          vst1q_u8(utf8_output, utf8_1);
-          utf8_output += 12;
-          buf += 8;
-          continue;
-        }*/
-        const uint8_t mask0 = uint8_t(mask);
+        const auto in16 = simd16::pack(v0, v1);
+        const auto nextin16 = simd16::pack(v2, v3);
 
-        const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
-        const uint8x16_t shuffle0 = vld1q_u8(row0 + 1);
-        const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0);
+        const uint64_t surrogates_wordmask0 = ((in16 & v_f8) == v_d8).to_bitmask64();
+        const uint64_t surrogates_wordmask1 = ((nextin16 & v_f8) == v_d8).to_bitmask64();
 
-        const uint8_t mask1 = static_cast(mask >> 8);
-        const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
-        const uint8x16_t shuffle1 = vld1q_u8(row1 + 1);
-        const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1);
+        // Check for surrogates
+        if (surrogates_wordmask0 != 0 || surrogates_wordmask1 != 0) {
+            // Cannot be UTF8
+            is_utf8 = false;
+            // Can still be either UTF-16LE or UTF-32 depending on the positions of the surrogates
+            // To be valid UTF-32, a surrogate cannot be in the two most significant bytes of any 32-bit word.
+            // On the other hand, to be valid UTF-16LE, at least one surrogate must be in the two most significant
+            // bytes of a 32-bit word since they always come in pairs in UTF-16LE.
+            // Note that we always proceed in multiple of 4 before this point so there is no offset in 32-bit code units.
 
-        vst1q_u8(utf8_output, utf8_0);
-        utf8_output += row0[0];
-        vst1q_u8(utf8_output, utf8_1);
-        utf8_output += row1[0];
+            if (((surrogates_wordmask0 | surrogates_wordmask1) & 0xf0f0f0f0f0f0f0f0) != 0) {
+                is_utf32 = false;
+                // Code from arm_validate_utf16le.cpp
+                // Not efficient, we do not process surrogates_wordmask1
+                const char16_t * input = reinterpret_cast(buf);
+                const char16_t* end16 = reinterpret_cast(start) + len/2;
 
-        buf += 8;
-    // surrogate pair(s) in a register
-    } else {
-      // Let us do a scalar fallback.
-      // It may seem wasteful to use scalar code, but being efficient with SIMD
-      // in the presence of surrogate pairs may require non-trivial tables.
-      size_t forward = 15;
-      size_t k = 0;
-      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
-      for(; k < forward; k++) {
-        uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
-        if((word & 0xFF80)==0) {
-          *utf8_output++ = char(word);
-        } else if((word & 0xF800)==0) {
-          *utf8_output++ = char((word>>6) | 0b11000000);
-          *utf8_output++ = char((word & 0b111111) | 0b10000000);
-        } else if((word &0xF800 ) != 0xD800) {
-          *utf8_output++ = char((word>>12) | 0b11100000);
-          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
-          *utf8_output++ = char((word & 0b111111) | 0b10000000);
-        } else {
-          // must be a surrogate pair
-          uint16_t diff = uint16_t(word - 0xD800);
-          uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
-          k++;
-          uint16_t diff2 = uint16_t(next_word - 0xDC00);
-          if((diff | diff2) > 0x3FF)  { return std::make_pair(nullptr, reinterpret_cast(utf8_output)); }
-          uint32_t value = (diff << 10) + diff2 + 0x10000;
-          *utf8_output++ = char((value>>18) | 0b11110000);
-          *utf8_output++ = char(((value>>12) & 0b111111) | 0b10000000);
-          *utf8_output++ = char(((value>>6) & 0b111111) | 0b10000000);
-          *utf8_output++ = char((value & 0b111111) | 0b10000000);
-        }
-      }
-      buf += k;
-    }
-  } // while
+                const auto v_fc = simd8::splat(0xfc);
+                const auto v_dc = simd8::splat(0xdc);
 
-  return std::make_pair(buf, reinterpret_cast(utf8_output));
-}
+                const uint64_t V0 = ~surrogates_wordmask0;
 
+                const auto vH0 = ((in16 & v_fc) ==  v_dc);
+                const uint64_t H0 = vH0.to_bitmask64();
 
-/*
-  Returns a pair: a result struct and utf8_output.
-  If there is an error, the count field of the result is the position of the error.
-  Otherwise, it is the position of the first unprocessed byte in buf (even if finished).
-  A scalar routing should carry on the conversion of the tail if needed.
-*/
-template 
-std::pair arm_convert_utf16_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_out) {
-  uint8_t * utf8_output = reinterpret_cast(utf8_out);
-    const char16_t* start = buf;
-  const char16_t* end = buf + len;
+                const uint64_t L0 = ~H0 & surrogates_wordmask0;
 
-  const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
-  const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
-  const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
+                const uint64_t a0 = L0 & (H0 >> 4);
 
-  while (buf + 16 <= end) {
-    uint16x8_t in = vld1q_u16(reinterpret_cast(buf));
-    if (!match_system(big_endian)) {
-      #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-      const uint8x16_t swap = make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-      #else
-      const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
-      #endif
-      in = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), swap));
-    }
-    if(vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!!
-        // It is common enough that we have sequences of 16 consecutive ASCII characters.
-        uint16x8_t nextin = vld1q_u16(reinterpret_cast(buf) + 8);
-        if (!match_system(big_endian)) {
-          #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-          const uint8x16_t swap = make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-          #else
-          const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
-          #endif
-          nextin = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(nextin), swap));
-        }
-        if(vmaxvq_u16(nextin) > 0x7F) {
-          // 1. pack the bytes
-          // obviously suboptimal.
-          uint8x8_t utf8_packed = vmovn_u16(in);
-          // 2. store (8 bytes)
-          vst1_u8(utf8_output, utf8_packed);
-          // 3. adjust pointers
-          buf += 8;
-          utf8_output += 8;
-          in = nextin;
-        } else {
-          // 1. pack the bytes
-          // obviously suboptimal.
-          uint8x16_t utf8_packed = vmovn_high_u16(vmovn_u16(in), nextin);
-          // 2. store (16 bytes)
-          vst1q_u8(utf8_output, utf8_packed);
-          // 3. adjust pointers
-          buf += 16;
-          utf8_output += 16;
-          continue; // we are done for this round!
-        }
-    }
+                const uint64_t b0 = a0 << 4;
 
-    if (vmaxvq_u16(in) <= 0x7FF) {
-          // 1. prepare 2-byte values
-          // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
-          // expected output   : [110a|aaaa|10bb|bbbb] x 8
-          const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
-          const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
+                const uint64_t c0 = V0 | a0 | b0;
+                if (c0 == ~0ull) {
+                    input += 16;
+                } else if (c0 == 0xfffffffffffffffull) {
+                    input += 15;
+                } else {
+                    is_utf16 = false;
+                    break;
+                }
 
-          // t0 = [000a|aaaa|bbbb|bb00]
-          const uint16x8_t t0 = vshlq_n_u16(in, 2);
-          // t1 = [000a|aaaa|0000|0000]
-          const uint16x8_t t1 = vandq_u16(t0, v_1f00);
-          // t2 = [0000|0000|00bb|bbbb]
-          const uint16x8_t t2 = vandq_u16(in, v_003f);
-          // t3 = [000a|aaaa|00bb|bbbb]
-          const uint16x8_t t3 = vorrq_u16(t1, t2);
-          // t4 = [110a|aaaa|10bb|bbbb]
-          const uint16x8_t t4 = vorrq_u16(t3, v_c080);
-          // 2. merge ASCII and 2-byte codewords
-          const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
-          const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
-          const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4));
-          // 3. prepare bitmask for 8-bit lookup
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-          const uint16x8_t mask = make_uint16x8_t(0x0001, 0x0004,
-                                    0x0010, 0x0040,
-                                    0x0002, 0x0008,
-                                    0x0020, 0x0080);
-#else
-          const uint16x8_t mask = { 0x0001, 0x0004,
-                                    0x0010, 0x0040,
-                                    0x0002, 0x0008,
-                                    0x0020, 0x0080 };
-#endif
-          uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
-          // 4. pack the bytes
-          const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
-          const uint8x16_t shuffle = vld1q_u8(row + 1);
-          const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
+                while (input + 16 < end16) {
+                    const auto in0 = simd16(input);
+                    const auto in1 = simd16(input + simd16::SIZE / sizeof(char16_t));
+                    const auto t0 = in0.shr<8>();
+                    const auto t1 = in1.shr<8>();
+                    const simd8 in_16 = simd16::pack(t0, t1);
 
-          // 5. store bytes
-          vst1q_u8(utf8_output, utf8_packed);
+                    const uint64_t surrogates_wordmask = ((in_16 & v_f8) == v_d8).to_bitmask64();
+                    if(surrogates_wordmask == 0) {
+                        input += 16;
+                    } else {
+                        const uint64_t V = ~surrogates_wordmask;
 
-          // 6. adjust pointers
-          buf += 8;
-          utf8_output += row[0];
-          continue;
+                        const auto vH = ((in_16 & v_fc) ==  v_dc);
+                        const uint64_t H = vH.to_bitmask64();
 
-    }
-    const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
-    // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
-    // it is likely an uncommon occurrence.
-      if (vmaxvq_u16(surrogates_bytemask) == 0) {
-      // case: words from register produce either 1, 2 or 3 UTF-8 bytes
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-        const uint16x8_t dup_even = make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
-                                     0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
-#else
-        const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
-                                     0x0808, 0x0a0a, 0x0c0c, 0x0e0e};
-#endif
-        /* In this branch we handle three cases:
-           1. [0000|0000|0ccc|cccc] => [0ccc|cccc]                           - single UFT-8 byte
-           2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
-           3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
+                        const uint64_t L = ~H & surrogates_wordmask;
 
-          We expand the input word (16-bit) into two words (32-bit), thus
-          we have room for four bytes. However, we need five distinct bit
-          layouts. Note that the last byte in cases #2 and #3 is the same.
+                        const uint64_t a = L & (H >> 4);
 
-          We precompute byte 1 for case #1 and the common byte for cases #2 & #3
-          in register t2.
+                        const uint64_t b = a << 4;
 
-          We precompute byte 1 for case #3 and -- **conditionally** -- precompute
-          either byte 1 for case #2 or byte 2 for case #3. Note that they
-          differ by exactly one bit.
+                        const uint64_t c = V | a | b;
+                        if (c == ~0ull) {
+                            input += 16;
+                        } else if (c == 0xfffffffffffffffull) {
+                            input += 15;
+                        } else {
+                            is_utf16 = false;
+                            break;
+                        }
+                    }
+                }
+            } else {
+                is_utf16 = false;
+                // Check for UTF-32
+                if (len % 4 == 0) {
+                    const char32_t * input = reinterpret_cast(buf);
+                    const char32_t* end32 = reinterpret_cast(start) + len/4;
 
-          Finally from these two words we build proper UTF-8 sequence, taking
-          into account the case (i.e, the number of bytes to write).
-        */
-        /**
-         * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce:
-         * t2 => [0ccc|cccc] [10cc|cccc]
-         * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb])
-         */
-#define simdutf_vec(x) vmovq_n_u16(static_cast(x))
-        // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc]
-        const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even)));
-        // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc]
-        const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
-        // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc]
-        const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
+                    // Must start checking for surrogates
+                    uint32x4_t currentoffsetmax = vmovq_n_u32(0x0);
+                    const uint32x4_t offset = vmovq_n_u32(0xffff2000);
+                    const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff);
 
-        // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa]
-        const uint16x8_t s0 = vshrq_n_u16(in, 12);
-        // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000]
-        const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000));
-        // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000]
-        const uint16x8_t s1s = vshlq_n_u16(s1, 2);
-        // [00bb|bbbb|0000|aaaa]
-        const uint16x8_t s2 = vorrq_u16(s0, s1s);
-        // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa]
-        const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
-        const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
-        const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff);
-        const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
-        const uint16x8_t s4 = veorq_u16(s3, m0);
-#undef simdutf_vec
+                    const uint32x4_t in32 =  vreinterpretq_u32_u16(in);
+                    const uint32x4_t secondin32 =  vreinterpretq_u32_u16(secondin);
+                    const uint32x4_t thirdin32 =  vreinterpretq_u32_u16(thirdin);
+                    const uint32x4_t fourthin32 =  vreinterpretq_u32_u16(fourthin);
 
-        // 4. expand words 16-bit => 32-bit
-        const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4));
-        const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4));
+                    currentmax = vmaxq_u32(in32,currentmax);
+                    currentmax = vmaxq_u32(secondin32,currentmax);
+                    currentmax = vmaxq_u32(thirdin32,currentmax);
+                    currentmax = vmaxq_u32(fourthin32,currentmax);
 
-        // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
-        const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
-        const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
-#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-        const uint16x8_t onemask = make_uint16x8_t(0x0001, 0x0004,
-                                    0x0010, 0x0040,
-                                    0x0100, 0x0400,
-                                    0x1000, 0x4000 );
-        const uint16x8_t twomask = make_uint16x8_t(0x0002, 0x0008,
-                                    0x0020, 0x0080,
-                                    0x0200, 0x0800,
-                                    0x2000, 0x8000 );
-#else
-        const uint16x8_t onemask = { 0x0001, 0x0004,
-                                    0x0010, 0x0040,
-                                    0x0100, 0x0400,
-                                    0x1000, 0x4000 };
-        const uint16x8_t twomask = { 0x0002, 0x0008,
-                                    0x0020, 0x0080,
-                                    0x0200, 0x0800,
-                                    0x2000, 0x8000 };
-#endif
-        const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
-        const uint16_t mask = vaddvq_u16(combined);
-        // The following fast path may or may not be beneficial.
-        /*if(mask == 0) {
-          // We only have three-byte words. Use fast path.
-          const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0};
-          const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle);
-          const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle);
-          vst1q_u8(utf8_output, utf8_0);
-          utf8_output += 12;
-          vst1q_u8(utf8_output, utf8_1);
-          utf8_output += 12;
-          buf += 8;
-          continue;
-        }*/
-        const uint8_t mask0 = uint8_t(mask);
+                    currentoffsetmax = vmaxq_u32(vaddq_u32(in32, offset), currentoffsetmax);
+                    currentoffsetmax = vmaxq_u32(vaddq_u32(secondin32, offset), currentoffsetmax);
+                    currentoffsetmax = vmaxq_u32(vaddq_u32(thirdin32, offset), currentoffsetmax);
+                    currentoffsetmax = vmaxq_u32(vaddq_u32(fourthin32, offset), currentoffsetmax);
 
-        const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
-        const uint8x16_t shuffle0 = vld1q_u8(row0 + 1);
-        const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0);
+                    while (input + 4 < end32) {
+                        const uint32x4_t in_32 = vld1q_u32(reinterpret_cast(input));
+                        currentmax = vmaxq_u32(in_32,currentmax);
+                        currentoffsetmax = vmaxq_u32(vaddq_u32(in_32, offset), currentoffsetmax);
+                        input += 4;
+                    }
 
-        const uint8_t mask1 = static_cast(mask >> 8);
-        const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
-        const uint8x16_t shuffle1 = vld1q_u8(row1 + 1);
-        const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1);
+                    uint32x4_t forbidden_words = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), standardoffsetmax);
+                    if(vmaxvq_u32(forbidden_words) != 0) {
+                        is_utf32 = false;
+                    }
+                } else {
+                    is_utf32 = false;
+                }
+            }
+            break;
+        }
+        // If no surrogate, validate under other encodings as well
 
-        vst1q_u8(utf8_output, utf8_0);
-        utf8_output += row0[0];
-        vst1q_u8(utf8_output, utf8_1);
-        utf8_output += row1[0];
+        // UTF-32 validation
+        currentmax = vmaxq_u32(vreinterpretq_u32_u16(in),currentmax);
+        currentmax = vmaxq_u32(vreinterpretq_u32_u16(secondin),currentmax);
+        currentmax = vmaxq_u32(vreinterpretq_u32_u16(thirdin),currentmax);
+        currentmax = vmaxq_u32(vreinterpretq_u32_u16(fourthin),currentmax);
 
-        buf += 8;
-    // surrogate pair(s) in a register
-    } else {
-      // Let us do a scalar fallback.
-      // It may seem wasteful to use scalar code, but being efficient with SIMD
-      // in the presence of surrogate pairs may require non-trivial tables.
-      size_t forward = 15;
-      size_t k = 0;
-      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
-      for(; k < forward; k++) {
-        uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
-        if((word & 0xFF80)==0) {
-          *utf8_output++ = char(word);
-        } else if((word & 0xF800)==0) {
-          *utf8_output++ = char((word>>6) | 0b11000000);
-          *utf8_output++ = char((word & 0b111111) | 0b10000000);
-        } else if((word &0xF800 ) != 0xD800) {
-          *utf8_output++ = char((word>>12) | 0b11100000);
-          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
-          *utf8_output++ = char((word & 0b111111) | 0b10000000);
-        } else {
-          // must be a surrogate pair
-          uint16_t diff = uint16_t(word - 0xD800);
-          uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
-          k++;
-          uint16_t diff2 = uint16_t(next_word - 0xDC00);
-          if((diff | diff2) > 0x3FF)  { return std::make_pair(result(error_code::SURROGATE, buf - start + k - 1), reinterpret_cast(utf8_output)); }
-          uint32_t value = (diff << 10) + diff2 + 0x10000;
-          *utf8_output++ = char((value>>18) | 0b11110000);
-          *utf8_output++ = char(((value>>12) & 0b111111) | 0b10000000);
-          *utf8_output++ = char(((value>>6) & 0b111111) | 0b10000000);
-          *utf8_output++ = char((value & 0b111111) | 0b10000000);
-        }
-      }
-      buf += k;
-    }
-  } // while
+        // UTF-8 validation
+        // Relies on ../generic/utf8_validation/utf8_lookup4_algorithm.h
+        simd::simd8x64 in8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(secondin), vreinterpretq_u8_u16(thirdin), vreinterpretq_u8_u16(fourthin));
+        check.check_next_input(in8);
 
-  return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf8_output));
-}
-/* end file src/arm64/arm_convert_utf16_to_utf8.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf16_to_utf32.cpp
-/* begin file src/arm64/arm_convert_utf16_to_utf32.cpp */
-/*
-    The vectorized algorithm works on single SSE register i.e., it
-    loads eight 16-bit words.
+        buf += 64;
+    }
 
-    We consider three cases:
-    1. an input register contains no surrogates and each value
-       is in range 0x0000 .. 0x07ff.
-    2. an input register contains no surrogates and values are
-       is in range 0x0000 .. 0xffff.
-    3. an input register contains surrogates --- i.e. codepoints
-       can have 16 or 32 bits.
+    // Check which encodings are possible
 
-    Ad 1.
+    if (is_utf8) {
+        if (static_cast(buf - start) != len) {
+            uint8_t block[64]{};
+            std::memset(block, 0x20, 64);
+            std::memcpy(block, buf, len - (buf - start));
+            simd::simd8x64 in(block);
+            check.check_next_input(in);
+        }
+        if (!check.errors()) {
+            out |= simdutf::encoding_type::UTF8;
+        }
+    }
 
-    When values are less than 0x0800, it means that a 16-bit words
-    can be converted into: 1) single UTF8 byte (when it's an ASCII
-    char) or 2) two UTF8 bytes.
+    if (is_utf16 && scalar::utf16::validate(reinterpret_cast(buf), (len - (buf - start))/2)) {
+        out |= simdutf::encoding_type::UTF16_LE;
+    }
 
-    For this case we do only some shuffle to obtain these 2-byte
-    codes and finally compress the whole SSE register with a single
-    shuffle.
+    if (is_utf32 && (len % 4 == 0)) {
+        const uint32x4_t standardmax = vmovq_n_u32(0x10ffff);
+        uint32x4_t is_zero = veorq_u32(vmaxq_u32(currentmax, standardmax), standardmax);
+        if (vmaxvq_u32(is_zero) == 0 && scalar::utf32::validate(reinterpret_cast(buf), (len - (buf - start))/4)) {
+            out |= simdutf::encoding_type::UTF32_LE;
+        }
+    }
 
-    We need 256-entry lookup table to get a compression pattern
-    and the number of output bytes in the compressed vector register.
-    Each entry occupies 17 bytes.
+    return out;
+}
+/* end file src/arm64/arm_detect_encodings.cpp */
 
-    Ad 2.
+/* begin file src/arm64/arm_validate_utf16.cpp */
+template 
+const char16_t* arm_validate_utf16(const char16_t* input, size_t size) {
+    const char16_t* end = input + size;
+    const auto v_d8 = simd8::splat(0xd8);
+    const auto v_f8 = simd8::splat(0xf8);
+    const auto v_fc = simd8::splat(0xfc);
+    const auto v_dc = simd8::splat(0xdc);
+    while (input + 16 < end) {
+        // 0. Load data: since the validation takes into account only higher
+        //    byte of each word, we compress the two vectors into one which
+        //    consists only the higher bytes.
+        auto in0 = simd16(input);
+        auto in1 = simd16(input + simd16::SIZE / sizeof(char16_t));
+        if (!match_system(big_endian)) {
+            in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0)));
+            in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1)));
+        }
+        const auto t0 = in0.shr<8>();
+        const auto t1 = in1.shr<8>();
+        const simd8 in = simd16::pack(t0, t1);
+        // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy).
+        const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64();
+        if(surrogates_wordmask == 0) {
+            input += 16;
+        } else {
+            // 2. We have some surrogates that have to be distinguished:
+            //    - low  surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
+            //    - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
+            //
+            //    Fact: high surrogate has 11th bit set (3rd bit in the higher word)
 
-    When values fit in 16-bit words, but are above 0x07ff, then
-    a single word may produce one, two or three UTF8 bytes.
+            // V - non-surrogate code units
+            //     V = not surrogates_wordmask
+            const uint64_t V = ~surrogates_wordmask;
 
-    We prepare data for all these three cases in two registers.
-    The first register contains lower two UTF8 bytes (used in all
-    cases), while the second one contains just the third byte for
-    the three-UTF8-bytes case.
+            // H - word-mask for high surrogates: the six highest bits are 0b1101'11
+            const auto vH = ((in & v_fc) ==  v_dc);
+            const uint64_t H = vH.to_bitmask64();
 
-    Finally these two registers are interleaved forming eight-element
-    array of 32-bit values. The array spans two SSE registers.
-    The bytes from the registers are compressed using two shuffles.
+            // L - word mask for low surrogates
+            //     L = not H and surrogates_wordmask
+            const uint64_t L = ~H & surrogates_wordmask;
 
-    We need 256-entry lookup table to get a compression pattern
-    and the number of output bytes in the compressed vector register.
-    Each entry occupies 17 bytes.
+            const uint64_t a = L & (H >> 4); // A low surrogate must be followed by high one.
+                              // (A low surrogate placed in the 7th register's word
+                              // is an exception we handle.)
+            const uint64_t b = a << 4; // Just mark that the opposite fact is hold,
+                          // thanks to that we have only two masks for valid case.
+            const uint64_t c = V | a | b;      // Combine all the masks into the final one.
+            if (c == ~0ull) {
+                // The whole input register contains valid UTF-16, i.e.,
+                // either single code units or proper surrogate pairs.
+                input += 16;
+            } else if (c == 0xfffffffffffffffull) {
+                // The 15 lower code units of the input register contains valid UTF-16.
+                // The 15th word may be either a low or high surrogate. It the next
+                // iteration we 1) check if the low surrogate is followed by a high
+                // one, 2) reject sole high surrogate.
+                input += 15;
+            } else {
+                return nullptr;
+            }
+        }
+    }
+    return input;
+}
 
 
-    To summarize:
-    - We need two 256-entry tables that have 8704 bytes in total.
-*/
-/*
-  Returns a pair: the first unprocessed byte from buf and utf8_output
-  A scalar routing should carry on the conversion of the tail.
-*/
 template 
-std::pair arm_convert_utf16_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_out) {
-  uint32_t * utf32_output = reinterpret_cast(utf32_out);
-  const char16_t* end = buf + len;
-
-  const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
-  const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
+const result arm_validate_utf16_with_errors(const char16_t* input, size_t size) {
+    const char16_t* start = input;
+    const char16_t* end = input + size;
 
-  while (buf + 16 <= end) {
-    uint16x8_t in = vld1q_u16(reinterpret_cast(buf));
-    if (!match_system(big_endian)) {
-      #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-      const uint8x16_t swap = make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-      #else
-      const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
-      #endif
-      in = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), swap));
-    }
+    const auto v_d8 = simd8::splat(0xd8);
+    const auto v_f8 = simd8::splat(0xf8);
+    const auto v_fc = simd8::splat(0xfc);
+    const auto v_dc = simd8::splat(0xdc);
+    while (input + 16 < end) {
+        // 0. Load data: since the validation takes into account only higher
+        //    byte of each word, we compress the two vectors into one which
+        //    consists only the higher bytes.
+        auto in0 = simd16(input);
+        auto in1 = simd16(input + simd16::SIZE / sizeof(char16_t));
 
-    const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
-    // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
-    // it is likely an uncommon occurrence.
-      if (vmaxvq_u16(surrogates_bytemask) == 0) {
-      // case: no surrogate pairs, extend all 16-bit words to 32-bit words
-      vst1q_u32(utf32_output,  vmovl_u16(vget_low_u16(in)));
-      vst1q_u32(utf32_output+4,  vmovl_high_u16(in));
-      utf32_output += 8;
-      buf += 8;
-    // surrogate pair(s) in a register
-    } else {
-      // Let us do a scalar fallback.
-      // It may seem wasteful to use scalar code, but being efficient with SIMD
-      // in the presence of surrogate pairs may require non-trivial tables.
-      size_t forward = 15;
-      size_t k = 0;
-      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
-      for(; k < forward; k++) {
-        uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
-        if((word &0xF800 ) != 0xD800) {
-          *utf32_output++ = char32_t(word);
+        if (!match_system(big_endian)) {
+            in0 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in0)));
+            in1 = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in1)));
+        }
+        const auto t0 = in0.shr<8>();
+        const auto t1 = in1.shr<8>();
+        const simd8 in = simd16::pack(t0, t1);
+        // 1. Check whether we have any 0xD800..DFFF word (0b1101'1xxx'yyyy'yyyy).
+        const uint64_t surrogates_wordmask = ((in & v_f8) == v_d8).to_bitmask64();
+        if(surrogates_wordmask == 0) {
+            input += 16;
         } else {
-          // must be a surrogate pair
-          uint16_t diff = uint16_t(word - 0xD800);
-          uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
-          k++;
-          uint16_t diff2 = uint16_t(next_word - 0xDC00);
-          if((diff | diff2) > 0x3FF)  { return std::make_pair(nullptr, reinterpret_cast(utf32_output)); }
-          uint32_t value = (diff << 10) + diff2 + 0x10000;
-          *utf32_output++ = char32_t(value);
+            // 2. We have some surrogates that have to be distinguished:
+            //    - low  surrogates: 0b1101'10xx'yyyy'yyyy (0xD800..0xDBFF)
+            //    - high surrogates: 0b1101'11xx'yyyy'yyyy (0xDC00..0xDFFF)
+            //
+            //    Fact: high surrogate has 11th bit set (3rd bit in the higher word)
+
+            // V - non-surrogate code units
+            //     V = not surrogates_wordmask
+            const uint64_t V = ~surrogates_wordmask;
+
+            // H - word-mask for high surrogates: the six highest bits are 0b1101'11
+            const auto vH = ((in & v_fc) ==  v_dc);
+            const uint64_t H = vH.to_bitmask64();
+
+            // L - word mask for low surrogates
+            //     L = not H and surrogates_wordmask
+            const uint64_t L = ~H & surrogates_wordmask;
+
+            const uint64_t a = L & (H >> 4); // A low surrogate must be followed by high one.
+                              // (A low surrogate placed in the 7th register's word
+                              // is an exception we handle.)
+            const uint64_t b = a << 4; // Just mark that the opposite fact is hold,
+                          // thanks to that we have only two masks for valid case.
+            const uint64_t c = V | a | b;      // Combine all the masks into the final one.
+            if (c == ~0ull) {
+                // The whole input register contains valid UTF-16, i.e.,
+                // either single code units or proper surrogate pairs.
+                input += 16;
+            } else if (c == 0xfffffffffffffffull) {
+                // The 15 lower code units of the input register contains valid UTF-16.
+                // The 15th word may be either a low or high surrogate. It the next
+                // iteration we 1) check if the low surrogate is followed by a high
+                // one, 2) reject sole high surrogate.
+                input += 15;
+            } else {
+                return result(error_code::SURROGATE, input - start);
+            }
         }
-      }
-      buf += k;
     }
-  } // while
-  return std::make_pair(buf, reinterpret_cast(utf32_output));
+    return result(error_code::SUCCESS, input - start);
 }
+/* end file src/arm64/arm_validate_utf16.cpp */
+/* begin file src/arm64/arm_validate_utf32le.cpp */
 
+const char32_t* arm_validate_utf32le(const char32_t* input, size_t size) {
+    const char32_t* end = input + size;
 
-/*
-  Returns a pair: a result struct and utf8_output.
-  If there is an error, the count field of the result is the position of the error.
-  Otherwise, it is the position of the first unprocessed byte in buf (even if finished).
-  A scalar routing should carry on the conversion of the tail if needed.
-*/
-template 
-std::pair arm_convert_utf16_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_out) {
-  uint32_t * utf32_output = reinterpret_cast(utf32_out);
-  const char16_t* start = buf;
-  const char16_t* end = buf + len;
+    const uint32x4_t standardmax = vmovq_n_u32(0x10ffff);
+    const uint32x4_t offset = vmovq_n_u32(0xffff2000);
+    const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff);
+    uint32x4_t currentmax = vmovq_n_u32(0x0);
+    uint32x4_t currentoffsetmax = vmovq_n_u32(0x0);
 
-  const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
-  const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
+    while (input + 4 < end) {
+        const uint32x4_t in = vld1q_u32(reinterpret_cast(input));
+        currentmax = vmaxq_u32(in,currentmax);
+        currentoffsetmax = vmaxq_u32(vaddq_u32(in, offset), currentoffsetmax);
+        input += 4;
+    }
 
-  while (buf + 16 <= end) {
-    uint16x8_t in = vld1q_u16(reinterpret_cast(buf));
-    if (!match_system(big_endian)) {
-      #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-      const uint8x16_t swap = make_uint8x16_t(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-      #else
-      const uint8x16_t swap = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14};
-      #endif
-      in = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), swap));
+    uint32x4_t is_zero = veorq_u32(vmaxq_u32(currentmax, standardmax), standardmax);
+    if(vmaxvq_u32(is_zero) != 0) {
+        return nullptr;
     }
 
-    const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
-    // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
-    // it is likely an uncommon occurrence.
-      if (vmaxvq_u16(surrogates_bytemask) == 0) {
-      // case: no surrogate pairs, extend all 16-bit words to 32-bit words
-      vst1q_u32(utf32_output,  vmovl_u16(vget_low_u16(in)));
-      vst1q_u32(utf32_output+4,  vmovl_high_u16(in));
-      utf32_output += 8;
-      buf += 8;
-    // surrogate pair(s) in a register
-    } else {
-      // Let us do a scalar fallback.
-      // It may seem wasteful to use scalar code, but being efficient with SIMD
-      // in the presence of surrogate pairs may require non-trivial tables.
-      size_t forward = 15;
-      size_t k = 0;
-      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
-      for(; k < forward; k++) {
-        uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
-        if((word &0xF800 ) != 0xD800) {
-          *utf32_output++ = char32_t(word);
-        } else {
-          // must be a surrogate pair
-          uint16_t diff = uint16_t(word - 0xD800);
-          uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
-          k++;
-          uint16_t diff2 = uint16_t(next_word - 0xDC00);
-          if((diff | diff2) > 0x3FF)  { return std::make_pair(result(error_code::SURROGATE, buf - start + k - 1), reinterpret_cast(utf32_output)); }
-          uint32_t value = (diff << 10) + diff2 + 0x10000;
-          *utf32_output++ = char32_t(value);
-        }
-      }
-      buf += k;
+    is_zero = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), standardoffsetmax);
+    if(vmaxvq_u32(is_zero) != 0) {
+        return nullptr;
     }
-  } // while
-  return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf32_output));
+
+    return input;
 }
-/* end file src/arm64/arm_convert_utf16_to_utf32.cpp */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf32_to_utf8.cpp
-/* begin file src/arm64/arm_convert_utf32_to_utf8.cpp */
-std::pair arm_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_out) {
-  uint8_t * utf8_output = reinterpret_cast(utf8_out);
-  const char32_t* end = buf + len;
 
-  const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
+const result arm_validate_utf32le_with_errors(const char32_t* input, size_t size) {
+    const char32_t* start = input;
+    const char32_t* end = input + size;
 
-  uint16x8_t forbidden_bytemask = vmovq_n_u16(0x0);
+    const uint32x4_t standardmax = vmovq_n_u32(0x10ffff);
+    const uint32x4_t offset = vmovq_n_u32(0xffff2000);
+    const uint32x4_t standardoffsetmax = vmovq_n_u32(0xfffff7ff);
+    uint32x4_t currentmax = vmovq_n_u32(0x0);
+    uint32x4_t currentoffsetmax = vmovq_n_u32(0x0);
 
-  while (buf + 16 <= end) {
-    uint32x4_t in = vld1q_u32(reinterpret_cast(buf));
-    uint32x4_t nextin = vld1q_u32(reinterpret_cast(buf+4));
+    while (input + 4 < end) {
+        const uint32x4_t in = vld1q_u32(reinterpret_cast(input));
+        currentmax = vmaxq_u32(in,currentmax);
+        currentoffsetmax = vmaxq_u32(vaddq_u32(in, offset), currentoffsetmax);
 
-    // Check if no bits set above 16th
-    if(vmaxvq_u32(vorrq_u32(in, nextin)) <= 0xFFFF) {
-      // Pack UTF-32 to UTF-16 safely (without surrogate pairs)
-      // Apply UTF-16 => UTF-8 routine (arm_convert_utf16_to_utf8.cpp)
-      uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin));
-      if(vmaxvq_u16(utf16_packed) <= 0x7F) { // ASCII fast path!!!!
-          // 1. pack the bytes
-          // obviously suboptimal.
-          uint8x8_t utf8_packed = vmovn_u16(utf16_packed);
-          // 2. store (8 bytes)
-          vst1_u8(utf8_output, utf8_packed);
-          // 3. adjust pointers
-          buf += 8;
-          utf8_output += 8;
-          continue; // we are done for this round!
-      }
+        uint32x4_t is_zero = veorq_u32(vmaxq_u32(currentmax, standardmax), standardmax);
+        if(vmaxvq_u32(is_zero) != 0) {
+            return result(error_code::TOO_LARGE, input - start);
+        }
 
-      if (vmaxvq_u16(utf16_packed) <= 0x7FF) {
-            // 1. prepare 2-byte values
-            // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
-            // expected output   : [110a|aaaa|10bb|bbbb] x 8
-            const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
-            const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
-
-            // t0 = [000a|aaaa|bbbb|bb00]
-            const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2);
-            // t1 = [000a|aaaa|0000|0000]
-            const uint16x8_t t1 = vandq_u16(t0, v_1f00);
-            // t2 = [0000|0000|00bb|bbbb]
-            const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f);
-            // t3 = [000a|aaaa|00bb|bbbb]
-            const uint16x8_t t3 = vorrq_u16(t1, t2);
-            // t4 = [110a|aaaa|10bb|bbbb]
-            const uint16x8_t t4 = vorrq_u16(t3, v_c080);
-            // 2. merge ASCII and 2-byte codewords
-            const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
-            const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
-            const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, utf16_packed, t4));
-            // 3. prepare bitmask for 8-bit lookup
-  #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-            const uint16x8_t mask = make_uint16x8_t(0x0001, 0x0004,
-                                      0x0010, 0x0040,
-                                      0x0002, 0x0008,
-                                      0x0020, 0x0080);
-  #else
-            const uint16x8_t mask = { 0x0001, 0x0004,
-                                      0x0010, 0x0040,
-                                      0x0002, 0x0008,
-                                      0x0020, 0x0080 };
-  #endif
-            uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
-            // 4. pack the bytes
-            const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
-            const uint8x16_t shuffle = vld1q_u8(row + 1);
-            const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
+        is_zero = veorq_u32(vmaxq_u32(currentoffsetmax, standardoffsetmax), standardoffsetmax);
+        if(vmaxvq_u32(is_zero) != 0) {
+            return result(error_code::SURROGATE, input - start);
+        }
 
-            // 5. store bytes
-            vst1q_u8(utf8_output, utf8_packed);
+        input += 4;
+    }
 
-            // 6. adjust pointers
-            buf += 8;
-            utf8_output += row[0];
-            continue;
+    return result(error_code::SUCCESS, input - start);
+}
+/* end file src/arm64/arm_validate_utf32le.cpp */
 
-      } else {
-        // case: words from register produce either 1, 2 or 3 UTF-8 bytes
-        const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
-        const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff);
-        forbidden_bytemask = vorrq_u16(vandq_u16(vcleq_u16(utf16_packed, v_dfff), vcgeq_u16(utf16_packed, v_d800)), forbidden_bytemask);
+/* begin file src/arm64/arm_convert_latin1_to_utf8.cpp */
+/*
+  Returns a pair: the first unprocessed byte from buf and utf8_output
+  A scalar routing should carry on the conversion of the tail.
+*/
+std::pair
+arm_convert_latin1_to_utf8(const char *latin1_input, size_t len,
+                           char *utf8_out) {
+  uint8_t *utf8_output = reinterpret_cast(utf8_out);
+  const char *end = latin1_input + len;
+  const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
+  while (latin1_input + 16 <= end) {
+    uint8x16_t in8 = vld1q_u8(reinterpret_cast(latin1_input));
+    if (vmaxvq_u8(in8) <= 0x7F) { // ASCII fast path!!!!
+      vst1q_u8(utf8_output, in8);
+      utf8_output += 16;
+      latin1_input += 16;
+      continue;
+    }
 
-  #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-          const uint16x8_t dup_even = make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
-                                      0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
-  #else
-          const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
-                                      0x0808, 0x0a0a, 0x0c0c, 0x0e0e};
-  #endif
-          /* In this branch we handle three cases:
-            1. [0000|0000|0ccc|cccc] => [0ccc|cccc]                           - single UFT-8 byte
-            2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
-            3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
+    // We just fallback on UTF-16 code. This could be optimized/simplified
+    // further.
+    uint16x8_t in16 = vmovl_u8(vget_low_u8(in8));
+    // 1. prepare 2-byte values
+    // input 8-bit word : [aabb|bbbb] x 8
+    // expected output   : [1100|00aa|10bb|bbbb] x 8
+    const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
+    const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
+
+    // t0 = [0000|00aa|bbbb|bb00]
+    const uint16x8_t t0 = vshlq_n_u16(in16, 2);
+    // t1 = [0000|00aa|0000|0000]
+    const uint16x8_t t1 = vandq_u16(t0, v_1f00);
+    // t2 = [0000|0000|00bb|bbbb]
+    const uint16x8_t t2 = vandq_u16(in16, v_003f);
+    // t3 = [0000|00aa|00bb|bbbb]
+    const uint16x8_t t3 = vorrq_u16(t1, t2);
+    // t4 = [1100|00aa|10bb|bbbb]
+    const uint16x8_t t4 = vorrq_u16(t3, v_c080);
+    // 2. merge ASCII and 2-byte codewords
+    const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
+    const uint16x8_t one_byte_bytemask = vcleq_u16(in16, v_007f);
+    const uint8x16_t utf8_unpacked =
+        vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in16, t4));
+    // 3. prepare bitmask for 8-bit lookup
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+    const uint16x8_t mask = make_uint16x8_t(0x0001, 0x0004, 0x0010, 0x0040,
+                                            0x0002, 0x0008, 0x0020, 0x0080);
+#else
+    const uint16x8_t mask = {0x0001, 0x0004, 0x0010, 0x0040,
+                             0x0002, 0x0008, 0x0020, 0x0080};
+#endif
+    uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
+    // 4. pack the bytes
+    const uint8_t *row =
+        &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
+    const uint8x16_t shuffle = vld1q_u8(row + 1);
+    const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
+
+    // 5. store bytes
+    vst1q_u8(utf8_output, utf8_packed);
+    // 6. adjust pointers
+    latin1_input += 8;
+    utf8_output += row[0];
 
-            We expand the input word (16-bit) into two words (32-bit), thus
-            we have room for four bytes. However, we need five distinct bit
-            layouts. Note that the last byte in cases #2 and #3 is the same.
+  } // while
 
-            We precompute byte 1 for case #1 and the common byte for cases #2 & #3
-            in register t2.
+  return std::make_pair(latin1_input, reinterpret_cast(utf8_output));
+}
+/* end file src/arm64/arm_convert_latin1_to_utf8.cpp */
+/* begin file src/arm64/arm_convert_latin1_to_utf16.cpp */
+template 
+std::pair arm_convert_latin1_to_utf16(const char* buf, size_t len, char16_t* utf16_output) {
+    const char* end = buf + len;
 
-            We precompute byte 1 for case #3 and -- **conditionally** -- precompute
-            either byte 1 for case #2 or byte 2 for case #3. Note that they
-            differ by exactly one bit.
+    while (buf + 16 <= end) {
+        uint8x16_t in8 = vld1q_u8(reinterpret_cast(buf));
+        uint16x8_t inlow = vmovl_u8(vget_low_u8(in8));
+        if (!match_system(big_endian)) { inlow = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inlow))); }
+        vst1q_u16(reinterpret_cast(utf16_output), inlow);
+        uint16x8_t inhigh = vmovl_u8(vget_high_u8(in8));
+        if (!match_system(big_endian)) { inhigh = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(inhigh))); }
+        vst1q_u16(reinterpret_cast(utf16_output+8), inhigh);
+        utf16_output += 16;
+        buf += 16;
+    }
 
-            Finally from these two words we build proper UTF-8 sequence, taking
-            into account the case (i.e, the number of bytes to write).
-          */
-          /**
-           * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce:
-           * t2 => [0ccc|cccc] [10cc|cccc]
-           * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb])
-           */
-  #define simdutf_vec(x) vmovq_n_u16(static_cast(x))
-          // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc]
-          const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), vreinterpretq_u8_u16(dup_even)));
-          // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc]
-          const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
-          // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc]
-          const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
+    return std::make_pair(buf, utf16_output);
+}
+/* end file src/arm64/arm_convert_latin1_to_utf16.cpp */
+/* begin file src/arm64/arm_convert_latin1_to_utf32.cpp */
+std::pair arm_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
+    const char* end = buf + len;
 
-          // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa]
-          const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12);
-          // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000]
-          const uint16x8_t s1 = vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000));
-          // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000]
-          const uint16x8_t s1s = vshlq_n_u16(s1, 2);
-          // [00bb|bbbb|0000|aaaa]
-          const uint16x8_t s2 = vorrq_u16(s0, s1s);
-          // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa]
-          const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
-          const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
-          const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(utf16_packed, v_07ff);
-          const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
-          const uint16x8_t s4 = veorq_u16(s3, m0);
-  #undef simdutf_vec
+    while (buf + 16 <= end) {
+        uint8x16_t in8 = vld1q_u8(reinterpret_cast(buf));
+        uint16x8_t in8low = vmovl_u8(vget_low_u8(in8));
+        uint32x4_t in16lowlow = vmovl_u16(vget_low_u16(in8low));
+        uint32x4_t in16lowhigh = vmovl_u16(vget_high_u16(in8low));
+        uint16x8_t in8high = vmovl_u8(vget_high_u8(in8));
+        uint32x4_t in8highlow = vmovl_u16(vget_low_u16(in8high));
+        uint32x4_t in8highhigh = vmovl_u16(vget_high_u16(in8high));
+        vst1q_u32(reinterpret_cast(utf32_output), in16lowlow);
+        vst1q_u32(reinterpret_cast(utf32_output+4), in16lowhigh);
+        vst1q_u32(reinterpret_cast(utf32_output+8), in8highlow);
+        vst1q_u32(reinterpret_cast(utf32_output+12), in8highhigh);
 
-          // 4. expand words 16-bit => 32-bit
-          const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4));
-          const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4));
+        utf32_output += 16;
+        buf += 16;
+    }
 
-          // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
-          const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
-          const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
-  #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-          const uint16x8_t onemask = make_uint16x8_t(0x0001, 0x0004,
-                                      0x0010, 0x0040,
-                                      0x0100, 0x0400,
-                                      0x1000, 0x4000 );
-          const uint16x8_t twomask = make_uint16x8_t(0x0002, 0x0008,
-                                      0x0020, 0x0080,
-                                      0x0200, 0x0800,
-                                      0x2000, 0x8000 );
-  #else
-          const uint16x8_t onemask = { 0x0001, 0x0004,
-                                      0x0010, 0x0040,
-                                      0x0100, 0x0400,
-                                      0x1000, 0x4000 };
-          const uint16x8_t twomask = { 0x0002, 0x0008,
-                                      0x0020, 0x0080,
-                                      0x0200, 0x0800,
-                                      0x2000, 0x8000 };
-  #endif
-          const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
-          const uint16_t mask = vaddvq_u16(combined);
-          // The following fast path may or may not be beneficial.
-          /*if(mask == 0) {
-            // We only have three-byte words. Use fast path.
-            const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0};
-            const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle);
-            const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle);
-            vst1q_u8(utf8_output, utf8_0);
-            utf8_output += 12;
-            vst1q_u8(utf8_output, utf8_1);
-            utf8_output += 12;
-            buf += 8;
-            continue;
-          }*/
-          const uint8_t mask0 = uint8_t(mask);
-          const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
-          const uint8x16_t shuffle0 = vld1q_u8(row0 + 1);
-          const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0);
+    return std::make_pair(buf, utf32_output);
+}
+/* end file src/arm64/arm_convert_latin1_to_utf32.cpp */
 
-          const uint8_t mask1 = static_cast(mask >> 8);
-          const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
-          const uint8x16_t shuffle1 = vld1q_u8(row1 + 1);
-          const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1);
+/* begin file src/arm64/arm_convert_utf8_to_utf16.cpp */
+// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the
+// end of the code points. Only the least significant 12 bits of the mask
+// are accessed.
+// It returns how many bytes were consumed (up to 16, usually 12).
+template 
+size_t convert_masked_utf8_to_utf16(const char *input,
+                           uint64_t utf8_end_of_code_point_mask,
+                           char16_t *&utf16_output) {
+  // we use an approach where we try to process up to 12 input bytes.
+  // Why 12 input bytes and not 16? Because we are concerned with the size of
+  // the lookup tables. Also 12 is nicely divisible by two and three.
+  //
+  uint8x16_t in = vld1q_u8(reinterpret_cast(input));
+  const uint16_t input_utf8_end_of_code_point_mask =
+      utf8_end_of_code_point_mask & 0xfff;
+  //
+  // Optimization note: our main path below is load-latency dependent. Thus it is maybe
+  // beneficial to have fast paths that depend on branch prediction but have less latency.
+  // This results in more instructions but, potentially, also higher speeds.
 
-          vst1q_u8(utf8_output, utf8_0);
-          utf8_output += row0[0];
-          vst1q_u8(utf8_output, utf8_1);
-          utf8_output += row1[0];
+  // We first try a few fast paths.
+  // The obvious first test is ASCII, which actually consumes the full 16.
+  if((utf8_end_of_code_point_mask & 0xFFFF) == 0xffff) {
+    // We process in chunks of 16 bytes
+    // The routine in simd.h is reused.
+    simd8 temp{vreinterpretq_s8_u8(in)};
+    temp.store_ascii_as_utf16(utf16_output);
+    utf16_output += 16; // We wrote 16 16-bit characters.
+    return 16; // We consumed 16 bytes.
+  }
 
-          buf += 8;
-      }
-    // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> will produce four UTF-8 bytes.
-    } else {
-      // Let us do a scalar fallback.
-      // It may seem wasteful to use scalar code, but being efficient with SIMD
-      // in the presence of surrogate pairs may require non-trivial tables.
-      size_t forward = 15;
-      size_t k = 0;
-      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
-      for(; k < forward; k++) {
-        uint32_t word = buf[k];
-        if((word & 0xFFFFFF80)==0) {
-          *utf8_output++ = char(word);
-        } else if((word & 0xFFFFF800)==0) {
-          *utf8_output++ = char((word>>6) | 0b11000000);
-          *utf8_output++ = char((word & 0b111111) | 0b10000000);
-        } else if((word & 0xFFFF0000)==0) {
-          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(nullptr, reinterpret_cast(utf8_output)); }
-          *utf8_output++ = char((word>>12) | 0b11100000);
-          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
-          *utf8_output++ = char((word & 0b111111) | 0b10000000);
-        } else {
-          if (word > 0x10FFFF) { return std::make_pair(nullptr, reinterpret_cast(utf8_output)); }
-          *utf8_output++ = char((word>>18) | 0b11110000);
-          *utf8_output++ = char(((word>>12) & 0b111111) | 0b10000000);
-          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
-          *utf8_output++ = char((word & 0b111111) | 0b10000000);
-        }
-      }
-      buf += k;
+  // 3 byte sequences are the next most common, as seen in CJK, which has long sequences
+  // of these.
+  if (input_utf8_end_of_code_point_mask == 0x924) {
+    // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte UTF-16 code units.
+    uint16x4_t composed = convert_utf8_3_byte_to_utf16(in);
+    // Byte swap if necessary
+    if (!match_system(big_endian)) {
+      composed = vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(composed)));
     }
-  } // while
-
-  // check for invalid input
-  if (vmaxvq_u16(forbidden_bytemask) != 0) {
-    return std::make_pair(nullptr, reinterpret_cast(utf8_output));
+    vst1_u16(reinterpret_cast(utf16_output), composed);
+    utf16_output += 4; // We wrote 4 16-bit characters.
+    return 12; // We consumed 12 bytes.
   }
-  return std::make_pair(buf, reinterpret_cast(utf8_output));
-}
 
+  // 2 byte sequences occur in short bursts in languages like Greek and Russian.
+  if ((utf8_end_of_code_point_mask & 0xFFF) == 0xaaa) {
+    // We want to take 6 2-byte UTF-8 code units and turn them into 6 2-byte UTF-16 code units.
+    uint16x8_t composed = convert_utf8_2_byte_to_utf16(in);
+    // Byte swap if necessary
+    if (!match_system(big_endian)) {
+      composed = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed)));
+    }
+    vst1q_u16(reinterpret_cast(utf16_output), composed);
 
-std::pair arm_convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_out) {
-  uint8_t * utf8_output = reinterpret_cast(utf8_out);
-  const char32_t* start = buf;
-  const char32_t* end = buf + len;
+    utf16_output += 6; // We wrote 6 16-bit characters.
+    return 12; // We consumed 12 bytes.
+  }
 
-  const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
+  /// We do not have a fast path available, or the fast path is unimportant, so we fallback.
+  const uint8_t idx =
+      simdutf::tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0];
 
-  while (buf + 16 <= end) {
-    uint32x4_t in = vld1q_u32(reinterpret_cast(buf));
-    uint32x4_t nextin = vld1q_u32(reinterpret_cast(buf+4));
+  const uint8_t consumed =
+      simdutf::tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
 
-    // Check if no bits set above 16th
-    if(vmaxvq_u32(vorrq_u32(in, nextin)) <= 0xFFFF) {
-      // Pack UTF-32 to UTF-16 safely (without surrogate pairs)
-      // Apply UTF-16 => UTF-8 routine (arm_convert_utf16_to_utf8.cpp)
-      uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin));
-      if(vmaxvq_u16(utf16_packed) <= 0x7F) { // ASCII fast path!!!!
-          // 1. pack the bytes
-          // obviously suboptimal.
-          uint8x8_t utf8_packed = vmovn_u16(utf16_packed);
-          // 2. store (8 bytes)
-          vst1_u8(utf8_output, utf8_packed);
-          // 3. adjust pointers
-          buf += 8;
-          utf8_output += 8;
-          continue; // we are done for this round!
+  if (idx < 64) {
+    // SIX (6) input code-code units
+    // Convert to UTF-16
+    uint16x8_t composed = convert_utf8_1_to_2_byte_to_utf16(in, idx);
+    // Byte swap if necessary
+    if (!match_system(big_endian)) {
+      composed = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed)));
+    }
+    // Store
+    vst1q_u16(reinterpret_cast(utf16_output), composed);
+    utf16_output += 6; // We wrote 6 16-bit characters.
+    return consumed;
+  } else if (idx < 145) {
+    // FOUR (4) input code-code units
+    // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing.
+    uint8x16_t sh = vld1q_u8(reinterpret_cast(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
+    // XXX: depending on the system scalar instructions might be faster.
+    // 1 byte: 00000000 00000000 0ccccccc
+    // 2 byte: 00000000 110bbbbb 10cccccc
+    // 3 byte: 1110aaaa 10bbbbbb 10cccccc
+    uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh));
+    // 1 byte: 00000000 0ccccccc
+    // 2 byte: xx0bbbbb x0cccccc
+    // 3 byte: xxbbbbbb x0cccccc
+    uint16x4_t lowperm = vmovn_u32(perm);
+    // Partially mask with bic (doesn't require a temporary register unlike and)
+    // The shift left insert below will clear the top bits.
+    // 1 byte: 00000000 00000000
+    // 2 byte: xx0bbbbb 00000000
+    // 3 byte: xxbbbbbb 00000000
+    uint16x4_t middlebyte = vbic_u16(lowperm, vmov_n_u16(uint16_t(~0xFF00)));
+    // ASCII
+    // 1 byte: 00000000 0ccccccc
+    // 2+byte: 00000000 00cccccc
+    uint16x4_t ascii = vand_u16(lowperm, vmov_n_u16(0x7F));
+    // Split into narrow vectors.
+    // 2 byte: 00000000 00000000
+    // 3 byte: 00000000 xxxxaaaa
+    uint16x4_t highperm = vshrn_n_u32(perm, 16);
+    // Shift right accumulate the middle byte
+    // 1 byte: 00000000 0ccccccc
+    // 2 byte: 00xx0bbb bbcccccc
+    // 3 byte: 00xxbbbb bbcccccc
+    uint16x4_t middlelow = vsra_n_u16(ascii, middlebyte, 2);
+    // Shift left and insert the top 4 bits, overwriting the garbage
+    // 1 byte: 00000000 0ccccccc
+    // 2 byte: 00000bbb bbcccccc
+    // 3 byte: aaaabbbb bbcccccc
+    uint16x4_t composed = vsli_n_u16(middlelow, highperm, 12);
+    // Byte swap if necessary
+    if (!match_system(big_endian)) {
+      composed = vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(composed)));
+    }
+    vst1_u16(reinterpret_cast(utf16_output), composed);
+
+    utf16_output += 4; // We wrote 4 16-bit codepoints
+    return consumed;
+  } else if (idx < 209) {
+    // THREE (3) input code-code units
+    if (input_utf8_end_of_code_point_mask == 0x888) {
+      // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte UTF-16 pairs.
+      // Generating surrogate pairs is a little tricky though, but it is easier when we
+      // can assume they are all pairs.
+      // This version does not use the LUT, but 4 byte sequences are less common and the
+      // overhead of the extra memory access is less important than the early branch overhead
+      // in shorter sequences.
+
+      // Swap byte pairs
+      // 10dddddd 10cccccc|10bbbbbb 11110aaa
+      // 10cccccc 10dddddd|11110aaa 10bbbbbb
+      uint8x16_t swap = vrev16q_u8(in);
+      // Shift left 2 bits
+      // cccccc00 dddddd00 xxxxxxxx bbbbbb00
+      uint32x4_t shift = vreinterpretq_u32_u8(vshlq_n_u8(swap, 2));
+      // Create a magic number containing the low 2 bits of the trail surrogate and all the
+      // corrections needed to create the pair.
+      // UTF-8 4b prefix   = -0x0000|0xF000
+      // surrogate offset  = -0x0000|0x0040 (0x10000 << 6)
+      // surrogate high    = +0x0000|0xD800
+      // surrogate low     = +0xDC00|0x0000
+      // -------------------------------
+      //                   = +0xDC00|0xE7C0
+      uint32x4_t magic = vmovq_n_u32(0xDC00E7C0);
+      // Generate unadjusted trail surrogate minus lowest 2 bits
+      // xxxxxxxx xxxxxxxx|11110aaa bbbbbb00
+      uint32x4_t trail = vbslq_u32(vmovq_n_u32(0x0000FF00), vreinterpretq_u32_u8(swap), shift);
+      // Insert low 2 bits of trail surrogate to magic number for later
+      // 11011100 00000000 11100111 110000cc
+      uint16x8_t magic_with_low_2 = vreinterpretq_u16_u32(vsraq_n_u32(magic, shift, 30));
+      // Generate lead surrogate
+      // xxxxcccc ccdddddd|xxxxxxxx xxxxxxxx
+      uint32x4_t lead = vreinterpretq_u32_u16(vsliq_n_u16(vreinterpretq_u16_u8(swap), vreinterpretq_u16_u8(in), 6));
+      // Mask out lead
+      // 000000cc ccdddddd|xxxxxxxx xxxxxxxx
+      lead = vbicq_u32(lead, vmovq_n_u32(uint32_t(~0x03FFFFFF)));
+      // Blend pairs
+      // 000000cc ccdddddd|11110aaa bbbbbb00
+      uint16x8_t blend = vreinterpretq_u16_u32(vbslq_u32(vmovq_n_u32(0x0000FFFF), trail, lead));
+      // Add magic number to finish the result
+      // 110111CC CCDDDDDD|110110AA BBBBBBCC
+      uint16x8_t composed = vaddq_u16(blend, magic_with_low_2);
+      // Byte swap if necessary
+      if (!match_system(big_endian)) {
+        composed = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(composed)));
       }
+      vst1q_u16(reinterpret_cast(utf16_output), composed);
+      utf16_output += 6; // We 3 32-bit surrogate pairs.
+      return 12; // We consumed 12 bytes.
+    }
+    // 3 1-4 byte sequences
+    uint8x16_t sh = vld1q_u8(reinterpret_cast(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
 
-      if (vmaxvq_u16(utf16_packed) <= 0x7FF) {
-            // 1. prepare 2-byte values
-            // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
-            // expected output   : [110a|aaaa|10bb|bbbb] x 8
-            const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
-            const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
-
-            // t0 = [000a|aaaa|bbbb|bb00]
-            const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2);
-            // t1 = [000a|aaaa|0000|0000]
-            const uint16x8_t t1 = vandq_u16(t0, v_1f00);
-            // t2 = [0000|0000|00bb|bbbb]
-            const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f);
-            // t3 = [000a|aaaa|00bb|bbbb]
-            const uint16x8_t t3 = vorrq_u16(t1, t2);
-            // t4 = [110a|aaaa|10bb|bbbb]
-            const uint16x8_t t4 = vorrq_u16(t3, v_c080);
-            // 2. merge ASCII and 2-byte codewords
-            const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
-            const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
-            const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, utf16_packed, t4));
-            // 3. prepare bitmask for 8-bit lookup
-  #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-            const uint16x8_t mask = make_uint16x8_t(0x0001, 0x0004,
-                                      0x0010, 0x0040,
-                                      0x0002, 0x0008,
-                                      0x0020, 0x0080);
-  #else
-            const uint16x8_t mask = { 0x0001, 0x0004,
-                                      0x0010, 0x0040,
-                                      0x0002, 0x0008,
-                                      0x0020, 0x0080 };
-  #endif
-            uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
-            // 4. pack the bytes
-            const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
-            const uint8x16_t shuffle = vld1q_u8(row + 1);
-            const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
+    // 1 byte: 00000000 00000000 00000000 0ddddddd
+    // 3 byte: 00000000 00000000 110ccccc 10dddddd
+    // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd
+    // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd
+    uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh));
+    // Mask the low and middle bytes
+    // 00000000 00000000 00000000 0ddddddd
+    uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7f));
+    // Because the surrogates need more work, the high surrogate is computed first.
+    uint32x4_t middlehigh = vshlq_n_u32(perm, 2);
+    // 00000000 00000000 00cccccc 00000000
+    uint32x4_t middlebyte = vandq_u32(perm, vmovq_n_u32(0x3F00));
+    // Start assembling the sequence. Since the 4th byte is in the same position as it
+    // would be in a surrogate and there is no dependency, shift left instead of right.
+    // 3 byte: 00000000 10bbbbxx xxxxxxxx xxxxxxxx
+    // 4 byte: 11110aaa bbbbbbxx xxxxxxxx xxxxxxxx
+    uint32x4_t ab = vbslq_u32(vmovq_n_u32(0xFF000000), perm, middlehigh);
+    // Top 16 bits contains the high ten bits of the surrogate pair before correction
+    // 3 byte: 00000000 10bbbbcc|cccc0000 00000000
+    // 4 byte: 11110aaa bbbbbbcc|cccc0000 00000000 - high 10 bits correct w/o correction
+    uint32x4_t abc = vbslq_u32(vmovq_n_u32(0xFFFC0000), ab, vshlq_n_u32(middlebyte, 4));
+    // Combine the low 6 or 7 bits by a shift right accumulate
+    // 3 byte: 00000000 00000010|bbbbcccc ccdddddd - low 16 bits correct
+    // 4 byte: 00000011 110aaabb|bbbbcccc ccdddddd - low 10 bits correct w/o correction
+    uint32x4_t composed = vsraq_n_u32(ascii, abc, 6);
+    // After this is for surrogates
+    // Blend the low and high surrogates
+    // 4 byte: 11110aaa bbbbbbcc|bbbbcccc ccdddddd
+    uint32x4_t mixed = vbslq_u32(vmovq_n_u32(0xFFFF0000), abc, composed);
+    // Clear the upper 6 bits of the low surrogate. Don't clear the upper bits yet as
+    // 0x10000 was not subtracted from the codepoint yet.
+    // 4 byte: 11110aaa bbbbbbcc|000000cc ccdddddd
+    uint16x8_t masked_pair =
+        vreinterpretq_u16_u32(vbicq_u32(mixed, vmovq_n_u32(uint32_t(~0xFFFF03FF))));
+    // Correct the remaining UTF-8 prefix, surrogate offset, and add the surrogate prefixes
+    // in one magic 16-bit addition.
+    // similar magic number but without the continue byte adjust and halfword swapped
+    // UTF-8 4b prefix   = -0xF000|0x0000
+    // surrogate offset  = -0x0040|0x0000 (0x10000 << 6)
+    // surrogate high    = +0xD800|0x0000
+    // surrogate low     = +0x0000|0xDC00
+    // -----------------------------------
+    //                   = +0xE7C0|0xDC00
+    uint16x8_t magic = vreinterpretq_u16_u32(vmovq_n_u32(0xE7C0DC00));
+    // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD - surrogate pair complete
+    uint32x4_t surrogates = vreinterpretq_u32_u16(vaddq_u16(masked_pair, magic));
+    // If the high bit is 1 (s32 less than zero), this needs a surrogate pair
+    uint32x4_t is_pair = vcltzq_s32(vreinterpretq_s32_u32(perm));
+
+    // Select either the 4 byte surrogate pair or the 2 byte solo codepoint
+    // 3 byte: 0xxxxxxx xxxxxxxx|bbbbcccc ccdddddd
+    // 4 byte: 110110AA BBBBBBCC|110111CC CCDDDDDD
+    uint32x4_t selected = vbslq_u32(is_pair, surrogates, composed);
+    // Byte swap if necessary
+    if (!match_system(big_endian)) {
+      selected = vreinterpretq_u32_u8(vrev16q_u8(vreinterpretq_u8_u32(selected)));
+    }
+    // Attempting to shuffle and store would be complex, just scalarize.
+    uint32_t buffer[4];
+    vst1q_u32(buffer, selected);
+    // Test for the top bit of the surrogate mask.
+    const uint32_t SURROGATE_MASK = match_system(big_endian) ? 0x80000000 : 0x00800000;
+    for (size_t i = 0; i < 3; i++) {
+      // Surrogate
+      if (buffer[i] & SURROGATE_MASK) {
+        utf16_output[0] = uint16_t(buffer[i] >> 16);
+        utf16_output[1] = uint16_t(buffer[i] & 0xFFFF);
+        utf16_output += 2;
+      } else {
+        utf16_output[0] = uint16_t(buffer[i] & 0xFFFF);
+        utf16_output++;
+      }
+    }
+    return consumed;
+  } else {
+    // here we know that there is an error but we do not handle errors
+    return 12;
+  }
+}
 
-            // 5. store bytes
-            vst1q_u8(utf8_output, utf8_packed);
+/* end file src/arm64/arm_convert_utf8_to_utf16.cpp */
+/* begin file src/arm64/arm_convert_utf8_to_utf32.cpp */
+// Convert up to 12 bytes from utf8 to utf32 using a mask indicating the
+// end of the code points. Only the least significant 12 bits of the mask
+// are accessed.
+// It returns how many bytes were consumed (up to 12).
+size_t convert_masked_utf8_to_utf32(const char *input,
+                           uint64_t utf8_end_of_code_point_mask,
+                           char32_t *&utf32_out) {
+  // we use an approach where we try to process up to 12 input bytes.
+  // Why 12 input bytes and not 16? Because we are concerned with the size of
+  // the lookup tables. Also 12 is nicely divisible by two and three.
+  //
+  uint32_t*& utf32_output = reinterpret_cast(utf32_out);
+  uint8x16_t in = vld1q_u8(reinterpret_cast(input));
+  const uint16_t input_utf8_end_of_code_point_mask =
+      utf8_end_of_code_point_mask & 0xFFF;
+  //
+  // Optimization note: our main path below is load-latency dependent. Thus it is maybe
+  // beneficial to have fast paths that depend on branch prediction but have less latency.
+  // This results in more instructions but, potentially, also higher speeds.
+  //
+  // We first try a few fast paths.
+  if((utf8_end_of_code_point_mask & 0xffff) == 0xffff) {
+    // We process in chunks of 16 bytes.
+    // use fast implementation in src/simdutf/arm64/simd.h
+    // Ideally the compiler can keep the tables in registers.
+    simd8 temp{vreinterpretq_s8_u8(in)};
+    temp.store_ascii_as_utf32_tbl(utf32_out);
+    utf32_output += 16; // We wrote 16 32-bit characters.
+    return 16; // We consumed 16 bytes.
+  }
+  if(input_utf8_end_of_code_point_mask == 0x924) {
+    // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte UTF-32 code units.
+    // Convert to UTF-16
+    uint16x4_t composed_utf16 = convert_utf8_3_byte_to_utf16(in);
+    // Zero extend and store via ST2 with a zero.
+    uint16x4x2_t interleaver = {{ composed_utf16, vmov_n_u16(0) }};
+    vst2_u16(reinterpret_cast(utf32_output), interleaver);
+    utf32_output += 4; // We wrote 4 32-bit characters.
+    return 12; // We consumed 12 bytes.
+  }
+
+  // 2 byte sequences occur in short bursts in languages like Greek and Russian.
+  if(input_utf8_end_of_code_point_mask == 0xaaa) {
+    // We want to take 6 2-byte UTF-8 code units and turn them into 6 4-byte UTF-32 code units.
+    // Convert to UTF-16
+    uint16x8_t composed_utf16 = convert_utf8_2_byte_to_utf16(in);
+    // Zero extend and store via ST2 with a zero.
+    uint16x8x2_t interleaver = {{ composed_utf16, vmovq_n_u16(0) }};
+    vst2q_u16(reinterpret_cast(utf32_output), interleaver);
+    utf32_output += 6; // We wrote 6 32-bit characters.
+    return 12; // We consumed 12 bytes.
+  }
+  /// Either no fast path or an unimportant fast path.
 
-            // 6. adjust pointers
-            buf += 8;
-            utf8_output += row[0];
-            continue;
+  const uint8_t idx =
+      simdutf::tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0];
+  const uint8_t consumed =
+      simdutf::tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
 
-      } else {
-        // case: words from register produce either 1, 2 or 3 UTF-8 bytes
 
-        // check for invalid input
-        const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
-        const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff);
-        const uint16x8_t forbidden_bytemask = vandq_u16(vcleq_u16(utf16_packed, v_dfff), vcgeq_u16(utf16_packed, v_d800));
-        if (vmaxvq_u16(forbidden_bytemask) != 0) {
-          return std::make_pair(result(error_code::SURROGATE, buf - start), reinterpret_cast(utf8_output));
-        }
-
-  #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-          const uint16x8_t dup_even = make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
-                                      0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
-  #else
-          const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
-                                      0x0808, 0x0a0a, 0x0c0c, 0x0e0e};
-  #endif
-          /* In this branch we handle three cases:
-            1. [0000|0000|0ccc|cccc] => [0ccc|cccc]                           - single UFT-8 byte
-            2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
-            3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
-
-            We expand the input word (16-bit) into two words (32-bit), thus
-            we have room for four bytes. However, we need five distinct bit
-            layouts. Note that the last byte in cases #2 and #3 is the same.
-
-            We precompute byte 1 for case #1 and the common byte for cases #2 & #3
-            in register t2.
-
-            We precompute byte 1 for case #3 and -- **conditionally** -- precompute
-            either byte 1 for case #2 or byte 2 for case #3. Note that they
-            differ by exactly one bit.
-
-            Finally from these two words we build proper UTF-8 sequence, taking
-            into account the case (i.e, the number of bytes to write).
-          */
-          /**
-           * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce:
-           * t2 => [0ccc|cccc] [10cc|cccc]
-           * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb])
-           */
-  #define simdutf_vec(x) vmovq_n_u16(static_cast(x))
-          // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc]
-          const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), vreinterpretq_u8_u16(dup_even)));
-          // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc]
-          const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
-          // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc]
-          const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
-
-          // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa]
-          const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12);
-          // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000]
-          const uint16x8_t s1 = vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000));
-          // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000]
-          const uint16x8_t s1s = vshlq_n_u16(s1, 2);
-          // [00bb|bbbb|0000|aaaa]
-          const uint16x8_t s2 = vorrq_u16(s0, s1s);
-          // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa]
-          const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
-          const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
-          const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(utf16_packed, v_07ff);
-          const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
-          const uint16x8_t s4 = veorq_u16(s3, m0);
-  #undef simdutf_vec
+  if (idx < 64) {
+    // SIX (6) input code-code units
+    // Convert to UTF-16
+    uint16x8_t composed_utf16 = convert_utf8_1_to_2_byte_to_utf16(in, idx);
+    // Zero extend and store with ST2 and zero
+    uint16x8x2_t interleaver = {{ composed_utf16, vmovq_n_u16(0) }};
+    vst2q_u16(reinterpret_cast(utf32_output), interleaver);
+    utf32_output += 6; // We wrote 6 32-bit characters.
+    return consumed;
+  } else if (idx < 145) {
+    // FOUR (4) input code-code units
+    // UTF-16 and UTF-32 use similar algorithms, but UTF-32 skips the narrowing.
+    uint8x16_t sh = vld1q_u8(reinterpret_cast(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
+    // Shuffle
+    // 1 byte: 00000000 00000000 0ccccccc
+    // 2 byte: 00000000 110bbbbb 10cccccc
+    // 3 byte: 1110aaaa 10bbbbbb 10cccccc
+    uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh));
+    // Split
+    // 00000000 00000000 0ccccccc
+    uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7F));    // 6 or 7 bits
+    // Note: unmasked
+    // xxxxxxxx aaaaxxxx xxxxxxxx
+    uint32x4_t high = vshrq_n_u32(perm, 4);                   // 4 bits
+    // Use 16 bit bic instead of and.
+    // The top bits will be corrected later in the bsl
+    // 00000000 10bbbbbb 00000000
+    uint32x4_t middle =
+        vreinterpretq_u32_u16(vbicq_u16(vreinterpretq_u16_u32(perm), vmovq_n_u16(uint16_t(~0xff00)))); // 5 or 6 bits
+    // Combine low and middle with shift right accumulate
+    // 00000000 00xxbbbb bbcccccc
+    uint32x4_t lowmid = vsraq_n_u32(ascii, middle, 2);
+    // Insert top 4 bits from high byte with bitwise select
+    // 00000000 aaaabbbb bbcccccc
+    uint32x4_t composed = vbslq_u32(vmovq_n_u32(0x0000F000), high, lowmid);
+    vst1q_u32(utf32_output, composed);
+    utf32_output += 4; // We wrote 4 32-bit characters.
+    return consumed;
+  } else if (idx < 209) {
+    // THREE (3) input code-code units
+    if (input_utf8_end_of_code_point_mask == 0x888) {
+      // We want to take 3 4-byte UTF-8 code units and turn them into 3 4-byte UTF-32 code units.
+      // This uses the same method as the fixed 3 byte version, reversing and shift left insert.
+      // However, there is no need for a shuffle mask now, just rev16 and rev32.
+      //
+      // This version does not use the LUT, but 4 byte sequences are less common and the
+      // overhead of the extra memory access is less important than the early branch overhead
+      // in shorter sequences, so it comes last.
+
+      // Swap pairs of bytes
+      // 10dddddd|10cccccc|10bbbbbb|11110aaa
+      // 10cccccc 10dddddd|11110aaa 10bbbbbb
+      uint16x8_t swap1 = vreinterpretq_u16_u8(vrev16q_u8(in));
+      // Shift left and insert
+      // xxxxcccc ccdddddd|xxxxxxxa aabbbbbb
+      uint16x8_t merge1 = vsliq_n_u16(swap1, vreinterpretq_u16_u8(in), 6);
+      // Swap 16-bit lanes
+      // xxxxcccc ccdddddd xxxxxxxa aabbbbbb
+      // xxxxxxxa aabbbbbb xxxxcccc ccdddddd
+      uint32x4_t swap2 = vreinterpretq_u32_u16(vrev32q_u16(merge1));
+      // Shift insert again
+      // xxxxxxxx xxxaaabb bbbbcccc ccdddddd
+      uint32x4_t merge2 = vsliq_n_u32(swap2, vreinterpretq_u32_u16(merge1), 12);
+      // Clear the garbage
+      // 00000000 000aaabb bbbbcccc ccdddddd
+      uint32x4_t composed = vandq_u32(merge2, vmovq_n_u32(0x1FFFFF));
+      // Store
+      vst1q_u32(utf32_output, composed);
+
+      utf32_output += 3; // We wrote 3 32-bit characters.
+      return 12; // We consumed 12 bytes.
+    }
+    // Unlike UTF-16, doing a fast codepath doesn't have nearly as much benefit due to
+    // surrogates no longer being involved.
+    uint8x16_t sh = vld1q_u8(reinterpret_cast(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
+    // 1 byte: 00000000 00000000 00000000 0ddddddd
+    // 2 byte: 00000000 00000000 110ccccc 10dddddd
+    // 3 byte: 00000000 1110bbbb 10cccccc 10dddddd
+    // 4 byte: 11110aaa 10bbbbbb 10cccccc 10dddddd
+    uint32x4_t perm = vreinterpretq_u32_u8(vqtbl1q_u8(in, sh));
+    // Ascii
+    uint32x4_t ascii = vandq_u32(perm, vmovq_n_u32(0x7F));
+    uint32x4_t middle = vandq_u32(perm, vmovq_n_u32(0x3f00));
+    // When converting the way we do, the 3 byte prefix will be interpreted as the
+    // 18th bit being set, since the code would interpret the lead byte (0b1110bbbb)
+    // as a continuation byte (0b10bbbbbb). To fix this, we can either xor or do an
+    // 8 bit add of the 6th bit shifted right by 1. Since NEON has shift right accumulate,
+    // we use that.
+    //  4 byte   3 byte
+    // 10bbbbbb 1110bbbb
+    // 00000000 01000000 6th bit
+    // 00000000 00100000 shift right
+    // 10bbbbbb 0000bbbb add
+    // 00bbbbbb 0000bbbb mask
+    uint8x16_t correction =
+        vreinterpretq_u8_u32(vandq_u32(perm, vmovq_n_u32(0x00400000)));
+    uint32x4_t corrected =
+        vreinterpretq_u32_u8(vsraq_n_u8(vreinterpretq_u8_u32(perm), correction, 1));
+    // 00000000 00000000 0000cccc ccdddddd
+    uint32x4_t cd = vsraq_n_u32(ascii, middle, 2);
+    // Insert twice
+    // xxxxxxxx xxxaaabb bbbbxxxx xxxxxxxx
+    uint32x4_t ab = vbslq_u32(vmovq_n_u32(0x01C0000), vshrq_n_u32(corrected, 6), vshrq_n_u32(corrected, 4));
+    // 00000000 000aaabb bbbbcccc ccdddddd
+    uint32x4_t composed = vbslq_u32(vmovq_n_u32(0xFFE00FFF), cd, ab);
+    // Store
+    vst1q_u32(utf32_output, composed);
+    utf32_output += 3; // We wrote 3 32-bit characters.
+    return consumed;
+  } else {
+    // here we know that there is an error but we do not handle errors
+    return 12;
+  }
+}
+/* end file src/arm64/arm_convert_utf8_to_utf32.cpp */
+/* begin file src/arm64/arm_convert_utf8_to_latin1.cpp */
+// Convert up to 16 bytes from utf8 to utf16 using a mask indicating the
+// end of the code points. Only the least significant 12 bits of the mask
+// are accessed.
+// It returns how many bytes were consumed (up to 16, usually 12).
+size_t convert_masked_utf8_to_latin1(const char *input,
+                           uint64_t utf8_end_of_code_point_mask,
+                           char *&latin1_output) {
+  // we use an approach where we try to process up to 12 input bytes.
+  // Why 12 input bytes and not 16? Because we are concerned with the size of
+  // the lookup tables. Also 12 is nicely divisible by two and three.
+  //
+  uint8x16_t in = vld1q_u8(reinterpret_cast(input));
+  const uint16_t input_utf8_end_of_code_point_mask =
+      utf8_end_of_code_point_mask & 0xfff;
+  //
+  // Optimization note: our main path below is load-latency dependent. Thus it is maybe
+  // beneficial to have fast paths that depend on branch prediction but have less latency.
+  // This results in more instructions but, potentially, also higher speeds.
 
-          // 4. expand words 16-bit => 32-bit
-          const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4));
-          const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4));
+  // We first try a few fast paths.
+  // The obvious first test is ASCII, which actually consumes the full 16.
+  if((utf8_end_of_code_point_mask & 0xFFFF) == 0xffff) {
+    // We process in chunks of 16 bytes
+    vst1q_u8(reinterpret_cast(latin1_output), in);
+    latin1_output += 16; // We wrote 16 18-bit characters.
+    return 16; // We consumed 16 bytes.
+  }
+  /// We do not have a fast path available, or the fast path is unimportant, so we fallback.
+  const uint8_t idx =
+      simdutf::tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0];
 
-          // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
-          const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
-          const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
-  #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-          const uint16x8_t onemask = make_uint16x8_t(0x0001, 0x0004,
-                                      0x0010, 0x0040,
-                                      0x0100, 0x0400,
-                                      0x1000, 0x4000 );
-          const uint16x8_t twomask = make_uint16x8_t(0x0002, 0x0008,
-                                      0x0020, 0x0080,
-                                      0x0200, 0x0800,
-                                      0x2000, 0x8000 );
-  #else
-          const uint16x8_t onemask = { 0x0001, 0x0004,
-                                      0x0010, 0x0040,
-                                      0x0100, 0x0400,
-                                      0x1000, 0x4000 };
-          const uint16x8_t twomask = { 0x0002, 0x0008,
-                                      0x0020, 0x0080,
-                                      0x0200, 0x0800,
-                                      0x2000, 0x8000 };
-  #endif
-          const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
-          const uint16_t mask = vaddvq_u16(combined);
-          // The following fast path may or may not be beneficial.
-          /*if(mask == 0) {
-            // We only have three-byte words. Use fast path.
-            const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0};
-            const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle);
-            const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle);
-            vst1q_u8(utf8_output, utf8_0);
-            utf8_output += 12;
-            vst1q_u8(utf8_output, utf8_1);
-            utf8_output += 12;
-            buf += 8;
-            continue;
-          }*/
-          const uint8_t mask0 = uint8_t(mask);
+  const uint8_t consumed =
+      simdutf::tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
+  // this indicates an invalid input:
+  if(idx >= 64) { return consumed; }
+  // Here we should have (idx < 64), if not, there is a bug in the validation or elsewhere.
+  // SIX (6) input code-code units
+  // this is a relatively easy scenario
+  // we process SIX (6) input code-code units. The max length in bytes of six code
+  // code units spanning between 1 and 2 bytes each is 12 bytes.
+  // Converts 6 1-2 byte UTF-8 characters to 6 UTF-16 characters.
+  // This is a relatively easy scenario
+  // we process SIX (6) input code-code units. The max length in bytes of six code
+  // code units spanning between 1 and 2 bytes each is 12 bytes.
+  uint8x16_t sh = vld1q_u8(reinterpret_cast(simdutf::tables::utf8_to_utf16::shufutf8[idx]));
+  // Shuffle
+  // 1 byte: 00000000 0bbbbbbb
+  // 2 byte: 110aaaaa 10bbbbbb
+  uint16x8_t perm = vreinterpretq_u16_u8(vqtbl1q_u8(in, sh));
+  // Mask
+  // 1 byte: 00000000 0bbbbbbb
+  // 2 byte: 00000000 00bbbbbb
+  uint16x8_t ascii = vandq_u16(perm, vmovq_n_u16(0x7f)); // 6 or 7 bits
+  // 1 byte: 00000000 00000000
+  // 2 byte: 000aaaaa 00000000
+  uint16x8_t highbyte = vandq_u16(perm, vmovq_n_u16(0x1f00)); // 5 bits
+  // Combine with a shift right accumulate
+  // 1 byte: 00000000 0bbbbbbb
+  // 2 byte: 00000aaa aabbbbbb
+  uint16x8_t composed = vsraq_n_u16(ascii, highbyte, 2);
+  // writing 8 bytes even though we only care about the first 6 bytes.
+  uint8x8_t latin1_packed = vmovn_u16(composed);
+  vst1_u8(reinterpret_cast(latin1_output), latin1_packed);
+  latin1_output += 6; // We wrote 6 bytes.
+  return consumed;
+}
 
-          const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
-          const uint8x16_t shuffle0 = vld1q_u8(row0 + 1);
-          const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0);
+/* end file src/arm64/arm_convert_utf8_to_latin1.cpp */
 
-          const uint8_t mask1 = static_cast(mask >> 8);
-          const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
-          const uint8x16_t shuffle1 = vld1q_u8(row1 + 1);
-          const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1);
+/* begin file src/arm64/arm_convert_utf16_to_latin1.cpp */
 
-          vst1q_u8(utf8_output, utf8_0);
-          utf8_output += row0[0];
-          vst1q_u8(utf8_output, utf8_1);
-          utf8_output += row1[0];
+template 
+std::pair arm_convert_utf16_to_latin1(const char16_t* buf, size_t len, char* latin1_output) {
+  const char16_t* end = buf + len;
+  while (buf + 8 <= end) {
+    uint16x8_t in = vld1q_u16(reinterpret_cast(buf));
+    if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
+    if (vmaxvq_u16(in) <= 0xff) {
+        // 1. pack the bytes
+        uint8x8_t latin1_packed = vmovn_u16(in);
+        // 2. store (8 bytes)
+        vst1_u8(reinterpret_cast(latin1_output), latin1_packed);
+        // 3. adjust pointers
+        buf += 8;
+        latin1_output += 8;
+    } else {
+      return std::make_pair(nullptr, reinterpret_cast(latin1_output));
+    }
+  } // while
+  return std::make_pair(buf, latin1_output);
+}
 
-          buf += 8;
-      }
-    // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> will produce four UTF-8 bytes.
+template 
+std::pair arm_convert_utf16_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) {
+  const char16_t* start = buf;
+  const char16_t* end = buf + len;
+  while (buf + 8 <= end) {
+    uint16x8_t in = vld1q_u16(reinterpret_cast(buf));
+    if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
+    if (vmaxvq_u16(in) <= 0xff) {
+        // 1. pack the bytes
+        uint8x8_t latin1_packed = vmovn_u16(in);
+        // 2. store (8 bytes)
+        vst1_u8(reinterpret_cast(latin1_output), latin1_packed);
+        // 3. adjust pointers
+        buf += 8;
+        latin1_output += 8;
     } else {
       // Let us do a scalar fallback.
-      // It may seem wasteful to use scalar code, but being efficient with SIMD
-      // in the presence of surrogate pairs may require non-trivial tables.
-      size_t forward = 15;
-      size_t k = 0;
-      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
-      for(; k < forward; k++) {
-        uint32_t word = buf[k];
-        if((word & 0xFFFFFF80)==0) {
-          *utf8_output++ = char(word);
-        } else if((word & 0xFFFFF800)==0) {
-          *utf8_output++ = char((word>>6) | 0b11000000);
-          *utf8_output++ = char((word & 0b111111) | 0b10000000);
-        } else if((word & 0xFFFF0000)==0) {
-          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), reinterpret_cast(utf8_output)); }
-          *utf8_output++ = char((word>>12) | 0b11100000);
-          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
-          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+      for(int k = 0; k < 8; k++) {
+        uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
+        if(word <= 0xff) {
+          *latin1_output++ = char(word);
         } else {
-          if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), reinterpret_cast(utf8_output)); }
-          *utf8_output++ = char((word>>18) | 0b11110000);
-          *utf8_output++ = char(((word>>12) & 0b111111) | 0b10000000);
-          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
-          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+          return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), latin1_output);
         }
       }
-      buf += k;
     }
   } // while
-
-  return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf8_output));
+  return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output);
 }
-/* end file src/arm64/arm_convert_utf32_to_utf8.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=arm64/arm_convert_utf32_to_utf16.cpp
-/* begin file src/arm64/arm_convert_utf32_to_utf16.cpp */
-template 
-std::pair arm_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_out) {
-  uint16_t * utf16_output = reinterpret_cast(utf16_out);
-  const char32_t* end = buf + len;
+/* end file src/arm64/arm_convert_utf16_to_latin1.cpp */
+/* begin file src/arm64/arm_convert_utf16_to_utf8.cpp */
+/*
+    The vectorized algorithm works on single SSE register i.e., it
+    loads eight 16-bit code units.
 
-  uint16x4_t forbidden_bytemask = vmov_n_u16(0x0);
+    We consider three cases:
+    1. an input register contains no surrogates and each value
+       is in range 0x0000 .. 0x07ff.
+    2. an input register contains no surrogates and values are
+       is in range 0x0000 .. 0xffff.
+    3. an input register contains surrogates --- i.e. codepoints
+       can have 16 or 32 bits.
 
-  while(buf + 4 <= end) {
-    uint32x4_t in = vld1q_u32(reinterpret_cast(buf));
+    Ad 1.
 
-    // Check if no bits set above 16th
-    if(vmaxvq_u32(in) <= 0xFFFF) {
-      uint16x4_t utf16_packed = vmovn_u32(in);
+    When values are less than 0x0800, it means that a 16-bit code unit
+    can be converted into: 1) single UTF8 byte (when it's an ASCII
+    char) or 2) two UTF8 bytes.
 
-      const uint16x4_t v_d800 = vmov_n_u16((uint16_t)0xd800);
-      const uint16x4_t v_dfff = vmov_n_u16((uint16_t)0xdfff);
-      forbidden_bytemask = vorr_u16(vand_u16(vcle_u16(utf16_packed, v_dfff), vcge_u16(utf16_packed, v_d800)), forbidden_bytemask);
+    For this case we do only some shuffle to obtain these 2-byte
+    codes and finally compress the whole SSE register with a single
+    shuffle.
 
-      if (!match_system(big_endian)) {
-        #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-        const uint8x8_t swap = make_uint8x8_t(1, 0, 3, 2, 5, 4, 7, 6);
-        #else
-        const uint8x8_t swap = {1, 0, 3, 2, 5, 4, 7, 6};
-        #endif
-        utf16_packed = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(utf16_packed), swap));
-      }
-      vst1_u16(utf16_output, utf16_packed);
-      utf16_output += 4;
-      buf += 4;
-    } else {
-      size_t forward = 3;
-      size_t k = 0;
-      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
-      for(; k < forward; k++) {
-        uint32_t word = buf[k];
-        if((word & 0xFFFF0000)==0) {
-          // will not generate a surrogate pair
-          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(nullptr, reinterpret_cast(utf16_output)); }
-          *utf16_output++ = !match_system(big_endian) ? char16_t(word >> 8 | word << 8) : char16_t(word);
-        } else {
-          // will generate a surrogate pair
-          if (word > 0x10FFFF) { return std::make_pair(nullptr, reinterpret_cast(utf16_output)); }
-          word -= 0x10000;
-          uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
-          uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
-          if (!match_system(big_endian)) {
-            high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8);
-            low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8);
-          }
-          *utf16_output++ = char16_t(high_surrogate);
-          *utf16_output++ = char16_t(low_surrogate);
-        }
-      }
-      buf += k;
-    }
-  }
+    We need 256-entry lookup table to get a compression pattern
+    and the number of output bytes in the compressed vector register.
+    Each entry occupies 17 bytes.
 
-  // check for invalid input
-  if (vmaxv_u16(forbidden_bytemask) != 0) {
-    return std::make_pair(nullptr, reinterpret_cast(utf16_output));
-  }
+    Ad 2.
 
-  return std::make_pair(buf, reinterpret_cast(utf16_output));
-}
+    When values fit in 16-bit code units, but are above 0x07ff, then
+    a single word may produce one, two or three UTF8 bytes.
 
+    We prepare data for all these three cases in two registers.
+    The first register contains lower two UTF8 bytes (used in all
+    cases), while the second one contains just the third byte for
+    the three-UTF8-bytes case.
 
-template 
-std::pair arm_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_out) {
-  uint16_t * utf16_output = reinterpret_cast(utf16_out);
-  const char32_t* start = buf;
-  const char32_t* end = buf + len;
+    Finally these two registers are interleaved forming eight-element
+    array of 32-bit values. The array spans two SSE registers.
+    The bytes from the registers are compressed using two shuffles.
 
-  while(buf + 4 <= end) {
-    uint32x4_t in = vld1q_u32(reinterpret_cast(buf));
+    We need 256-entry lookup table to get a compression pattern
+    and the number of output bytes in the compressed vector register.
+    Each entry occupies 17 bytes.
 
-    // Check if no bits set above 16th
-    if(vmaxvq_u32(in) <= 0xFFFF) {
-      uint16x4_t utf16_packed = vmovn_u32(in);
 
-      const uint16x4_t v_d800 = vmov_n_u16((uint16_t)0xd800);
-      const uint16x4_t v_dfff = vmov_n_u16((uint16_t)0xdfff);
-      const uint16x4_t forbidden_bytemask = vand_u16(vcle_u16(utf16_packed, v_dfff), vcge_u16(utf16_packed, v_d800));
-      if (vmaxv_u16(forbidden_bytemask) != 0) {
-        return std::make_pair(result(error_code::SURROGATE, buf - start), reinterpret_cast(utf16_output));
-      }
+    To summarize:
+    - We need two 256-entry tables that have 8704 bytes in total.
+*/
+/*
+  Returns a pair: the first unprocessed byte from buf and utf8_output
+  A scalar routing should carry on the conversion of the tail.
+*/
+template 
+std::pair arm_convert_utf16_to_utf8(const char16_t* buf, size_t len, char* utf8_out) {
+  uint8_t * utf8_output = reinterpret_cast(utf8_out);
+  const char16_t* end = buf + len;
 
-      if (!match_system(big_endian)) {
-        #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
-        const uint8x8_t swap = make_uint8x8_t(1, 0, 3, 2, 5, 4, 7, 6);
-        #else
-        const uint8x8_t swap = {1, 0, 3, 2, 5, 4, 7, 6};
-        #endif
-        utf16_packed = vreinterpret_u16_u8(vtbl1_u8(vreinterpret_u8_u16(utf16_packed), swap));
-      }
-      vst1_u16(utf16_output, utf16_packed);
-      utf16_output += 4;
-      buf += 4;
-    } else {
-      size_t forward = 3;
-      size_t k = 0;
-      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
-      for(; k < forward; k++) {
-        uint32_t word = buf[k];
-        if((word & 0xFFFF0000)==0) {
-          // will not generate a surrogate pair
-          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), reinterpret_cast(utf16_output)); }
-          *utf16_output++ = !match_system(big_endian) ? char16_t(word >> 8 | word << 8) : char16_t(word);
+  const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
+  const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
+  const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
+
+  while (buf + 16 <= end) {
+    uint16x8_t in = vld1q_u16(reinterpret_cast(buf));
+    if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
+    if(vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!!
+        // It is common enough that we have sequences of 16 consecutive ASCII characters.
+        uint16x8_t nextin = vld1q_u16(reinterpret_cast(buf) + 8);
+        if (!match_system(big_endian)) { nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); }
+        if(vmaxvq_u16(nextin) > 0x7F) {
+          // 1. pack the bytes
+          // obviously suboptimal.
+          uint8x8_t utf8_packed = vmovn_u16(in);
+          // 2. store (8 bytes)
+          vst1_u8(utf8_output, utf8_packed);
+          // 3. adjust pointers
+          buf += 8;
+          utf8_output += 8;
+          in = nextin;
         } else {
-          // will generate a surrogate pair
-          if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), reinterpret_cast(utf16_output)); }
-          word -= 0x10000;
-          uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
-          uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
-          if (!match_system(big_endian)) {
-            high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8);
-            low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8);
-          }
-          *utf16_output++ = char16_t(high_surrogate);
-          *utf16_output++ = char16_t(low_surrogate);
+          // 1. pack the bytes
+          // obviously suboptimal.
+          uint8x16_t utf8_packed = vmovn_high_u16(vmovn_u16(in), nextin);
+          // 2. store (16 bytes)
+          vst1q_u8(utf8_output, utf8_packed);
+          // 3. adjust pointers
+          buf += 16;
+          utf8_output += 16;
+          continue; // we are done for this round!
         }
-      }
-      buf += k;
     }
-  }
 
-  return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf16_output));
-}
-/* end file src/arm64/arm_convert_utf32_to_utf16.cpp */
-} // unnamed namespace
-} // namespace arm64
-} // namespace simdutf
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/buf_block_reader.h
-/* begin file src/generic/buf_block_reader.h */
-namespace simdutf {
-namespace arm64 {
-namespace {
+    if (vmaxvq_u16(in) <= 0x7FF) {
+          // 1. prepare 2-byte values
+          // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
+          // expected output   : [110a|aaaa|10bb|bbbb] x 8
+          const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
+          const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
 
-// Walks through a buffer in block-sized increments, loading the last part with spaces
-template
-struct buf_block_reader {
-public:
-  simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
-  simdutf_really_inline size_t block_index();
-  simdutf_really_inline bool has_full_block() const;
-  simdutf_really_inline const uint8_t *full_block() const;
-  /**
-   * Get the last block, padded with spaces.
-   *
-   * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this
-   * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there
-   * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding.
-   *
-   * @return the number of effective characters in the last block.
-   */
-  simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
-  simdutf_really_inline void advance();
-private:
-  const uint8_t *buf;
-  const size_t len;
-  const size_t lenminusstep;
-  size_t idx;
-};
+          // t0 = [000a|aaaa|bbbb|bb00]
+          const uint16x8_t t0 = vshlq_n_u16(in, 2);
+          // t1 = [000a|aaaa|0000|0000]
+          const uint16x8_t t1 = vandq_u16(t0, v_1f00);
+          // t2 = [0000|0000|00bb|bbbb]
+          const uint16x8_t t2 = vandq_u16(in, v_003f);
+          // t3 = [000a|aaaa|00bb|bbbb]
+          const uint16x8_t t3 = vorrq_u16(t1, t2);
+          // t4 = [110a|aaaa|10bb|bbbb]
+          const uint16x8_t t4 = vorrq_u16(t3, v_c080);
+          // 2. merge ASCII and 2-byte codewords
+          const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
+          const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
+          const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4));
+          // 3. prepare bitmask for 8-bit lookup
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+          const uint16x8_t mask = make_uint16x8_t(0x0001, 0x0004,
+                                    0x0010, 0x0040,
+                                    0x0002, 0x0008,
+                                    0x0020, 0x0080);
+#else
+          const uint16x8_t mask = { 0x0001, 0x0004,
+                                    0x0010, 0x0040,
+                                    0x0002, 0x0008,
+                                    0x0020, 0x0080 };
+#endif
+          uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
+          // 4. pack the bytes
+          const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
+          const uint8x16_t shuffle = vld1q_u8(row + 1);
+          const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
 
-// Routines to print masks and text for debugging bitmask operations
-simdutf_unused static char * format_input_text_64(const uint8_t *text) {
-  static char *buf = reinterpret_cast(malloc(sizeof(simd8x64) + 1));
-  for (size_t i=0; i); i++) {
-    buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
-  }
-  buf[sizeof(simd8x64)] = '\0';
-  return buf;
-}
+          // 5. store bytes
+          vst1q_u8(utf8_output, utf8_packed);
 
-// Routines to print masks and text for debugging bitmask operations
-simdutf_unused static char * format_input_text(const simd8x64& in) {
-  static char *buf = reinterpret_cast(malloc(sizeof(simd8x64) + 1));
-  in.store(reinterpret_cast(buf));
-  for (size_t i=0; i); i++) {
-    if (buf[i] < ' ') { buf[i] = '_'; }
-  }
-  buf[sizeof(simd8x64)] = '\0';
-  return buf;
-}
+          // 6. adjust pointers
+          buf += 8;
+          utf8_output += row[0];
+          continue;
 
-simdutf_unused static char * format_mask(uint64_t mask) {
-  static char *buf = reinterpret_cast(malloc(64 + 1));
-  for (size_t i=0; i<64; i++) {
-    buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
-  }
-  buf[64] = '\0';
-  return buf;
-}
+    }
+    const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
+    // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
+    // it is likely an uncommon occurrence.
+    if (vmaxvq_u16(surrogates_bytemask) == 0) {
+        // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+        const uint16x8_t dup_even = make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
+                                     0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
+#else
+        const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
+                                     0x0808, 0x0a0a, 0x0c0c, 0x0e0e};
+#endif
+        /* In this branch we handle three cases:
+           1. [0000|0000|0ccc|cccc] => [0ccc|cccc]                           - single UFT-8 byte
+           2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
+           3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-template
-simdutf_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
+          We expand the input word (16-bit) into two code units (32-bit), thus
+          we have room for four bytes. However, we need five distinct bit
+          layouts. Note that the last byte in cases #2 and #3 is the same.
 
-template
-simdutf_really_inline size_t buf_block_reader::block_index() { return idx; }
+          We precompute byte 1 for case #1 and the common byte for cases #2 & #3
+          in register t2.
 
-template
-simdutf_really_inline bool buf_block_reader::has_full_block() const {
-  return idx < lenminusstep;
-}
+          We precompute byte 1 for case #3 and -- **conditionally** -- precompute
+          either byte 1 for case #2 or byte 2 for case #3. Note that they
+          differ by exactly one bit.
 
-template
-simdutf_really_inline const uint8_t *buf_block_reader::full_block() const {
-  return &buf[idx];
-}
+          Finally from these two code units we build proper UTF-8 sequence, taking
+          into account the case (i.e, the number of bytes to write).
+        */
+        /**
+         * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce:
+         * t2 => [0ccc|cccc] [10cc|cccc]
+         * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb])
+         */
+#define simdutf_vec(x) vmovq_n_u16(static_cast(x))
+        // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc]
+        const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even)));
+        // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc]
+        const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
+        // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc]
+        const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
 
-template
-simdutf_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const {
-  if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers
-  std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
-  std::memcpy(dst, buf + idx, len - idx);
-  return len - idx;
-}
-
-template
-simdutf_really_inline void buf_block_reader::advance() {
-  idx += STEP_SIZE;
-}
+        // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa]
+        const uint16x8_t s0 = vshrq_n_u16(in, 12);
+        // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000]
+        const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000));
+        // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000]
+        const uint16x8_t s1s = vshlq_n_u16(s1, 2);
+        // [00bb|bbbb|0000|aaaa]
+        const uint16x8_t s2 = vorrq_u16(s0, s1s);
+        // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa]
+        const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
+        const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
+        const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff);
+        const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
+        const uint16x8_t s4 = veorq_u16(s3, m0);
+#undef simdutf_vec
 
-} // unnamed namespace
-} // namespace arm64
-} // namespace simdutf
-/* end file src/generic/buf_block_reader.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_lookup4_algorithm.h
-/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
-namespace simdutf {
-namespace arm64 {
-namespace {
-namespace utf8_validation {
+        // 4. expand code units 16-bit => 32-bit
+        const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4));
+        const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4));
 
-using namespace simd;
+        // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
+        const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
+        const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+        const uint16x8_t onemask = make_uint16x8_t(0x0001, 0x0004,
+                                    0x0010, 0x0040,
+                                    0x0100, 0x0400,
+                                    0x1000, 0x4000 );
+        const uint16x8_t twomask = make_uint16x8_t(0x0002, 0x0008,
+                                    0x0020, 0x0080,
+                                    0x0200, 0x0800,
+                                    0x2000, 0x8000 );
+#else
+        const uint16x8_t onemask = { 0x0001, 0x0004,
+                                    0x0010, 0x0040,
+                                    0x0100, 0x0400,
+                                    0x1000, 0x4000 };
+        const uint16x8_t twomask = { 0x0002, 0x0008,
+                                    0x0020, 0x0080,
+                                    0x0200, 0x0800,
+                                    0x2000, 0x8000 };
+#endif
+        const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
+        const uint16_t mask = vaddvq_u16(combined);
+        // The following fast path may or may not be beneficial.
+        /*if(mask == 0) {
+          // We only have three-byte code units. Use fast path.
+          const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0};
+          const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle);
+          const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle);
+          vst1q_u8(utf8_output, utf8_0);
+          utf8_output += 12;
+          vst1q_u8(utf8_output, utf8_1);
+          utf8_output += 12;
+          buf += 8;
+          continue;
+        }*/
+        const uint8_t mask0 = uint8_t(mask);
 
-  simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
-// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
-// Bit 1 = Too Long (ASCII followed by continuation)
-// Bit 2 = Overlong 3-byte
-// Bit 4 = Surrogate
-// Bit 5 = Overlong 2-byte
-// Bit 7 = Two Continuations
-    constexpr const uint8_t TOO_SHORT   = 1<<0; // 11______ 0_______
-                                                // 11______ 11______
-    constexpr const uint8_t TOO_LONG    = 1<<1; // 0_______ 10______
-    constexpr const uint8_t OVERLONG_3  = 1<<2; // 11100000 100_____
-    constexpr const uint8_t SURROGATE   = 1<<4; // 11101101 101_____
-    constexpr const uint8_t OVERLONG_2  = 1<<5; // 1100000_ 10______
-    constexpr const uint8_t TWO_CONTS   = 1<<7; // 10______ 10______
-    constexpr const uint8_t TOO_LARGE   = 1<<3; // 11110100 1001____
-                                                // 11110100 101_____
-                                                // 11110101 1001____
-                                                // 11110101 101_____
-                                                // 1111011_ 1001____
-                                                // 1111011_ 101_____
-                                                // 11111___ 1001____
-                                                // 11111___ 101_____
-    constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
-                                                // 11110101 1000____
-                                                // 1111011_ 1000____
-                                                // 11111___ 1000____
-    constexpr const uint8_t OVERLONG_4  = 1<<6; // 11110000 1000____
+        const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
+        const uint8x16_t shuffle0 = vld1q_u8(row0 + 1);
+        const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0);
 
-    const simd8 byte_1_high = prev1.shr<4>().lookup_16(
-      // 0_______ ________ 
-      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
-      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
-      // 10______ ________ 
-      TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
-      // 1100____ ________ 
-      TOO_SHORT | OVERLONG_2,
-      // 1101____ ________ 
-      TOO_SHORT,
-      // 1110____ ________ 
-      TOO_SHORT | OVERLONG_3 | SURROGATE,
-      // 1111____ ________ 
-      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4
-    );
-    constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
-    const simd8 byte_1_low = (prev1 & 0x0F).lookup_16(
-      // ____0000 ________
-      CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
-      // ____0001 ________
-      CARRY | OVERLONG_2,
-      // ____001_ ________
-      CARRY,
-      CARRY,
+        const uint8_t mask1 = static_cast(mask >> 8);
+        const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
+        const uint8x16_t shuffle1 = vld1q_u8(row1 + 1);
+        const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1);
 
-      // ____0100 ________
-      CARRY | TOO_LARGE,
-      // ____0101 ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      // ____011_ ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
+        vst1q_u8(utf8_output, utf8_0);
+        utf8_output += row0[0];
+        vst1q_u8(utf8_output, utf8_1);
+        utf8_output += row1[0];
 
-      // ____1___ ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      // ____1101 ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000
-    );
-    const simd8 byte_2_high = input.shr<4>().lookup_16(
-      // ________ 0_______ 
-      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
-      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+        buf += 8;
+    // surrogate pair(s) in a register
+    } else {
+      // Let us do a scalar fallback.
+      // It may seem wasteful to use scalar code, but being efficient with SIMD
+      // in the presence of surrogate pairs may require non-trivial tables.
+      size_t forward = 15;
+      size_t k = 0;
+      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
+      for(; k < forward; k++) {
+        uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
+        if((word & 0xFF80)==0) {
+          *utf8_output++ = char(word);
+        } else if((word & 0xF800)==0) {
+          *utf8_output++ = char((word>>6) | 0b11000000);
+          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+        } else if((word &0xF800 ) != 0xD800) {
+          *utf8_output++ = char((word>>12) | 0b11100000);
+          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
+          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+        } else {
+          // must be a surrogate pair
+          uint16_t diff = uint16_t(word - 0xD800);
+          uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
+          k++;
+          uint16_t diff2 = uint16_t(next_word - 0xDC00);
+          if((diff | diff2) > 0x3FF)  { return std::make_pair(nullptr, reinterpret_cast(utf8_output)); }
+          uint32_t value = (diff << 10) + diff2 + 0x10000;
+          *utf8_output++ = char((value>>18) | 0b11110000);
+          *utf8_output++ = char(((value>>12) & 0b111111) | 0b10000000);
+          *utf8_output++ = char(((value>>6) & 0b111111) | 0b10000000);
+          *utf8_output++ = char((value & 0b111111) | 0b10000000);
+        }
+      }
+      buf += k;
+    }
+  } // while
 
-      // ________ 1000____
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
-      // ________ 1001____
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
-      // ________ 101_____
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
+  return std::make_pair(buf, reinterpret_cast(utf8_output));
+}
 
-      // ________ 11______
-      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
-    );
-    return (byte_1_high & byte_1_low & byte_2_high);
-  }
-  simdutf_really_inline simd8 check_multibyte_lengths(const simd8 input,
-      const simd8 prev_input, const simd8 sc) {
-    simd8 prev2 = input.prev<2>(prev_input);
-    simd8 prev3 = input.prev<3>(prev_input);
-    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
-    simd8 must23_80 = must23 & uint8_t(0x80);
-    return must23_80 ^ sc;
-  }
 
-  //
-  // Return nonzero if there are incomplete multibyte characters at the end of the block:
-  // e.g. if there is a 4-byte character, but it's 3 bytes from the end.
-  //
-  simdutf_really_inline simd8 is_incomplete(const simd8 input) {
-    // If the previous input's last 3 bytes match this, they're too short (they ended at EOF):
-    // ... 1111____ 111_____ 11______
-    static const uint8_t max_array[32] = {
-      255, 255, 255, 255, 255, 255, 255, 255,
-      255, 255, 255, 255, 255, 255, 255, 255,
-      255, 255, 255, 255, 255, 255, 255, 255,
-      255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1
-    };
-    const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]);
-    return input.gt_bits(max_value);
-  }
+/*
+  Returns a pair: a result struct and utf8_output.
+  If there is an error, the count field of the result is the position of the error.
+  Otherwise, it is the position of the first unprocessed byte in buf (even if finished).
+  A scalar routing should carry on the conversion of the tail if needed.
+*/
+template 
+std::pair arm_convert_utf16_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_out) {
+  uint8_t * utf8_output = reinterpret_cast(utf8_out);
+    const char16_t* start = buf;
+  const char16_t* end = buf + len;
 
-  struct utf8_checker {
-    // If this is nonzero, there has been a UTF-8 error.
-    simd8 error;
-    // The last input we received
-    simd8 prev_input_block;
-    // Whether the last input we received was incomplete (used for ASCII fast path)
-    simd8 prev_incomplete;
+  const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
+  const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
+  const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
 
-    //
-    // Check whether the current bytes are valid UTF-8.
-    //
-    simdutf_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) {
-      // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
-      // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
-      simd8 prev1 = input.prev<1>(prev_input);
-      simd8 sc = check_special_cases(input, prev1);
-      this->error |= check_multibyte_lengths(input, prev_input, sc);
-    }
-
-    // The only problem that can happen at EOF is that a multibyte character is too short
-    // or a byte value too large in the last bytes: check_special_cases only checks for bytes
-    // too large in the first of two bytes.
-    simdutf_really_inline void check_eof() {
-      // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
-      // possibly finish them.
-      this->error |= this->prev_incomplete;
-    }
-
-    simdutf_really_inline void check_next_input(const simd8x64& input) {
-      if(simdutf_likely(is_ascii(input))) {
-        this->error |= this->prev_incomplete;
-      } else {
-        // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
-        static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
-            "We support either two or four chunks per 64-byte block.");
-        if(simd8x64::NUM_CHUNKS == 2) {
-          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
-          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-        } else if(simd8x64::NUM_CHUNKS == 4) {
-          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
-          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-          this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
-          this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+  while (buf + 16 <= end) {
+    uint16x8_t in = vld1q_u16(reinterpret_cast(buf));
+    if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
+    if(vmaxvq_u16(in) <= 0x7F) { // ASCII fast path!!!!
+        // It is common enough that we have sequences of 16 consecutive ASCII characters.
+        uint16x8_t nextin = vld1q_u16(reinterpret_cast(buf) + 8);
+        if (!match_system(big_endian)) { nextin = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(nextin))); }
+        if(vmaxvq_u16(nextin) > 0x7F) {
+          // 1. pack the bytes
+          // obviously suboptimal.
+          uint8x8_t utf8_packed = vmovn_u16(in);
+          // 2. store (8 bytes)
+          vst1_u8(utf8_output, utf8_packed);
+          // 3. adjust pointers
+          buf += 8;
+          utf8_output += 8;
+          in = nextin;
+        } else {
+          // 1. pack the bytes
+          // obviously suboptimal.
+          uint8x16_t utf8_packed = vmovn_high_u16(vmovn_u16(in), nextin);
+          // 2. store (16 bytes)
+          vst1q_u8(utf8_output, utf8_packed);
+          // 3. adjust pointers
+          buf += 16;
+          utf8_output += 16;
+          continue; // we are done for this round!
         }
-        this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]);
-        this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1];
-
-      }
     }
 
-    // do not forget to call check_eof!
-    simdutf_really_inline bool errors() const {
-      return this->error.any_bits_set_anywhere();
-    }
+    if (vmaxvq_u16(in) <= 0x7FF) {
+          // 1. prepare 2-byte values
+          // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
+          // expected output   : [110a|aaaa|10bb|bbbb] x 8
+          const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
+          const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
 
-  }; // struct utf8_checker
-} // namespace utf8_validation
+          // t0 = [000a|aaaa|bbbb|bb00]
+          const uint16x8_t t0 = vshlq_n_u16(in, 2);
+          // t1 = [000a|aaaa|0000|0000]
+          const uint16x8_t t1 = vandq_u16(t0, v_1f00);
+          // t2 = [0000|0000|00bb|bbbb]
+          const uint16x8_t t2 = vandq_u16(in, v_003f);
+          // t3 = [000a|aaaa|00bb|bbbb]
+          const uint16x8_t t3 = vorrq_u16(t1, t2);
+          // t4 = [110a|aaaa|10bb|bbbb]
+          const uint16x8_t t4 = vorrq_u16(t3, v_c080);
+          // 2. merge ASCII and 2-byte codewords
+          const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
+          const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
+          const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, in, t4));
+          // 3. prepare bitmask for 8-bit lookup
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+          const uint16x8_t mask = make_uint16x8_t(0x0001, 0x0004,
+                                    0x0010, 0x0040,
+                                    0x0002, 0x0008,
+                                    0x0020, 0x0080);
+#else
+          const uint16x8_t mask = { 0x0001, 0x0004,
+                                    0x0010, 0x0040,
+                                    0x0002, 0x0008,
+                                    0x0020, 0x0080 };
+#endif
+          uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
+          // 4. pack the bytes
+          const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
+          const uint8x16_t shuffle = vld1q_u8(row + 1);
+          const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
 
-using utf8_validation::utf8_checker;
+          // 5. store bytes
+          vst1q_u8(utf8_output, utf8_packed);
 
-} // unnamed namespace
-} // namespace arm64
-} // namespace simdutf
-/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_validator.h
-/* begin file src/generic/utf8_validation/utf8_validator.h */
-namespace simdutf {
-namespace arm64 {
-namespace {
-namespace utf8_validation {
+          // 6. adjust pointers
+          buf += 8;
+          utf8_output += row[0];
+          continue;
 
-/**
- * Validates that the string is actual UTF-8.
- */
-template
-bool generic_validate_utf8(const uint8_t * input, size_t length) {
-    checker c{};
-    buf_block_reader<64> reader(input, length);
-    while (reader.has_full_block()) {
-      simd::simd8x64 in(reader.full_block());
-      c.check_next_input(in);
-      reader.advance();
     }
-    uint8_t block[64]{};
-    reader.get_remainder(block);
-    simd::simd8x64 in(block);
-    c.check_next_input(in);
-    reader.advance();
-    c.check_eof();
-    return !c.errors();
-}
+    const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
+    // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
+    // it is likely an uncommon occurrence.
+    if (vmaxvq_u16(surrogates_bytemask) == 0) {
+        // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+        const uint16x8_t dup_even = make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
+                                     0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
+#else
+        const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
+                                     0x0808, 0x0a0a, 0x0c0c, 0x0e0e};
+#endif
+        /* In this branch we handle three cases:
+           1. [0000|0000|0ccc|cccc] => [0ccc|cccc]                           - single UFT-8 byte
+           2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
+           3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-bool generic_validate_utf8(const char * input, size_t length) {
-  return generic_validate_utf8(reinterpret_cast(input),length);
-}
+          We expand the input word (16-bit) into two code units (32-bit), thus
+          we have room for four bytes. However, we need five distinct bit
+          layouts. Note that the last byte in cases #2 and #3 is the same.
 
-/**
- * Validates that the string is actual UTF-8 and stops on errors.
- */
-template
-result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
-    checker c{};
-    buf_block_reader<64> reader(input, length);
-    size_t count{0};
-    while (reader.has_full_block()) {
-      simd::simd8x64 in(reader.full_block());
-      c.check_next_input(in);
-      if(c.errors()) {
-        if (count != 0) { count--; } // Sometimes the error is only detected in the next chunk
-        result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast(input), reinterpret_cast(input + count), length - count);
-        res.count += count;
-        return res;
-      }
-      reader.advance();
-      count += 64;
-    }
-    uint8_t block[64]{};
-    reader.get_remainder(block);
-    simd::simd8x64 in(block);
-    c.check_next_input(in);
-    reader.advance();
-    c.check_eof();
-    if (c.errors()) {
-      if (count != 0) { count--; } // Sometimes the error is only detected in the next chunk
-      result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast(input), reinterpret_cast(input) + count, length - count);
-      res.count += count;
-      return res;
-    } else {
-      return result(error_code::SUCCESS, length);
-    }
-}
+          We precompute byte 1 for case #1 and the common byte for cases #2 & #3
+          in register t2.
 
-result generic_validate_utf8_with_errors(const char * input, size_t length) {
-  return generic_validate_utf8_with_errors(reinterpret_cast(input),length);
-}
+          We precompute byte 1 for case #3 and -- **conditionally** -- precompute
+          either byte 1 for case #2 or byte 2 for case #3. Note that they
+          differ by exactly one bit.
 
-template
-bool generic_validate_ascii(const uint8_t * input, size_t length) {
-    buf_block_reader<64> reader(input, length);
-    uint8_t blocks[64]{};
-    simd::simd8x64 running_or(blocks);
-    while (reader.has_full_block()) {
-      simd::simd8x64 in(reader.full_block());
-      running_or |= in;
-      reader.advance();
-    }
-    uint8_t block[64]{};
-    reader.get_remainder(block);
-    simd::simd8x64 in(block);
-    running_or |= in;
-    return running_or.is_ascii();
-}
+          Finally from these two code units we build proper UTF-8 sequence, taking
+          into account the case (i.e, the number of bytes to write).
+        */
+        /**
+         * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce:
+         * t2 => [0ccc|cccc] [10cc|cccc]
+         * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb])
+         */
+#define simdutf_vec(x) vmovq_n_u16(static_cast(x))
+        // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc]
+        const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(in), vreinterpretq_u8_u16(dup_even)));
+        // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc]
+        const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
+        // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc]
+        const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
 
-bool generic_validate_ascii(const char * input, size_t length) {
-  return generic_validate_ascii(reinterpret_cast(input),length);
-}
+        // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa]
+        const uint16x8_t s0 = vshrq_n_u16(in, 12);
+        // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000]
+        const uint16x8_t s1 = vandq_u16(in, simdutf_vec(0b0000111111000000));
+        // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000]
+        const uint16x8_t s1s = vshlq_n_u16(s1, 2);
+        // [00bb|bbbb|0000|aaaa]
+        const uint16x8_t s2 = vorrq_u16(s0, s1s);
+        // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa]
+        const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
+        const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
+        const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(in, v_07ff);
+        const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
+        const uint16x8_t s4 = veorq_u16(s3, m0);
+#undef simdutf_vec
 
-template
-result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
-  buf_block_reader<64> reader(input, length);
-  size_t count{0};
-  while (reader.has_full_block()) {
-    simd::simd8x64 in(reader.full_block());
-    if (!in.is_ascii()) {
-      result res = scalar::ascii::validate_with_errors(reinterpret_cast(input + count), length - count);
-      return result(res.error, count + res.count);
-    }
-    reader.advance();
-
-    count += 64;
-  }
-  uint8_t block[64]{};
-  reader.get_remainder(block);
-  simd::simd8x64 in(block);
-  if (!in.is_ascii()) {
-    result res = scalar::ascii::validate_with_errors(reinterpret_cast(input + count), length - count);
-    return result(res.error, count + res.count);
-  } else {
-    return result(error_code::SUCCESS, length);
-  }
-}
-
-result generic_validate_ascii_with_errors(const char * input, size_t length) {
-  return generic_validate_ascii_with_errors(reinterpret_cast(input),length);
-}
+        // 4. expand code units 16-bit => 32-bit
+        const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4));
+        const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4));
 
-} // namespace utf8_validation
-} // unnamed namespace
-} // namespace arm64
-} // namespace simdutf
-/* end file src/generic/utf8_validation/utf8_validator.h */
-// transcoding from UTF-8 to UTF-16
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/valid_utf8_to_utf16.h
-/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
+        // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
+        const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
+        const uint16x8_t one_byte_bytemask = vcleq_u16(in, v_007f);
+#ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+        const uint16x8_t onemask = make_uint16x8_t(0x0001, 0x0004,
+                                    0x0010, 0x0040,
+                                    0x0100, 0x0400,
+                                    0x1000, 0x4000 );
+        const uint16x8_t twomask = make_uint16x8_t(0x0002, 0x0008,
+                                    0x0020, 0x0080,
+                                    0x0200, 0x0800,
+                                    0x2000, 0x8000 );
+#else
+        const uint16x8_t onemask = { 0x0001, 0x0004,
+                                    0x0010, 0x0040,
+                                    0x0100, 0x0400,
+                                    0x1000, 0x4000 };
+        const uint16x8_t twomask = { 0x0002, 0x0008,
+                                    0x0020, 0x0080,
+                                    0x0200, 0x0800,
+                                    0x2000, 0x8000 };
+#endif
+        const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
+        const uint16_t mask = vaddvq_u16(combined);
+        // The following fast path may or may not be beneficial.
+        /*if(mask == 0) {
+          // We only have three-byte code units. Use fast path.
+          const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0};
+          const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle);
+          const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle);
+          vst1q_u8(utf8_output, utf8_0);
+          utf8_output += 12;
+          vst1q_u8(utf8_output, utf8_1);
+          utf8_output += 12;
+          buf += 8;
+          continue;
+        }*/
+        const uint8_t mask0 = uint8_t(mask);
 
+        const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
+        const uint8x16_t shuffle0 = vld1q_u8(row0 + 1);
+        const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0);
 
-namespace simdutf {
-namespace arm64 {
-namespace {
-namespace utf8_to_utf16 {
+        const uint8_t mask1 = static_cast(mask >> 8);
+        const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
+        const uint8x16_t shuffle1 = vld1q_u8(row1 + 1);
+        const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1);
 
-using namespace simd;
+        vst1q_u8(utf8_output, utf8_0);
+        utf8_output += row0[0];
+        vst1q_u8(utf8_output, utf8_1);
+        utf8_output += row1[0];
 
-template 
-simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
-    char16_t* utf16_output) noexcept {
-  // The implementation is not specific to haswell and should be moved to the generic directory.
-  size_t pos = 0;
-  char16_t* start{utf16_output};
-  const size_t safety_margin = 16; // to avoid overruns!
-  while(pos + 64 + safety_margin <= size) {
-    // this loop could be unrolled further. For example, we could process the mask
-    // far more than 64 bytes.
-    simd8x64 in(reinterpret_cast(input + pos));
-    if(in.is_ascii()) {
-      in.store_ascii_as_utf16(utf16_output);
-      utf16_output += 64;
-      pos += 64;
+        buf += 8;
+    // surrogate pair(s) in a register
     } else {
-      // Slow path. We hope that the compiler will recognize that this is a slow path.
-      // Anything that is not a continuation mask is a 'leading byte', that is, the
-      // start of a new code point.
-      uint64_t utf8_continuation_mask = in.lt(-65 + 1);
-      // -65 is 0b10111111 in two-complement's, so largest possible continuation byte
-      uint64_t utf8_leading_mask = ~utf8_continuation_mask;
-      // The *start* of code points is not so useful, rather, we want the *end* of code points.
-      uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
-      // We process in blocks of up to 12 bytes except possibly
-      // for fast paths which may process up to 16 bytes. For the
-      // slow path to work, we should have at least 12 input bytes left.
-      size_t max_starting_point = (pos + 64) - 12;
-      // Next loop is going to run at least five times when using solely
-      // the slow/regular path, and at least four times if there are fast paths.
-      while(pos < max_starting_point) {
-        // Performance note: our ability to compute 'consumed' and
-        // then shift and recompute is critical. If there is a
-        // latency of, say, 4 cycles on getting 'consumed', then
-        // the inner loop might have a total latency of about 6 cycles.
-        // Yet we process between 6 to 12 inputs bytes, thus we get
-        // a speed limit between 1 cycle/byte and 0.5 cycle/byte
-        // for this section of the code. Hence, there is a limit
-        // to how much we can further increase this latency before
-        // it seriously harms performance.
-        //
-        // Thus we may allow convert_masked_utf8_to_utf16 to process
-        // more bytes at a time under a fast-path mode where 16 bytes
-        // are consumed at once (e.g., when encountering ASCII).
-        size_t consumed = convert_masked_utf8_to_utf16(input + pos,
-                            utf8_end_of_code_point_mask, utf16_output);
-        pos += consumed;
-        utf8_end_of_code_point_mask >>= consumed;
+      // Let us do a scalar fallback.
+      // It may seem wasteful to use scalar code, but being efficient with SIMD
+      // in the presence of surrogate pairs may require non-trivial tables.
+      size_t forward = 15;
+      size_t k = 0;
+      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
+      for(; k < forward; k++) {
+        uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
+        if((word & 0xFF80)==0) {
+          *utf8_output++ = char(word);
+        } else if((word & 0xF800)==0) {
+          *utf8_output++ = char((word>>6) | 0b11000000);
+          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+        } else if((word &0xF800 ) != 0xD800) {
+          *utf8_output++ = char((word>>12) | 0b11100000);
+          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
+          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+        } else {
+          // must be a surrogate pair
+          uint16_t diff = uint16_t(word - 0xD800);
+          uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
+          k++;
+          uint16_t diff2 = uint16_t(next_word - 0xDC00);
+          if((diff | diff2) > 0x3FF)  { return std::make_pair(result(error_code::SURROGATE, buf - start + k - 1), reinterpret_cast(utf8_output)); }
+          uint32_t value = (diff << 10) + diff2 + 0x10000;
+          *utf8_output++ = char((value>>18) | 0b11110000);
+          *utf8_output++ = char(((value>>12) & 0b111111) | 0b10000000);
+          *utf8_output++ = char(((value>>6) & 0b111111) | 0b10000000);
+          *utf8_output++ = char((value & 0b111111) | 0b10000000);
+        }
       }
-      // At this point there may remain between 0 and 12 bytes in the
-      // 64-byte block. These bytes will be processed again. So we have an
-      // 80% efficiency (in the worst case). In practice we expect an
-      // 85% to 90% efficiency.
+      buf += k;
     }
-  }
-  utf16_output += scalar::utf8_to_utf16::convert_valid(input + pos, size - pos, utf16_output);
-  return utf16_output - start;
+  } // while
+
+  return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf8_output));
 }
+/* end file src/arm64/arm_convert_utf16_to_utf8.cpp */
+/* begin file src/arm64/arm_convert_utf16_to_utf32.cpp */
+/*
+    The vectorized algorithm works on single SSE register i.e., it
+    loads eight 16-bit code units.
 
-} // namespace utf8_to_utf16
-} // unnamed namespace
-} // namespace arm64
-} // namespace simdutf
-/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/utf8_to_utf16.h
-/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */
+    We consider three cases:
+    1. an input register contains no surrogates and each value
+       is in range 0x0000 .. 0x07ff.
+    2. an input register contains no surrogates and values are
+       is in range 0x0000 .. 0xffff.
+    3. an input register contains surrogates --- i.e. codepoints
+       can have 16 or 32 bits.
 
+    Ad 1.
 
-namespace simdutf {
-namespace arm64 {
-namespace {
-namespace utf8_to_utf16 {
-using namespace simd;
+    When values are less than 0x0800, it means that a 16-bit code unit
+    can be converted into: 1) single UTF8 byte (when it's an ASCII
+    char) or 2) two UTF8 bytes.
 
+    For this case we do only some shuffle to obtain these 2-byte
+    codes and finally compress the whole SSE register with a single
+    shuffle.
 
-  simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
-// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
-// Bit 1 = Too Long (ASCII followed by continuation)
-// Bit 2 = Overlong 3-byte
-// Bit 4 = Surrogate
-// Bit 5 = Overlong 2-byte
-// Bit 7 = Two Continuations
-    constexpr const uint8_t TOO_SHORT   = 1<<0; // 11______ 0_______
-                                                // 11______ 11______
-    constexpr const uint8_t TOO_LONG    = 1<<1; // 0_______ 10______
-    constexpr const uint8_t OVERLONG_3  = 1<<2; // 11100000 100_____
-    constexpr const uint8_t SURROGATE   = 1<<4; // 11101101 101_____
-    constexpr const uint8_t OVERLONG_2  = 1<<5; // 1100000_ 10______
-    constexpr const uint8_t TWO_CONTS   = 1<<7; // 10______ 10______
-    constexpr const uint8_t TOO_LARGE   = 1<<3; // 11110100 1001____
-                                                // 11110100 101_____
-                                                // 11110101 1001____
-                                                // 11110101 101_____
-                                                // 1111011_ 1001____
-                                                // 1111011_ 101_____
-                                                // 11111___ 1001____
-                                                // 11111___ 101_____
-    constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
-                                                // 11110101 1000____
-                                                // 1111011_ 1000____
-                                                // 11111___ 1000____
-    constexpr const uint8_t OVERLONG_4  = 1<<6; // 11110000 1000____
+    We need 256-entry lookup table to get a compression pattern
+    and the number of output bytes in the compressed vector register.
+    Each entry occupies 17 bytes.
 
-    const simd8 byte_1_high = prev1.shr<4>().lookup_16(
-      // 0_______ ________ 
-      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
-      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
-      // 10______ ________ 
-      TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
-      // 1100____ ________ 
-      TOO_SHORT | OVERLONG_2,
-      // 1101____ ________ 
-      TOO_SHORT,
-      // 1110____ ________ 
-      TOO_SHORT | OVERLONG_3 | SURROGATE,
-      // 1111____ ________ 
-      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4
-    );
-    constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
-    const simd8 byte_1_low = (prev1 & 0x0F).lookup_16(
-      // ____0000 ________
-      CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
-      // ____0001 ________
-      CARRY | OVERLONG_2,
-      // ____001_ ________
-      CARRY,
-      CARRY,
+    Ad 2.
 
-      // ____0100 ________
-      CARRY | TOO_LARGE,
-      // ____0101 ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      // ____011_ ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
+    When values fit in 16-bit code units, but are above 0x07ff, then
+    a single word may produce one, two or three UTF8 bytes.
 
-      // ____1___ ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      // ____1101 ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000
-    );
-    const simd8 byte_2_high = input.shr<4>().lookup_16(
-      // ________ 0_______ 
-      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
-      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+    We prepare data for all these three cases in two registers.
+    The first register contains lower two UTF8 bytes (used in all
+    cases), while the second one contains just the third byte for
+    the three-UTF8-bytes case.
 
-      // ________ 1000____
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
-      // ________ 1001____
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
-      // ________ 101_____
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
+    Finally these two registers are interleaved forming eight-element
+    array of 32-bit values. The array spans two SSE registers.
+    The bytes from the registers are compressed using two shuffles.
 
-      // ________ 11______
-      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
-    );
-    return (byte_1_high & byte_1_low & byte_2_high);
-  }
-  simdutf_really_inline simd8 check_multibyte_lengths(const simd8 input,
-      const simd8 prev_input, const simd8 sc) {
-    simd8 prev2 = input.prev<2>(prev_input);
-    simd8 prev3 = input.prev<3>(prev_input);
-    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
-    simd8 must23_80 = must23 & uint8_t(0x80);
-    return must23_80 ^ sc;
-  }
+    We need 256-entry lookup table to get a compression pattern
+    and the number of output bytes in the compressed vector register.
+    Each entry occupies 17 bytes.
 
 
-  struct validating_transcoder {
-    // If this is nonzero, there has been a UTF-8 error.
-    simd8 error;
+    To summarize:
+    - We need two 256-entry tables that have 8704 bytes in total.
+*/
+/*
+  Returns a pair: the first unprocessed byte from buf and utf8_output
+  A scalar routing should carry on the conversion of the tail.
+*/
+template 
+std::pair arm_convert_utf16_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_out) {
+  uint32_t * utf32_output = reinterpret_cast(utf32_out);
+  const char16_t* end = buf + len;
 
-    validating_transcoder() : error(uint8_t(0)) {}
-    //
-    // Check whether the current bytes are valid UTF-8.
-    //
-    simdutf_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) {
-      // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
-      // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
-      simd8 prev1 = input.prev<1>(prev_input);
-      simd8 sc = check_special_cases(input, prev1);
-      this->error |= check_multibyte_lengths(input, prev_input, sc);
-    }
+  const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
+  const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
 
+  while (buf + 8 <= end) {
+    uint16x8_t in = vld1q_u16(reinterpret_cast(buf));
+    if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
 
-    template 
-    simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
-      size_t pos = 0;
-      char16_t* start{utf16_output};
-      // In the worst case, we have the haswell kernel which can cause an overflow of
-      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
-      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
-      // much more than 8 bytes. However, you cannot generally assume that you have valid
-      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
-      // to give us a good margin.
-      size_t leading_byte = 0;
-      size_t margin = size;
-      for(; margin > 0 && leading_byte < 8; margin--) {
-        leading_byte += (int8_t(in[margin-1]) > -65);
-      }
-      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
-      const size_t safety_margin = size - margin + 1; // to avoid overruns!
-      while(pos + 64 + safety_margin <= size) {
-        simd8x64 input(reinterpret_cast(in + pos));
-        if(input.is_ascii()) {
-          input.store_ascii_as_utf16(utf16_output);
-          utf16_output += 64;
-          pos += 64;
+    const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
+    // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
+    // it is likely an uncommon occurrence.
+    if (vmaxvq_u16(surrogates_bytemask) == 0) {
+      // case: no surrogate pairs, extend all 16-bit code units to 32-bit code units
+      vst1q_u32(utf32_output,  vmovl_u16(vget_low_u16(in)));
+      vst1q_u32(utf32_output+4,  vmovl_high_u16(in));
+      utf32_output += 8;
+      buf += 8;
+    // surrogate pair(s) in a register
+    } else {
+      // Let us do a scalar fallback.
+      // It may seem wasteful to use scalar code, but being efficient with SIMD
+      // in the presence of surrogate pairs may require non-trivial tables.
+      size_t forward = 15;
+      size_t k = 0;
+      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
+      for(; k < forward; k++) {
+        uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
+        if((word &0xF800 ) != 0xD800) {
+          *utf32_output++ = char32_t(word);
         } else {
-          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
-          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
-              "We support either two or four chunks per 64-byte block.");
-          auto zero = simd8{uint8_t(0)};
-          if(simd8x64::NUM_CHUNKS == 2) {
-            this->check_utf8_bytes(input.chunks[0], zero);
-            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-          } else if(simd8x64::NUM_CHUNKS == 4) {
-            this->check_utf8_bytes(input.chunks[0], zero);
-            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
-            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
-          }
-          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
-          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
-          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
-          // We process in blocks of up to 12 bytes except possibly
-          // for fast paths which may process up to 16 bytes. For the
-          // slow path to work, we should have at least 12 input bytes left.
-          size_t max_starting_point = (pos + 64) - 12;
-          // Next loop is going to run at least five times.
-          while(pos < max_starting_point) {
-            // Performance note: our ability to compute 'consumed' and
-            // then shift and recompute is critical. If there is a
-            // latency of, say, 4 cycles on getting 'consumed', then
-            // the inner loop might have a total latency of about 6 cycles.
-            // Yet we process between 6 to 12 inputs bytes, thus we get
-            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
-            // for this section of the code. Hence, there is a limit
-            // to how much we can further increase this latency before
-            // it seriously harms performance.
-            size_t consumed = convert_masked_utf8_to_utf16(in + pos,
-                            utf8_end_of_code_point_mask, utf16_output);
-            pos += consumed;
-            utf8_end_of_code_point_mask >>= consumed;
-          }
-          // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block. These bytes will be processed again. So we have an
-          // 80% efficiency (in the worst case). In practice we expect an
-          // 85% to 90% efficiency.
+          // must be a surrogate pair
+          uint16_t diff = uint16_t(word - 0xD800);
+          uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
+          k++;
+          uint16_t diff2 = uint16_t(next_word - 0xDC00);
+          if((diff | diff2) > 0x3FF)  { return std::make_pair(nullptr, reinterpret_cast(utf32_output)); }
+          uint32_t value = (diff << 10) + diff2 + 0x10000;
+          *utf32_output++ = char32_t(value);
         }
       }
-      if(errors()) { return 0; }
-      if(pos < size) {
-        size_t howmany  = scalar::utf8_to_utf16::convert(in + pos, size - pos, utf16_output);
-        if(howmany == 0) { return 0; }
-        utf16_output += howmany;
-      }
-      return utf16_output - start;
+      buf += k;
     }
+  } // while
+  return std::make_pair(buf, reinterpret_cast(utf32_output));
+}
 
-    template 
-    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
-      size_t pos = 0;
-      char16_t* start{utf16_output};
-      // In the worst case, we have the haswell kernel which can cause an overflow of
-      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
-      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
-      // much more than 8 bytes. However, you cannot generally assume that you have valid
-      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
-      // to give us a good margin.
-      size_t leading_byte = 0;
-      size_t margin = size;
-      for(; margin > 0 && leading_byte < 8; margin--) {
-        leading_byte += (int8_t(in[margin-1]) > -65);
-      }
-      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
-      const size_t safety_margin = size - margin + 1; // to avoid overruns!
-      while(pos + 64 + safety_margin <= size) {
-        simd8x64 input(reinterpret_cast(in + pos));
-        if(input.is_ascii()) {
-          input.store_ascii_as_utf16(utf16_output);
-          utf16_output += 64;
-          pos += 64;
+
+/*
+  Returns a pair: a result struct and utf8_output.
+  If there is an error, the count field of the result is the position of the error.
+  Otherwise, it is the position of the first unprocessed byte in buf (even if finished).
+  A scalar routing should carry on the conversion of the tail if needed.
+*/
+template 
+std::pair arm_convert_utf16_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_out) {
+  uint32_t * utf32_output = reinterpret_cast(utf32_out);
+  const char16_t* start = buf;
+  const char16_t* end = buf + len;
+
+  const uint16x8_t v_f800 = vmovq_n_u16((uint16_t)0xf800);
+  const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
+
+  while (buf + 8 <= end) {
+    uint16x8_t in = vld1q_u16(reinterpret_cast(buf));
+    if (!match_system(big_endian)) { in = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(in))); }
+
+    const uint16x8_t surrogates_bytemask = vceqq_u16(vandq_u16(in, v_f800), v_d800);
+    // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
+    // it is likely an uncommon occurrence.
+    if (vmaxvq_u16(surrogates_bytemask) == 0) {
+      // case: no surrogate pairs, extend all 16-bit code units to 32-bit code units
+      vst1q_u32(utf32_output,  vmovl_u16(vget_low_u16(in)));
+      vst1q_u32(utf32_output+4,  vmovl_high_u16(in));
+      utf32_output += 8;
+      buf += 8;
+    // surrogate pair(s) in a register
+    } else {
+      // Let us do a scalar fallback.
+      // It may seem wasteful to use scalar code, but being efficient with SIMD
+      // in the presence of surrogate pairs may require non-trivial tables.
+      size_t forward = 15;
+      size_t k = 0;
+      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
+      for(; k < forward; k++) {
+        uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
+        if((word &0xF800 ) != 0xD800) {
+          *utf32_output++ = char32_t(word);
         } else {
-          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
-          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
-              "We support either two or four chunks per 64-byte block.");
-          auto zero = simd8{uint8_t(0)};
-          if(simd8x64::NUM_CHUNKS == 2) {
-            this->check_utf8_bytes(input.chunks[0], zero);
-            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-          } else if(simd8x64::NUM_CHUNKS == 4) {
-            this->check_utf8_bytes(input.chunks[0], zero);
-            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
-            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
-          }
-          if (errors()) {
-            // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
-            // with the ability to go back up to pos bytes, and read size-pos bytes forward.
-            result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
-            res.count += pos;
-            return res;
-          }
-          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
-          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
-          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
-          // We process in blocks of up to 12 bytes except possibly
-          // for fast paths which may process up to 16 bytes. For the
-          // slow path to work, we should have at least 12 input bytes left.
-          size_t max_starting_point = (pos + 64) - 12;
-          // Next loop is going to run at least five times.
-          while(pos < max_starting_point) {
-            // Performance note: our ability to compute 'consumed' and
-            // then shift and recompute is critical. If there is a
-            // latency of, say, 4 cycles on getting 'consumed', then
-            // the inner loop might have a total latency of about 6 cycles.
-            // Yet we process between 6 to 12 inputs bytes, thus we get
-            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
-            // for this section of the code. Hence, there is a limit
-            // to how much we can further increase this latency before
-            // it seriously harms performance.
-            size_t consumed = convert_masked_utf8_to_utf16(in + pos,
-                            utf8_end_of_code_point_mask, utf16_output);
-            pos += consumed;
-            utf8_end_of_code_point_mask >>= consumed;
-          }
-          // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block. These bytes will be processed again. So we have an
-          // 80% efficiency (in the worst case). In practice we expect an
-          // 85% to 90% efficiency.
-        }
-      }
-      if(errors()) {
-        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
-        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
-        result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
-        res.count += pos;
-        return res;
-      }
-      if(pos < size) {
-        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
-        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
-        result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
-        if (res.error) {    // In case of error, we want the error position
-          res.count += pos;
-          return res;
-        } else {    // In case of success, we want the number of word written
-          utf16_output += res.count;
+          // must be a surrogate pair
+          uint16_t diff = uint16_t(word - 0xD800);
+          uint16_t next_word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k + 1]) : buf[k + 1];
+          k++;
+          uint16_t diff2 = uint16_t(next_word - 0xDC00);
+          if((diff | diff2) > 0x3FF)  { return std::make_pair(result(error_code::SURROGATE, buf - start + k - 1), reinterpret_cast(utf32_output)); }
+          uint32_t value = (diff << 10) + diff2 + 0x10000;
+          *utf32_output++ = char32_t(value);
         }
       }
-      return result(error_code::SUCCESS, utf16_output - start);
+      buf += k;
     }
+  } // while
+  return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf32_output));
+}
+/* end file src/arm64/arm_convert_utf16_to_utf32.cpp */
 
-    simdutf_really_inline bool errors() const {
-      return this->error.any_bits_set_anywhere();
+/* begin file src/arm64/arm_convert_utf32_to_latin1.cpp */
+std::pair arm_convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) {
+  const char32_t* end = buf + len;
+  while (buf + 8 <= end) {
+    uint32x4_t in1 = vld1q_u32(reinterpret_cast(buf));
+    uint32x4_t in2 = vld1q_u32(reinterpret_cast(buf+4));
+
+    uint16x8_t utf16_packed = vcombine_u16(vqmovn_u32(in1), vqmovn_u32(in2));
+    if (vmaxvq_u16(utf16_packed) <= 0xff) {
+        // 1. pack the bytes
+        uint8x8_t latin1_packed = vmovn_u16(utf16_packed);
+        // 2. store (8 bytes)
+        vst1_u8(reinterpret_cast(latin1_output), latin1_packed);
+        // 3. adjust pointers
+        buf += 8;
+        latin1_output += 8;
+    } else {
+      return std::make_pair(nullptr, reinterpret_cast(latin1_output));
     }
+  } // while
+  return std::make_pair(buf, latin1_output);
+}
 
-  }; // struct utf8_checker
-} // utf8_to_utf16 namespace
-} // unnamed namespace
-} // namespace arm64
-} // namespace simdutf
-/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */
-// transcoding from UTF-8 to UTF-32
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/valid_utf8_to_utf32.h
-/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
 
-namespace simdutf {
-namespace arm64 {
-namespace {
-namespace utf8_to_utf32 {
+std::pair arm_convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) {
+  const char32_t* start = buf;
+  const char32_t* end = buf + len;
 
-using namespace simd;
+  while (buf + 8 <= end) {
+    uint32x4_t in1 = vld1q_u32(reinterpret_cast(buf));
+    uint32x4_t in2 = vld1q_u32(reinterpret_cast(buf+4));
 
+    uint16x8_t utf16_packed = vcombine_u16(vqmovn_u32(in1), vqmovn_u32(in2));
 
-simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
-    char32_t* utf32_output) noexcept {
-  size_t pos = 0;
-  char32_t* start{utf32_output};
-  const size_t safety_margin = 16; // to avoid overruns!
-  while(pos + 64 + safety_margin <= size) {
-    simd8x64 in(reinterpret_cast(input + pos));
-    if(in.is_ascii()) {
-      in.store_ascii_as_utf32(utf32_output);
-      utf32_output += 64;
-      pos += 64;
+    if (vmaxvq_u16(utf16_packed) <= 0xff) {
+        // 1. pack the bytes
+        uint8x8_t latin1_packed = vmovn_u16(utf16_packed);
+        // 2. store (8 bytes)
+        vst1_u8(reinterpret_cast(latin1_output), latin1_packed);
+        // 3. adjust pointers
+        buf += 8;
+        latin1_output += 8;
     } else {
-    // -65 is 0b10111111 in two-complement's, so largest possible continuation byte
-    uint64_t utf8_continuation_mask = in.lt(-65 + 1);
-    uint64_t utf8_leading_mask = ~utf8_continuation_mask;
-    uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
-    size_t max_starting_point = (pos + 64) - 12;
-    while(pos < max_starting_point) {
-      size_t consumed = convert_masked_utf8_to_utf32(input + pos,
-                          utf8_end_of_code_point_mask, utf32_output);
-      pos += consumed;
-      utf8_end_of_code_point_mask >>= consumed;
+      // Let us do a scalar fallback.
+      for(int k = 0; k < 8; k++) {
+        uint32_t word = buf[k];
+        if(word <= 0xff) {
+          *latin1_output++ = char(word);
+        } else {
+          return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), latin1_output);
+        }
       }
     }
-  }
-  utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
-  return utf32_output - start;
+  } // while
+  return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output);
 }
+/* end file src/arm64/arm_convert_utf32_to_latin1.cpp */
+/* begin file src/arm64/arm_convert_utf32_to_utf8.cpp */
+std::pair arm_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_out) {
+  uint8_t * utf8_output = reinterpret_cast(utf8_out);
+  const char32_t* end = buf + len;
 
+  const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
 
-} // namespace utf8_to_utf32
-} // unnamed namespace
-} // namespace arm64
-} // namespace simdutf
-/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/utf8_to_utf32.h
-/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */
+  uint16x8_t forbidden_bytemask = vmovq_n_u16(0x0);
 
+  while (buf + 8 < end) {
+    uint32x4_t in = vld1q_u32(reinterpret_cast(buf));
+    uint32x4_t nextin = vld1q_u32(reinterpret_cast(buf+4));
 
-namespace simdutf {
-namespace arm64 {
-namespace {
-namespace utf8_to_utf32 {
-using namespace simd;
+    // Check if no bits set above 16th
+    if(vmaxvq_u32(vorrq_u32(in, nextin)) <= 0xFFFF) {
+      // Pack UTF-32 to UTF-16 safely (without surrogate pairs)
+      // Apply UTF-16 => UTF-8 routine (arm_convert_utf16_to_utf8.cpp)
+      uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin));
+      if(vmaxvq_u16(utf16_packed) <= 0x7F) { // ASCII fast path!!!!
+        // 1. pack the bytes
+        // obviously suboptimal.
+        uint8x8_t utf8_packed = vmovn_u16(utf16_packed);
+        // 2. store (8 bytes)
+        vst1_u8(utf8_output, utf8_packed);
+        // 3. adjust pointers
+        buf += 8;
+        utf8_output += 8;
+        continue; // we are done for this round!
+      }
 
+      if (vmaxvq_u16(utf16_packed) <= 0x7FF) {
+        // 1. prepare 2-byte values
+        // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
+        // expected output   : [110a|aaaa|10bb|bbbb] x 8
+        const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
+        const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
+
+        // t0 = [000a|aaaa|bbbb|bb00]
+        const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2);
+        // t1 = [000a|aaaa|0000|0000]
+        const uint16x8_t t1 = vandq_u16(t0, v_1f00);
+        // t2 = [0000|0000|00bb|bbbb]
+        const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f);
+        // t3 = [000a|aaaa|00bb|bbbb]
+        const uint16x8_t t3 = vorrq_u16(t1, t2);
+        // t4 = [110a|aaaa|10bb|bbbb]
+        const uint16x8_t t4 = vorrq_u16(t3, v_c080);
+        // 2. merge ASCII and 2-byte codewords
+        const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
+        const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
+        const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, utf16_packed, t4));
+        // 3. prepare bitmask for 8-bit lookup
+  #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+        const uint16x8_t mask = make_uint16x8_t(0x0001, 0x0004,
+                                  0x0010, 0x0040,
+                                  0x0002, 0x0008,
+                                  0x0020, 0x0080);
+  #else
+        const uint16x8_t mask = { 0x0001, 0x0004,
+                                  0x0010, 0x0040,
+                                  0x0002, 0x0008,
+                                  0x0020, 0x0080 };
+  #endif
+        uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
+        // 4. pack the bytes
+        const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
+        const uint8x16_t shuffle = vld1q_u8(row + 1);
+        const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
 
-  simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
-// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
-// Bit 1 = Too Long (ASCII followed by continuation)
-// Bit 2 = Overlong 3-byte
-// Bit 4 = Surrogate
-// Bit 5 = Overlong 2-byte
-// Bit 7 = Two Continuations
-    constexpr const uint8_t TOO_SHORT   = 1<<0; // 11______ 0_______
-                                                // 11______ 11______
-    constexpr const uint8_t TOO_LONG    = 1<<1; // 0_______ 10______
-    constexpr const uint8_t OVERLONG_3  = 1<<2; // 11100000 100_____
-    constexpr const uint8_t SURROGATE   = 1<<4; // 11101101 101_____
-    constexpr const uint8_t OVERLONG_2  = 1<<5; // 1100000_ 10______
-    constexpr const uint8_t TWO_CONTS   = 1<<7; // 10______ 10______
-    constexpr const uint8_t TOO_LARGE   = 1<<3; // 11110100 1001____
-                                                // 11110100 101_____
-                                                // 11110101 1001____
-                                                // 11110101 101_____
-                                                // 1111011_ 1001____
-                                                // 1111011_ 101_____
-                                                // 11111___ 1001____
-                                                // 11111___ 101_____
-    constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
-                                                // 11110101 1000____
-                                                // 1111011_ 1000____
-                                                // 11111___ 1000____
-    constexpr const uint8_t OVERLONG_4  = 1<<6; // 11110000 1000____
+        // 5. store bytes
+        vst1q_u8(utf8_output, utf8_packed);
 
-    const simd8 byte_1_high = prev1.shr<4>().lookup_16(
-      // 0_______ ________ 
-      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
-      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
-      // 10______ ________ 
-      TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
-      // 1100____ ________ 
-      TOO_SHORT | OVERLONG_2,
-      // 1101____ ________ 
-      TOO_SHORT,
-      // 1110____ ________ 
-      TOO_SHORT | OVERLONG_3 | SURROGATE,
-      // 1111____ ________ 
-      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4
-    );
-    constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
-    const simd8 byte_1_low = (prev1 & 0x0F).lookup_16(
-      // ____0000 ________
-      CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
-      // ____0001 ________
-      CARRY | OVERLONG_2,
-      // ____001_ ________
-      CARRY,
-      CARRY,
+        // 6. adjust pointers
+        buf += 8;
+        utf8_output += row[0];
+        continue;
+      } else {
+        // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
+        const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
+        const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff);
+        forbidden_bytemask = vorrq_u16(vandq_u16(vcleq_u16(utf16_packed, v_dfff), vcgeq_u16(utf16_packed, v_d800)), forbidden_bytemask);
 
-      // ____0100 ________
-      CARRY | TOO_LARGE,
-      // ____0101 ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      // ____011_ ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
+  #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+          const uint16x8_t dup_even = make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
+                                      0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
+  #else
+          const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
+                                      0x0808, 0x0a0a, 0x0c0c, 0x0e0e};
+  #endif
+          /* In this branch we handle three cases:
+            1. [0000|0000|0ccc|cccc] => [0ccc|cccc]                           - single UFT-8 byte
+            2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
+            3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-      // ____1___ ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      // ____1101 ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000
-    );
-    const simd8 byte_2_high = input.shr<4>().lookup_16(
-      // ________ 0_______ 
-      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
-      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+            We expand the input word (16-bit) into two code units (32-bit), thus
+            we have room for four bytes. However, we need five distinct bit
+            layouts. Note that the last byte in cases #2 and #3 is the same.
 
-      // ________ 1000____
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
-      // ________ 1001____
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
-      // ________ 101_____
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
+            We precompute byte 1 for case #1 and the common byte for cases #2 & #3
+            in register t2.
 
-      // ________ 11______
-      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
-    );
-    return (byte_1_high & byte_1_low & byte_2_high);
-  }
-  simdutf_really_inline simd8 check_multibyte_lengths(const simd8 input,
-      const simd8 prev_input, const simd8 sc) {
-    simd8 prev2 = input.prev<2>(prev_input);
-    simd8 prev3 = input.prev<3>(prev_input);
-    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
-    simd8 must23_80 = must23 & uint8_t(0x80);
-    return must23_80 ^ sc;
-  }
+            We precompute byte 1 for case #3 and -- **conditionally** -- precompute
+            either byte 1 for case #2 or byte 2 for case #3. Note that they
+            differ by exactly one bit.
 
+            Finally from these two code units we build proper UTF-8 sequence, taking
+            into account the case (i.e, the number of bytes to write).
+          */
+          /**
+           * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce:
+           * t2 => [0ccc|cccc] [10cc|cccc]
+           * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb])
+           */
+  #define simdutf_vec(x) vmovq_n_u16(static_cast(x))
+          // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc]
+          const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), vreinterpretq_u8_u16(dup_even)));
+          // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc]
+          const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
+          // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc]
+          const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
 
-  struct validating_transcoder {
-    // If this is nonzero, there has been a UTF-8 error.
-    simd8 error;
+          // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa]
+          const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12);
+          // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000]
+          const uint16x8_t s1 = vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000));
+          // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000]
+          const uint16x8_t s1s = vshlq_n_u16(s1, 2);
+          // [00bb|bbbb|0000|aaaa]
+          const uint16x8_t s2 = vorrq_u16(s0, s1s);
+          // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa]
+          const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
+          const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
+          const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(utf16_packed, v_07ff);
+          const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
+          const uint16x8_t s4 = veorq_u16(s3, m0);
+  #undef simdutf_vec
 
-    validating_transcoder() : error(uint8_t(0)) {}
-    //
-    // Check whether the current bytes are valid UTF-8.
-    //
-    simdutf_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) {
-      // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
-      // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
-      simd8 prev1 = input.prev<1>(prev_input);
-      simd8 sc = check_special_cases(input, prev1);
-      this->error |= check_multibyte_lengths(input, prev_input, sc);
-    }
+          // 4. expand code units 16-bit => 32-bit
+          const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4));
+          const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4));
 
+          // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
+          const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
+          const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
+  #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+          const uint16x8_t onemask = make_uint16x8_t(0x0001, 0x0004,
+                                      0x0010, 0x0040,
+                                      0x0100, 0x0400,
+                                      0x1000, 0x4000 );
+          const uint16x8_t twomask = make_uint16x8_t(0x0002, 0x0008,
+                                      0x0020, 0x0080,
+                                      0x0200, 0x0800,
+                                      0x2000, 0x8000 );
+  #else
+          const uint16x8_t onemask = { 0x0001, 0x0004,
+                                      0x0010, 0x0040,
+                                      0x0100, 0x0400,
+                                      0x1000, 0x4000 };
+          const uint16x8_t twomask = { 0x0002, 0x0008,
+                                      0x0020, 0x0080,
+                                      0x0200, 0x0800,
+                                      0x2000, 0x8000 };
+  #endif
+          const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
+          const uint16_t mask = vaddvq_u16(combined);
+          // The following fast path may or may not be beneficial.
+          /*if(mask == 0) {
+            // We only have three-byte code units. Use fast path.
+            const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0};
+            const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle);
+            const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle);
+            vst1q_u8(utf8_output, utf8_0);
+            utf8_output += 12;
+            vst1q_u8(utf8_output, utf8_1);
+            utf8_output += 12;
+            buf += 8;
+            continue;
+          }*/
+          const uint8_t mask0 = uint8_t(mask);
+          const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
+          const uint8x16_t shuffle0 = vld1q_u8(row0 + 1);
+          const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0);
 
+          const uint8_t mask1 = static_cast(mask >> 8);
+          const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
+          const uint8x16_t shuffle1 = vld1q_u8(row1 + 1);
+          const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1);
 
-    simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
-      size_t pos = 0;
-      char32_t* start{utf32_output};
-      // In the worst case, we have the haswell kernel which can cause an overflow of
-      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
-      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
-      // much more than 8 bytes. However, you cannot generally assume that you have valid
-      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
-      // to give us a good margin.
-      size_t leading_byte = 0;
-      size_t margin = size;
-      for(; margin > 0 && leading_byte < 4; margin--) {
-        leading_byte += (int8_t(in[margin-1]) > -65);
+          vst1q_u8(utf8_output, utf8_0);
+          utf8_output += row0[0];
+          vst1q_u8(utf8_output, utf8_1);
+          utf8_output += row1[0];
+
+          buf += 8;
       }
-      // If the input is long enough, then we have that margin-1 is the fourth last leading byte.
-      const size_t safety_margin = size - margin + 1; // to avoid overruns!
-      while(pos + 64 + safety_margin <= size) {
-        simd8x64 input(reinterpret_cast(in + pos));
-        if(input.is_ascii()) {
-          input.store_ascii_as_utf32(utf32_output);
-          utf32_output += 64;
-          pos += 64;
+    // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> will produce four UTF-8 bytes.
+    } else {
+      // Let us do a scalar fallback.
+      // It may seem wasteful to use scalar code, but being efficient with SIMD
+      // in the presence of surrogate pairs may require non-trivial tables.
+      size_t forward = 15;
+      size_t k = 0;
+      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
+      for(; k < forward; k++) {
+        uint32_t word = buf[k];
+        if((word & 0xFFFFFF80)==0) {
+          *utf8_output++ = char(word);
+        } else if((word & 0xFFFFF800)==0) {
+          *utf8_output++ = char((word>>6) | 0b11000000);
+          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+        } else if((word & 0xFFFF0000)==0) {
+          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(nullptr, reinterpret_cast(utf8_output)); }
+          *utf8_output++ = char((word>>12) | 0b11100000);
+          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
+          *utf8_output++ = char((word & 0b111111) | 0b10000000);
         } else {
-          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
-          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
-              "We support either two or four chunks per 64-byte block.");
-          auto zero = simd8{uint8_t(0)};
-          if(simd8x64::NUM_CHUNKS == 2) {
-            this->check_utf8_bytes(input.chunks[0], zero);
-            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-          } else if(simd8x64::NUM_CHUNKS == 4) {
-            this->check_utf8_bytes(input.chunks[0], zero);
-            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
-            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
-          }
-          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
-          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
-          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
-          // We process in blocks of up to 12 bytes except possibly
-          // for fast paths which may process up to 16 bytes. For the
-          // slow path to work, we should have at least 12 input bytes left.
-          size_t max_starting_point = (pos + 64) - 12;
-          // Next loop is going to run at least five times.
-          while(pos < max_starting_point) {
-            // Performance note: our ability to compute 'consumed' and
-            // then shift and recompute is critical. If there is a
-            // latency of, say, 4 cycles on getting 'consumed', then
-            // the inner loop might have a total latency of about 6 cycles.
-            // Yet we process between 6 to 12 inputs bytes, thus we get
-            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
-            // for this section of the code. Hence, there is a limit
-            // to how much we can further increase this latency before
-            // it seriously harms performance.
-            size_t consumed = convert_masked_utf8_to_utf32(in + pos,
-                            utf8_end_of_code_point_mask, utf32_output);
-            pos += consumed;
-            utf8_end_of_code_point_mask >>= consumed;
-          }
-          // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block. These bytes will be processed again. So we have an
-          // 80% efficiency (in the worst case). In practice we expect an
-          // 85% to 90% efficiency.
+          if (word > 0x10FFFF) { return std::make_pair(nullptr, reinterpret_cast(utf8_output)); }
+          *utf8_output++ = char((word>>18) | 0b11110000);
+          *utf8_output++ = char(((word>>12) & 0b111111) | 0b10000000);
+          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
+          *utf8_output++ = char((word & 0b111111) | 0b10000000);
         }
       }
-      if(errors()) { return 0; }
-      if(pos < size) {
-        size_t howmany  = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
-        if(howmany == 0) { return 0; }
-        utf32_output += howmany;
-      }
-      return utf32_output - start;
+      buf += k;
     }
+  } // while
 
-    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
-      size_t pos = 0;
-      char32_t* start{utf32_output};
-      // In the worst case, we have the haswell kernel which can cause an overflow of
-      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
-      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
-      // much more than 8 bytes. However, you cannot generally assume that you have valid
-      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
-      // to give us a good margin.
-      size_t leading_byte = 0;
-      size_t margin = size;
-      for(; margin > 0 && leading_byte < 4; margin--) {
-        leading_byte += (int8_t(in[margin-1]) > -65);
-      }
-      // If the input is long enough, then we have that margin-1 is the fourth last leading byte.
-      const size_t safety_margin = size - margin + 1; // to avoid overruns!
-      while(pos + 64 + safety_margin <= size) {
-        simd8x64 input(reinterpret_cast(in + pos));
-        if(input.is_ascii()) {
-          input.store_ascii_as_utf32(utf32_output);
-          utf32_output += 64;
-          pos += 64;
-        } else {
-          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
-          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
-              "We support either two or four chunks per 64-byte block.");
-          auto zero = simd8{uint8_t(0)};
-          if(simd8x64::NUM_CHUNKS == 2) {
-            this->check_utf8_bytes(input.chunks[0], zero);
-            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-          } else if(simd8x64::NUM_CHUNKS == 4) {
-            this->check_utf8_bytes(input.chunks[0], zero);
-            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
-            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
-          }
-          if (errors()) {
-            result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
-            res.count += pos;
-            return res;
-          }
-          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
-          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
-          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
-          // We process in blocks of up to 12 bytes except possibly
-          // for fast paths which may process up to 16 bytes. For the
-          // slow path to work, we should have at least 12 input bytes left.
-          size_t max_starting_point = (pos + 64) - 12;
-          // Next loop is going to run at least five times.
-          while(pos < max_starting_point) {
-            // Performance note: our ability to compute 'consumed' and
-            // then shift and recompute is critical. If there is a
-            // latency of, say, 4 cycles on getting 'consumed', then
-            // the inner loop might have a total latency of about 6 cycles.
-            // Yet we process between 6 to 12 inputs bytes, thus we get
-            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
-            // for this section of the code. Hence, there is a limit
-            // to how much we can further increase this latency before
-            // it seriously harms performance.
-            size_t consumed = convert_masked_utf8_to_utf32(in + pos,
-                            utf8_end_of_code_point_mask, utf32_output);
-            pos += consumed;
-            utf8_end_of_code_point_mask >>= consumed;
-          }
-          // At this point there may remain between 0 and 12 bytes in the
-          // 64-byte block. These bytes will be processed again. So we have an
-          // 80% efficiency (in the worst case). In practice we expect an
-          // 85% to 90% efficiency.
-        }
-      }
-      if(errors()) {
-        result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
-        res.count += pos;
-        return res;
-      }
-      if(pos < size) {
-        result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
-        if (res.error) {    // In case of error, we want the error position
-          res.count += pos;
-          return res;
-        } else {    // In case of success, we want the number of word written
-          utf32_output += res.count;
-        }
-      }
-      return result(error_code::SUCCESS, utf32_output - start);
-    }
-
-    simdutf_really_inline bool errors() const {
-      return this->error.any_bits_set_anywhere();
-    }
-
-  }; // struct utf8_checker
-} // utf8_to_utf32 namespace
-} // unnamed namespace
-} // namespace arm64
-} // namespace simdutf
-/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */
-// other functions
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8.h
-/* begin file src/generic/utf8.h */
+  // check for invalid input
+  if (vmaxvq_u16(forbidden_bytemask) != 0) {
+    return std::make_pair(nullptr, reinterpret_cast(utf8_output));
+  }
+  return std::make_pair(buf, reinterpret_cast(utf8_output));
+}
 
-namespace simdutf {
-namespace arm64 {
-namespace {
-namespace utf8 {
 
-using namespace simd;
+std::pair arm_convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_out) {
+  uint8_t * utf8_output = reinterpret_cast(utf8_out);
+  const char32_t* start = buf;
+  const char32_t* end = buf + len;
 
-simdutf_really_inline size_t count_code_points(const char* in, size_t size) {
-    size_t pos = 0;
-    size_t count = 0;
-    for(;pos + 64 <= size; pos += 64) {
-      simd8x64 input(reinterpret_cast(in + pos));
-      uint64_t utf8_continuation_mask = input.lt(-65 + 1);
-      count += 64 - count_ones(utf8_continuation_mask);
-    }
-    return count + scalar::utf8::count_code_points(in + pos, size - pos);
-}
+  const uint16x8_t v_c080 = vmovq_n_u16((uint16_t)0xc080);
 
+  while (buf + 8 < end) {
+    uint32x4_t in = vld1q_u32(reinterpret_cast(buf));
+    uint32x4_t nextin = vld1q_u32(reinterpret_cast(buf+4));
 
-simdutf_really_inline size_t utf16_length_from_utf8(const char* in, size_t size) {
-    size_t pos = 0;
-    size_t count = 0;
-    // This algorithm could no doubt be improved!
-    for(;pos + 64 <= size; pos += 64) {
-      simd8x64 input(reinterpret_cast(in + pos));
-      uint64_t utf8_continuation_mask = input.lt(-65 + 1);
-      // We count one word for anything that is not a continuation (so
-      // leading bytes).
-      count += 64 - count_ones(utf8_continuation_mask);
-      int64_t utf8_4byte = input.gteq_unsigned(240);
-      count += count_ones(utf8_4byte);
-    }
-    return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
-}
+    // Check if no bits set above 16th
+    if(vmaxvq_u32(vorrq_u32(in, nextin)) <= 0xFFFF) {
+      // Pack UTF-32 to UTF-16 safely (without surrogate pairs)
+      // Apply UTF-16 => UTF-8 routine (arm_convert_utf16_to_utf8.cpp)
+      uint16x8_t utf16_packed = vcombine_u16(vmovn_u32(in), vmovn_u32(nextin));
+      if(vmaxvq_u16(utf16_packed) <= 0x7F) { // ASCII fast path!!!!
+          // 1. pack the bytes
+          // obviously suboptimal.
+          uint8x8_t utf8_packed = vmovn_u16(utf16_packed);
+          // 2. store (8 bytes)
+          vst1_u8(utf8_output, utf8_packed);
+          // 3. adjust pointers
+          buf += 8;
+          utf8_output += 8;
+          continue; // we are done for this round!
+      }
 
+      if (vmaxvq_u16(utf16_packed) <= 0x7FF) {
+        // 1. prepare 2-byte values
+        // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
+        // expected output   : [110a|aaaa|10bb|bbbb] x 8
+        const uint16x8_t v_1f00 = vmovq_n_u16((int16_t)0x1f00);
+        const uint16x8_t v_003f = vmovq_n_u16((int16_t)0x003f);
+
+        // t0 = [000a|aaaa|bbbb|bb00]
+        const uint16x8_t t0 = vshlq_n_u16(utf16_packed, 2);
+        // t1 = [000a|aaaa|0000|0000]
+        const uint16x8_t t1 = vandq_u16(t0, v_1f00);
+        // t2 = [0000|0000|00bb|bbbb]
+        const uint16x8_t t2 = vandq_u16(utf16_packed, v_003f);
+        // t3 = [000a|aaaa|00bb|bbbb]
+        const uint16x8_t t3 = vorrq_u16(t1, t2);
+        // t4 = [110a|aaaa|10bb|bbbb]
+        const uint16x8_t t4 = vorrq_u16(t3, v_c080);
+        // 2. merge ASCII and 2-byte codewords
+        const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
+        const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
+        const uint8x16_t utf8_unpacked = vreinterpretq_u8_u16(vbslq_u16(one_byte_bytemask, utf16_packed, t4));
+        // 3. prepare bitmask for 8-bit lookup
+  #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+        const uint16x8_t mask = make_uint16x8_t(0x0001, 0x0004,
+                                  0x0010, 0x0040,
+                                  0x0002, 0x0008,
+                                  0x0020, 0x0080);
+  #else
+        const uint16x8_t mask = { 0x0001, 0x0004,
+                                  0x0010, 0x0040,
+                                  0x0002, 0x0008,
+                                  0x0020, 0x0080 };
+  #endif
+        uint16_t m2 = vaddvq_u16(vandq_u16(one_byte_bytemask, mask));
+        // 4. pack the bytes
+        const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
+        const uint8x16_t shuffle = vld1q_u8(row + 1);
+        const uint8x16_t utf8_packed = vqtbl1q_u8(utf8_unpacked, shuffle);
 
-simdutf_really_inline size_t utf32_length_from_utf8(const char* in, size_t size) {
-    return count_code_points(in, size);
-}
-} // utf8 namespace
-} // unnamed namespace
-} // namespace arm64
-} // namespace simdutf
-/* end file src/generic/utf8.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf16.h
-/* begin file src/generic/utf16.h */
-namespace simdutf {
-namespace arm64 {
-namespace {
-namespace utf16 {
+        // 5. store bytes
+        vst1q_u8(utf8_output, utf8_packed);
 
-template 
-simdutf_really_inline size_t count_code_points(const char16_t* in, size_t size) {
-    size_t pos = 0;
-    size_t count = 0;
-    for(;pos + 32 <= size; pos += 32) {
-      simd16x32 input(reinterpret_cast(in + pos));
-      if (!match_system(big_endian)) input.swap_bytes();
-      uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF);
-      count += count_ones(not_pair) / 2;
-    }
-    return count + scalar::utf16::count_code_points(in + pos, size - pos);
-}
+        // 6. adjust pointers
+        buf += 8;
+        utf8_output += row[0];
+        continue;
+      } else {
+        // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
 
-template 
-simdutf_really_inline size_t utf8_length_from_utf16(const char16_t* in, size_t size) {
-    size_t pos = 0;
-    size_t count = 0;
-    // This algorithm could no doubt be improved!
-    for(;pos + 32 <= size; pos += 32) {
-      simd16x32 input(reinterpret_cast(in + pos));
-      if (!match_system(big_endian)) input.swap_bytes();
-      uint64_t ascii_mask = input.lteq(0x7F);
-      uint64_t twobyte_mask = input.lteq(0x7FF);
-      uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF);
+        // check for invalid input
+        const uint16x8_t v_d800 = vmovq_n_u16((uint16_t)0xd800);
+        const uint16x8_t v_dfff = vmovq_n_u16((uint16_t)0xdfff);
+        const uint16x8_t forbidden_bytemask = vandq_u16(vcleq_u16(utf16_packed, v_dfff), vcgeq_u16(utf16_packed, v_d800));
+        if (vmaxvq_u16(forbidden_bytemask) != 0) {
+          return std::make_pair(result(error_code::SURROGATE, buf - start), reinterpret_cast(utf8_output));
+        }
 
-      size_t ascii_count = count_ones(ascii_mask) / 2;
-      size_t twobyte_count = count_ones(twobyte_mask & ~ ascii_mask) / 2;
-      size_t threebyte_count = count_ones(not_pair_mask & ~ twobyte_mask) / 2;
-      size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2;
-      count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + ascii_count;
-    }
-    return count + scalar::utf16::utf8_length_from_utf16(in + pos, size - pos);
-}
+  #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+          const uint16x8_t dup_even = make_uint16x8_t(0x0000, 0x0202, 0x0404, 0x0606,
+                                      0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
+  #else
+          const uint16x8_t dup_even = {0x0000, 0x0202, 0x0404, 0x0606,
+                                      0x0808, 0x0a0a, 0x0c0c, 0x0e0e};
+  #endif
+          /* In this branch we handle three cases:
+            1. [0000|0000|0ccc|cccc] => [0ccc|cccc]                           - single UFT-8 byte
+            2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
+            3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-template 
-simdutf_really_inline size_t utf32_length_from_utf16(const char16_t* in, size_t size) {
-    return count_code_points(in, size);
-}
+            We expand the input word (16-bit) into two code units (32-bit), thus
+            we have room for four bytes. However, we need five distinct bit
+            layouts. Note that the last byte in cases #2 and #3 is the same.
 
-simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* output) {
-  size_t pos = 0;
+            We precompute byte 1 for case #1 and the common byte for cases #2 & #3
+            in register t2.
 
-  while (pos + 32 <= size) {
-    simd16x32 input(reinterpret_cast(in + pos));
-    input.swap_bytes();
-    input.store(reinterpret_cast(output));
-    pos += 32;
-    output += 32;
-  }
+            We precompute byte 1 for case #3 and -- **conditionally** -- precompute
+            either byte 1 for case #2 or byte 2 for case #3. Note that they
+            differ by exactly one bit.
 
-  scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
-}
+            Finally from these two code units we build proper UTF-8 sequence, taking
+            into account the case (i.e, the number of bytes to write).
+          */
+          /**
+           * Given [aaaa|bbbb|bbcc|cccc] our goal is to produce:
+           * t2 => [0ccc|cccc] [10cc|cccc]
+           * s4 => [1110|aaaa] ([110b|bbbb] OR [10bb|bbbb])
+           */
+  #define simdutf_vec(x) vmovq_n_u16(static_cast(x))
+          // [aaaa|bbbb|bbcc|cccc] => [bbcc|cccc|bbcc|cccc]
+          const uint16x8_t t0 = vreinterpretq_u16_u8(vqtbl1q_u8(vreinterpretq_u8_u16(utf16_packed), vreinterpretq_u8_u16(dup_even)));
+          // [bbcc|cccc|bbcc|cccc] => [00cc|cccc|0bcc|cccc]
+          const uint16x8_t t1 = vandq_u16(t0, simdutf_vec(0b0011111101111111));
+          // [00cc|cccc|0bcc|cccc] => [10cc|cccc|0bcc|cccc]
+          const uint16x8_t t2 = vorrq_u16 (t1, simdutf_vec(0b1000000000000000));
 
-} // utf16
-} // unnamed namespace
-} // namespace arm64
-} // namespace simdutf
-/* end file src/generic/utf16.h */
-//
-// Implementation-specific overrides
-//
-namespace simdutf {
-namespace arm64 {
+          // s0: [aaaa|bbbb|bbcc|cccc] => [0000|0000|0000|aaaa]
+          const uint16x8_t s0 = vshrq_n_u16(utf16_packed, 12);
+          // s1: [aaaa|bbbb|bbcc|cccc] => [0000|bbbb|bb00|0000]
+          const uint16x8_t s1 = vandq_u16(utf16_packed, simdutf_vec(0b0000111111000000));
+          // [0000|bbbb|bb00|0000] => [00bb|bbbb|0000|0000]
+          const uint16x8_t s1s = vshlq_n_u16(s1, 2);
+          // [00bb|bbbb|0000|aaaa]
+          const uint16x8_t s2 = vorrq_u16(s0, s1s);
+          // s3: [00bb|bbbb|0000|aaaa] => [11bb|bbbb|1110|aaaa]
+          const uint16x8_t s3 = vorrq_u16(s2, simdutf_vec(0b1100000011100000));
+          const uint16x8_t v_07ff = vmovq_n_u16((uint16_t)0x07FF);
+          const uint16x8_t one_or_two_bytes_bytemask = vcleq_u16(utf16_packed, v_07ff);
+          const uint16x8_t m0 = vbicq_u16(simdutf_vec(0b0100000000000000), one_or_two_bytes_bytemask);
+          const uint16x8_t s4 = veorq_u16(s3, m0);
+  #undef simdutf_vec
 
-simdutf_warn_unused int implementation::detect_encodings(const char * input, size_t length) const noexcept {
-  // If there is a BOM, then we trust it.
-  auto bom_encoding = simdutf::BOM::check_bom(input, length);
-  if(bom_encoding != encoding_type::unspecified) { return bom_encoding; }
-  if (length % 2 == 0) {
-    return arm_detect_encodings(input, length);
-  } else {
-    if (implementation::validate_utf8(input, length)) {
-      return simdutf::encoding_type::UTF8;
-    } else {
-      return simdutf::encoding_type::unspecified;
-    }
-  }
-}
+          // 4. expand code units 16-bit => 32-bit
+          const uint8x16_t out0 = vreinterpretq_u8_u16(vzip1q_u16(t2, s4));
+          const uint8x16_t out1 = vreinterpretq_u8_u16(vzip2q_u16(t2, s4));
 
-simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
-  return arm64::utf8_validation::generic_validate_utf8(buf,len);
-}
-
-simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
-  return arm64::utf8_validation::generic_validate_utf8_with_errors(buf,len);
-}
+          // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
+          const uint16x8_t v_007f = vmovq_n_u16((uint16_t)0x007F);
+          const uint16x8_t one_byte_bytemask = vcleq_u16(utf16_packed, v_007f);
+  #ifdef SIMDUTF_REGULAR_VISUAL_STUDIO
+          const uint16x8_t onemask = make_uint16x8_t(0x0001, 0x0004,
+                                      0x0010, 0x0040,
+                                      0x0100, 0x0400,
+                                      0x1000, 0x4000 );
+          const uint16x8_t twomask = make_uint16x8_t(0x0002, 0x0008,
+                                      0x0020, 0x0080,
+                                      0x0200, 0x0800,
+                                      0x2000, 0x8000 );
+  #else
+          const uint16x8_t onemask = { 0x0001, 0x0004,
+                                      0x0010, 0x0040,
+                                      0x0100, 0x0400,
+                                      0x1000, 0x4000 };
+          const uint16x8_t twomask = { 0x0002, 0x0008,
+                                      0x0020, 0x0080,
+                                      0x0200, 0x0800,
+                                      0x2000, 0x8000 };
+  #endif
+          const uint16x8_t combined = vorrq_u16(vandq_u16(one_byte_bytemask, onemask), vandq_u16(one_or_two_bytes_bytemask, twomask));
+          const uint16_t mask = vaddvq_u16(combined);
+          // The following fast path may or may not be beneficial.
+          /*if(mask == 0) {
+            // We only have three-byte code units. Use fast path.
+            const uint8x16_t shuffle = {2,3,1,6,7,5,10,11,9,14,15,13,0,0,0,0};
+            const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle);
+            const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle);
+            vst1q_u8(utf8_output, utf8_0);
+            utf8_output += 12;
+            vst1q_u8(utf8_output, utf8_1);
+            utf8_output += 12;
+            buf += 8;
+            continue;
+          }*/
+          const uint8_t mask0 = uint8_t(mask);
 
-simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
-  return arm64::utf8_validation::generic_validate_ascii(buf,len);
-}
+          const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
+          const uint8x16_t shuffle0 = vld1q_u8(row0 + 1);
+          const uint8x16_t utf8_0 = vqtbl1q_u8(out0, shuffle0);
 
-simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
-  return arm64::utf8_validation::generic_validate_ascii_with_errors(buf,len);
-}
+          const uint8_t mask1 = static_cast(mask >> 8);
+          const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
+          const uint8x16_t shuffle1 = vld1q_u8(row1 + 1);
+          const uint8x16_t utf8_1 = vqtbl1q_u8(out1, shuffle1);
 
-simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
-  const char16_t* tail = arm_validate_utf16(buf, len);
-  if (tail) {
-    return scalar::utf16::validate(tail, len - (tail - buf));
-  } else {
-    return false;
-  }
-}
+          vst1q_u8(utf8_output, utf8_0);
+          utf8_output += row0[0];
+          vst1q_u8(utf8_output, utf8_1);
+          utf8_output += row1[0];
 
-simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
-  const char16_t* tail = arm_validate_utf16(buf, len);
-  if (tail) {
-    return scalar::utf16::validate(tail, len - (tail - buf));
-  } else {
-    return false;
-  }
-}
+          buf += 8;
+      }
+    // At least one 32-bit word will produce a surrogate pair in UTF-16 <=> will produce four UTF-8 bytes.
+    } else {
+      // Let us do a scalar fallback.
+      // It may seem wasteful to use scalar code, but being efficient with SIMD
+      // in the presence of surrogate pairs may require non-trivial tables.
+      size_t forward = 15;
+      size_t k = 0;
+      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
+      for(; k < forward; k++) {
+        uint32_t word = buf[k];
+        if((word & 0xFFFFFF80)==0) {
+          *utf8_output++ = char(word);
+        } else if((word & 0xFFFFF800)==0) {
+          *utf8_output++ = char((word>>6) | 0b11000000);
+          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+        } else if((word & 0xFFFF0000)==0) {
+          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), reinterpret_cast(utf8_output)); }
+          *utf8_output++ = char((word>>12) | 0b11100000);
+          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
+          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+        } else {
+          if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), reinterpret_cast(utf8_output)); }
+          *utf8_output++ = char((word>>18) | 0b11110000);
+          *utf8_output++ = char(((word>>12) & 0b111111) | 0b10000000);
+          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
+          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+        }
+      }
+      buf += k;
+    }
+  } // while
 
-simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
-  result res = arm_validate_utf16_with_errors(buf, len);
-  if (res.count != len) {
-    result scalar_res = scalar::utf16::validate_with_errors(buf + res.count, len - res.count);
-    return result(scalar_res.error, res.count + scalar_res.count);
-  } else {
-    return res;
-  }
+  return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf8_output));
 }
+/* end file src/arm64/arm_convert_utf32_to_utf8.cpp */
+/* begin file src/arm64/arm_convert_utf32_to_utf16.cpp */
+template 
+std::pair arm_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_out) {
+  uint16_t * utf16_output = reinterpret_cast(utf16_out);
+  const char32_t* end = buf + len;
 
-simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
-  result res = arm_validate_utf16_with_errors(buf, len);
-  if (res.count != len) {
-    result scalar_res = scalar::utf16::validate_with_errors(buf + res.count, len - res.count);
-    return result(scalar_res.error, res.count + scalar_res.count);
-  } else {
-    return res;
-  }
-}
+  uint16x4_t forbidden_bytemask = vmov_n_u16(0x0);
 
-simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
-  const char32_t* tail = arm_validate_utf32le(buf, len);
-  if (tail) {
-    return scalar::utf32::validate(tail, len - (tail - buf));
-  } else {
-    return false;
-  }
-}
+  while(buf + 4 <= end) {
+    uint32x4_t in = vld1q_u32(reinterpret_cast(buf));
 
-simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
-  result res = arm_validate_utf32le_with_errors(buf, len);
-  if (res.count != len) {
-    result scalar_res = scalar::utf32::validate_with_errors(buf + res.count, len - res.count);
-    return result(scalar_res.error, res.count + scalar_res.count);
-  } else {
-    return res;
-  }
-}
+    // Check if no bits set above 16th
+    if(vmaxvq_u32(in) <= 0xFFFF) {
+      uint16x4_t utf16_packed = vmovn_u32(in);
 
-simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
-  utf8_to_utf16::validating_transcoder converter;
-  return converter.convert(buf, len, utf16_output);
-}
+      const uint16x4_t v_d800 = vmov_n_u16((uint16_t)0xd800);
+      const uint16x4_t v_dfff = vmov_n_u16((uint16_t)0xdfff);
+      forbidden_bytemask = vorr_u16(vand_u16(vcle_u16(utf16_packed, v_dfff), vcge_u16(utf16_packed, v_d800)), forbidden_bytemask);
 
-simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
-  utf8_to_utf16::validating_transcoder converter;
-  return converter.convert(buf, len, utf16_output);
-}
+      if (!match_system(big_endian)) { utf16_packed = vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(utf16_packed))); }
+      vst1_u16(utf16_output, utf16_packed);
+      utf16_output += 4;
+      buf += 4;
+    } else {
+      size_t forward = 3;
+      size_t k = 0;
+      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
+      for(; k < forward; k++) {
+        uint32_t word = buf[k];
+        if((word & 0xFFFF0000)==0) {
+          // will not generate a surrogate pair
+          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(nullptr, reinterpret_cast(utf16_output)); }
+          *utf16_output++ = !match_system(big_endian) ? char16_t(word >> 8 | word << 8) : char16_t(word);
+        } else {
+          // will generate a surrogate pair
+          if (word > 0x10FFFF) { return std::make_pair(nullptr, reinterpret_cast(utf16_output)); }
+          word -= 0x10000;
+          uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
+          uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
+          if (!match_system(big_endian)) {
+            high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8);
+            low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8);
+          }
+          *utf16_output++ = char16_t(high_surrogate);
+          *utf16_output++ = char16_t(low_surrogate);
+        }
+      }
+      buf += k;
+    }
+  }
 
-simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
-  utf8_to_utf16::validating_transcoder converter;
-  return converter.convert_with_errors(buf, len, utf16_output);
-}
+  // check for invalid input
+  if (vmaxv_u16(forbidden_bytemask) != 0) {
+    return std::make_pair(nullptr, reinterpret_cast(utf16_output));
+  }
 
-simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
-  utf8_to_utf16::validating_transcoder converter;
-  return converter.convert_with_errors(buf, len, utf16_output);
+  return std::make_pair(buf, reinterpret_cast(utf16_output));
 }
 
-simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(const char* input, size_t size,
-    char16_t* utf16_output) const noexcept {
-  return utf8_to_utf16::convert_valid(input, size,  utf16_output);
-}
 
-simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(const char* input, size_t size,
-    char16_t* utf16_output) const noexcept {
-  return utf8_to_utf16::convert_valid(input, size,  utf16_output);
-}
+template 
+std::pair arm_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_out) {
+  uint16_t * utf16_output = reinterpret_cast(utf16_out);
+  const char32_t* start = buf;
+  const char32_t* end = buf + len;
 
-simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
-  utf8_to_utf32::validating_transcoder converter;
-  return converter.convert(buf, len, utf32_output);
-}
+  while(buf + 4 <= end) {
+    uint32x4_t in = vld1q_u32(reinterpret_cast(buf));
 
-simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
-  utf8_to_utf32::validating_transcoder converter;
-  return converter.convert_with_errors(buf, len, utf32_output);
-}
+    // Check if no bits set above 16th
+    if(vmaxvq_u32(in) <= 0xFFFF) {
+      uint16x4_t utf16_packed = vmovn_u32(in);
 
-simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const char* input, size_t size,
-    char32_t* utf32_output) const noexcept {
-  return utf8_to_utf32::convert_valid(input, size,  utf32_output);
-}
+      const uint16x4_t v_d800 = vmov_n_u16((uint16_t)0xd800);
+      const uint16x4_t v_dfff = vmov_n_u16((uint16_t)0xdfff);
+      const uint16x4_t forbidden_bytemask = vand_u16(vcle_u16(utf16_packed, v_dfff), vcge_u16(utf16_packed, v_d800));
+      if (vmaxv_u16(forbidden_bytemask) != 0) {
+        return std::make_pair(result(error_code::SURROGATE, buf - start), reinterpret_cast(utf16_output));
+      }
 
-simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  std::pair ret = arm_convert_utf16_to_utf8(buf, len, utf8_output);
-  if (ret.first == nullptr) { return 0; }
-  size_t saved_bytes = ret.second - utf8_output;
-  if (ret.first != buf + len) {
-    const size_t scalar_saved_bytes = scalar::utf16_to_utf8::convert(
-                                        ret.first, len - (ret.first - buf), ret.second);
-    if (scalar_saved_bytes == 0) { return 0; }
-    saved_bytes += scalar_saved_bytes;
-  }
-  return saved_bytes;
-}
-
-simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  std::pair ret = arm_convert_utf16_to_utf8(buf, len, utf8_output);
-  if (ret.first == nullptr) { return 0; }
-  size_t saved_bytes = ret.second - utf8_output;
-  if (ret.first != buf + len) {
-    const size_t scalar_saved_bytes = scalar::utf16_to_utf8::convert(
-                                        ret.first, len - (ret.first - buf), ret.second);
-    if (scalar_saved_bytes == 0) { return 0; }
-    saved_bytes += scalar_saved_bytes;
-  }
-  return saved_bytes;
-}
-
-simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
-  std::pair ret = arm_convert_utf16_to_utf8_with_errors(buf, len, utf8_output);
-  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
-  if (ret.first.count != len) { // All good so far, but not finished
-    result scalar_res = scalar::utf16_to_utf8::convert_with_errors(
-                                        buf + ret.first.count, len - ret.first.count, ret.second);
-    if (scalar_res.error) {
-      scalar_res.count += ret.first.count;
-      return scalar_res;
-    } else {
-      ret.second += scalar_res.count;
-    }
-  }
-  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit words written
-  return ret.first;
-}
-
-simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
-  std::pair ret = arm_convert_utf16_to_utf8_with_errors(buf, len, utf8_output);
-  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
-  if (ret.first.count != len) { // All good so far, but not finished
-    result scalar_res = scalar::utf16_to_utf8::convert_with_errors(
-                                        buf + ret.first.count, len - ret.first.count, ret.second);
-    if (scalar_res.error) {
-      scalar_res.count += ret.first.count;
-      return scalar_res;
+      if (!match_system(big_endian)) { utf16_packed = vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(utf16_packed))); }
+      vst1_u16(utf16_output, utf16_packed);
+      utf16_output += 4;
+      buf += 4;
     } else {
-      ret.second += scalar_res.count;
+      size_t forward = 3;
+      size_t k = 0;
+      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
+      for(; k < forward; k++) {
+        uint32_t word = buf[k];
+        if((word & 0xFFFF0000)==0) {
+          // will not generate a surrogate pair
+          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), reinterpret_cast(utf16_output)); }
+          *utf16_output++ = !match_system(big_endian) ? char16_t(word >> 8 | word << 8) : char16_t(word);
+        } else {
+          // will generate a surrogate pair
+          if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), reinterpret_cast(utf16_output)); }
+          word -= 0x10000;
+          uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
+          uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
+          if (!match_system(big_endian)) {
+            high_surrogate = uint16_t(high_surrogate >> 8 | high_surrogate << 8);
+            low_surrogate = uint16_t(low_surrogate << 8 | low_surrogate >> 8);
+          }
+          *utf16_output++ = char16_t(high_surrogate);
+          *utf16_output++ = char16_t(low_surrogate);
+        }
+      }
+      buf += k;
     }
   }
-  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit words written
-  return ret.first;
-}
 
-simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  return convert_utf16le_to_utf8(buf, len, utf8_output);
+  return std::make_pair(result(error_code::SUCCESS, buf - start), reinterpret_cast(utf16_output));
 }
+/* end file src/arm64/arm_convert_utf32_to_utf16.cpp */
+} // unnamed namespace
+} // namespace arm64
+} // namespace simdutf
+/* begin file src/generic/buf_block_reader.h */
+namespace simdutf {
+namespace arm64 {
+namespace {
 
-simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  return convert_utf16be_to_utf8(buf, len, utf8_output);
-}
+// Walks through a buffer in block-sized increments, loading the last part with spaces
+template
+struct buf_block_reader {
+public:
+  simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
+  simdutf_really_inline size_t block_index();
+  simdutf_really_inline bool has_full_block() const;
+  simdutf_really_inline const uint8_t *full_block() const;
+  /**
+   * Get the last block, padded with spaces.
+   *
+   * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this
+   * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there
+   * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding.
+   *
+   * @return the number of effective characters in the last block.
+   */
+  simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
+  simdutf_really_inline void advance();
+private:
+  const uint8_t *buf;
+  const size_t len;
+  const size_t lenminusstep;
+  size_t idx;
+};
 
-simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
-  std::pair ret = arm_convert_utf32_to_utf8(buf, len, utf8_output);
-  if (ret.first == nullptr) { return 0; }
-  size_t saved_bytes = ret.second - utf8_output;
-  if (ret.first != buf + len) {
-    const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert(
-                                        ret.first, len - (ret.first - buf), ret.second);
-    if (scalar_saved_bytes == 0) { return 0; }
-    saved_bytes += scalar_saved_bytes;
+// Routines to print masks and text for debugging bitmask operations
+simdutf_unused static char * format_input_text_64(const uint8_t *text) {
+  static char *buf = reinterpret_cast(malloc(sizeof(simd8x64) + 1));
+  for (size_t i=0; i); i++) {
+    buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
   }
-  return saved_bytes;
+  buf[sizeof(simd8x64)] = '\0';
+  return buf;
 }
 
-simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
-  std::pair ret = arm_convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
-  if (ret.first.count != len) {
-    result scalar_res = scalar::utf32_to_utf8::convert_with_errors(
-                                        buf + ret.first.count, len - ret.first.count, ret.second);
-    if (scalar_res.error) {
-      scalar_res.count += ret.first.count;
-      return scalar_res;
-    } else {
-      ret.second += scalar_res.count;
-    }
+// Routines to print masks and text for debugging bitmask operations
+simdutf_unused static char * format_input_text(const simd8x64& in) {
+  static char *buf = reinterpret_cast(malloc(sizeof(simd8x64) + 1));
+  in.store(reinterpret_cast(buf));
+  for (size_t i=0; i); i++) {
+    if (buf[i] < ' ') { buf[i] = '_'; }
   }
-  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit words written
-  return ret.first;
+  buf[sizeof(simd8x64)] = '\0';
+  return buf;
 }
 
-simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  std::pair ret = arm_convert_utf16_to_utf32(buf, len, utf32_output);
-  if (ret.first == nullptr) { return 0; }
-  size_t saved_bytes = ret.second - utf32_output;
-  if (ret.first != buf + len) {
-    const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert(
-                                        ret.first, len - (ret.first - buf), ret.second);
-    if (scalar_saved_bytes == 0) { return 0; }
-    saved_bytes += scalar_saved_bytes;
+simdutf_unused static char * format_mask(uint64_t mask) {
+  static char *buf = reinterpret_cast(malloc(64 + 1));
+  for (size_t i=0; i<64; i++) {
+    buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
   }
-  return saved_bytes;
+  buf[64] = '\0';
+  return buf;
 }
 
-simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  std::pair ret = arm_convert_utf16_to_utf32(buf, len, utf32_output);
-  if (ret.first == nullptr) { return 0; }
-  size_t saved_bytes = ret.second - utf32_output;
-  if (ret.first != buf + len) {
-    const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert(
-                                        ret.first, len - (ret.first - buf), ret.second);
-    if (scalar_saved_bytes == 0) { return 0; }
-    saved_bytes += scalar_saved_bytes;
-  }
-  return saved_bytes;
-}
+template
+simdutf_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
 
-simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
-  std::pair ret = arm_convert_utf16_to_utf32_with_errors(buf, len, utf32_output);
-  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
-  if (ret.first.count != len) { // All good so far, but not finished
-    result scalar_res = scalar::utf16_to_utf32::convert_with_errors(
-                                        buf + ret.first.count, len - ret.first.count, ret.second);
-    if (scalar_res.error) {
-      scalar_res.count += ret.first.count;
-      return scalar_res;
-    } else {
-      ret.second += scalar_res.count;
-    }
-  }
-  ret.first.count = ret.second - utf32_output;   // Set count to the number of 8-bit words written
-  return ret.first;
-}
+template
+simdutf_really_inline size_t buf_block_reader::block_index() { return idx; }
 
-simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
-  std::pair ret = arm_convert_utf16_to_utf32_with_errors(buf, len, utf32_output);
-  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
-  if (ret.first.count != len) { // All good so far, but not finished
-    result scalar_res = scalar::utf16_to_utf32::convert_with_errors(
-                                        buf + ret.first.count, len - ret.first.count, ret.second);
-    if (scalar_res.error) {
-      scalar_res.count += ret.first.count;
-      return scalar_res;
-    } else {
-      ret.second += scalar_res.count;
-    }
-  }
-  ret.first.count = ret.second - utf32_output;   // Set count to the number of 8-bit words written
-  return ret.first;
+template
+simdutf_really_inline bool buf_block_reader::has_full_block() const {
+  return idx < lenminusstep;
 }
 
-simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
-  return convert_utf32_to_utf8(buf, len, utf8_output);
+template
+simdutf_really_inline const uint8_t *buf_block_reader::full_block() const {
+  return &buf[idx];
 }
 
-simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  std::pair ret = arm_convert_utf32_to_utf16(buf, len, utf16_output);
-  if (ret.first == nullptr) { return 0; }
-  size_t saved_bytes = ret.second - utf16_output;
-  if (ret.first != buf + len) {
-    const size_t scalar_saved_bytes = scalar::utf32_to_utf16::convert(
-                                        ret.first, len - (ret.first - buf), ret.second);
-    if (scalar_saved_bytes == 0) { return 0; }
-    saved_bytes += scalar_saved_bytes;
-  }
-  return saved_bytes;
+template
+simdutf_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const {
+  if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers
+  std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
+  std::memcpy(dst, buf + idx, len - idx);
+  return len - idx;
 }
 
-simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  std::pair ret = arm_convert_utf32_to_utf16(buf, len, utf16_output);
-  if (ret.first == nullptr) { return 0; }
-  size_t saved_bytes = ret.second - utf16_output;
-  if (ret.first != buf + len) {
-    const size_t scalar_saved_bytes = scalar::utf32_to_utf16::convert(
-                                        ret.first, len - (ret.first - buf), ret.second);
-    if (scalar_saved_bytes == 0) { return 0; }
-    saved_bytes += scalar_saved_bytes;
-  }
-  return saved_bytes;
+template
+simdutf_really_inline void buf_block_reader::advance() {
+  idx += STEP_SIZE;
 }
 
-simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
-  std::pair ret = arm_convert_utf32_to_utf16_with_errors(buf, len, utf16_output);
-  if (ret.first.count != len) {
-    result scalar_res = scalar::utf32_to_utf16::convert_with_errors(
-                                        buf + ret.first.count, len - ret.first.count, ret.second);
-    if (scalar_res.error) {
-      scalar_res.count += ret.first.count;
-      return scalar_res;
-    } else {
-      ret.second += scalar_res.count;
-    }
-  }
-  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit words written
-  return ret.first;
-}
+} // unnamed namespace
+} // namespace arm64
+} // namespace simdutf
+/* end file src/generic/buf_block_reader.h */
+/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
+namespace simdutf {
+namespace arm64 {
+namespace {
+namespace utf8_validation {
 
-simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
-  std::pair ret = arm_convert_utf32_to_utf16_with_errors(buf, len, utf16_output);
-  if (ret.first.count != len) {
-    result scalar_res = scalar::utf32_to_utf16::convert_with_errors(
-                                        buf + ret.first.count, len - ret.first.count, ret.second);
-    if (scalar_res.error) {
-      scalar_res.count += ret.first.count;
-      return scalar_res;
-    } else {
-      ret.second += scalar_res.count;
-    }
-  }
-  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit words written
-  return ret.first;
-}
+using namespace simd;
 
-simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  return convert_utf32_to_utf16le(buf, len, utf16_output);
-}
+  simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
+// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
+// Bit 1 = Too Long (ASCII followed by continuation)
+// Bit 2 = Overlong 3-byte
+// Bit 4 = Surrogate
+// Bit 5 = Overlong 2-byte
+// Bit 7 = Two Continuations
+    constexpr const uint8_t TOO_SHORT   = 1<<0; // 11______ 0_______
+                                                // 11______ 11______
+    constexpr const uint8_t TOO_LONG    = 1<<1; // 0_______ 10______
+    constexpr const uint8_t OVERLONG_3  = 1<<2; // 11100000 100_____
+    constexpr const uint8_t SURROGATE   = 1<<4; // 11101101 101_____
+    constexpr const uint8_t OVERLONG_2  = 1<<5; // 1100000_ 10______
+    constexpr const uint8_t TWO_CONTS   = 1<<7; // 10______ 10______
+    constexpr const uint8_t TOO_LARGE   = 1<<3; // 11110100 1001____
+                                                // 11110100 101_____
+                                                // 11110101 1001____
+                                                // 11110101 101_____
+                                                // 1111011_ 1001____
+                                                // 1111011_ 101_____
+                                                // 11111___ 1001____
+                                                // 11111___ 101_____
+    constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
+                                                // 11110101 1000____
+                                                // 1111011_ 1000____
+                                                // 11111___ 1000____
+    constexpr const uint8_t OVERLONG_4  = 1<<6; // 11110000 1000____
 
-simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  return convert_utf32_to_utf16be(buf, len, utf16_output);
-}
+    const simd8 byte_1_high = prev1.shr<4>().lookup_16(
+      // 0_______ ________ 
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      // 10______ ________ 
+      TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
+      // 1100____ ________ 
+      TOO_SHORT | OVERLONG_2,
+      // 1101____ ________ 
+      TOO_SHORT,
+      // 1110____ ________ 
+      TOO_SHORT | OVERLONG_3 | SURROGATE,
+      // 1111____ ________ 
+      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4
+    );
+    constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
+    const simd8 byte_1_low = (prev1 & 0x0F).lookup_16(
+      // ____0000 ________
+      CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
+      // ____0001 ________
+      CARRY | OVERLONG_2,
+      // ____001_ ________
+      CARRY,
+      CARRY,
 
-simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  return convert_utf16le_to_utf32(buf, len, utf32_output);
-}
+      // ____0100 ________
+      CARRY | TOO_LARGE,
+      // ____0101 ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      // ____011_ ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
 
-simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  return convert_utf16be_to_utf32(buf, len, utf32_output);
-}
+      // ____1___ ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      // ____1101 ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000
+    );
+    const simd8 byte_2_high = input.shr<4>().lookup_16(
+      // ________ 0_______ 
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
 
-void implementation::change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept {
-  utf16::change_endianness_utf16(input, length, output);
-}
+      // ________ 1000____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
+      // ________ 1001____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
+      // ________ 101_____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
 
-simdutf_warn_unused size_t implementation::count_utf16le(const char16_t * input, size_t length) const noexcept {
-  return utf16::count_code_points(input, length);
-}
+      // ________ 11______
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
+    );
+    return (byte_1_high & byte_1_low & byte_2_high);
+  }
+  simdutf_really_inline simd8 check_multibyte_lengths(const simd8 input,
+      const simd8 prev_input, const simd8 sc) {
+    simd8 prev2 = input.prev<2>(prev_input);
+    simd8 prev3 = input.prev<3>(prev_input);
+    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
+    simd8 must23_80 = must23 & uint8_t(0x80);
+    return must23_80 ^ sc;
+  }
 
-simdutf_warn_unused size_t implementation::count_utf16be(const char16_t * input, size_t length) const noexcept {
-  return utf16::count_code_points(input, length);
-}
+  //
+  // Return nonzero if there are incomplete multibyte characters at the end of the block:
+  // e.g. if there is a 4-byte character, but it's 3 bytes from the end.
+  //
+  simdutf_really_inline simd8 is_incomplete(const simd8 input) {
+    // If the previous input's last 3 bytes match this, they're too short (they ended at EOF):
+    // ... 1111____ 111_____ 11______
+    static const uint8_t max_array[32] = {
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1
+    };
+    const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]);
+    return input.gt_bits(max_value);
+  }
 
-simdutf_warn_unused size_t implementation::count_utf8(const char * input, size_t length) const noexcept {
-  return utf8::count_code_points(input, length);
-}
+  struct utf8_checker {
+    // If this is nonzero, there has been a UTF-8 error.
+    simd8 error;
+    // The last input we received
+    simd8 prev_input_block;
+    // Whether the last input we received was incomplete (used for ASCII fast path)
+    simd8 prev_incomplete;
 
-simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
-  return utf16::utf8_length_from_utf16(input, length);
-}
+    //
+    // Check whether the current bytes are valid UTF-8.
+    //
+    simdutf_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) {
+      // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
+      // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
+      simd8 prev1 = input.prev<1>(prev_input);
+      simd8 sc = check_special_cases(input, prev1);
+      this->error |= check_multibyte_lengths(input, prev_input, sc);
+    }
 
-simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
-  return utf16::utf8_length_from_utf16(input, length);
-}
+    // The only problem that can happen at EOF is that a multibyte character is too short
+    // or a byte value too large in the last bytes: check_special_cases only checks for bytes
+    // too large in the first of two bytes.
+    simdutf_really_inline void check_eof() {
+      // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
+      // possibly finish them.
+      this->error |= this->prev_incomplete;
+    }
 
-simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
-  return utf16::utf32_length_from_utf16(input, length);
-}
+    simdutf_really_inline void check_next_input(const simd8x64& input) {
+      if(simdutf_likely(is_ascii(input))) {
+        this->error |= this->prev_incomplete;
+      } else {
+        // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+        static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+            "We support either two or four chunks per 64-byte block.");
+        if(simd8x64::NUM_CHUNKS == 2) {
+          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
+          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+        } else if(simd8x64::NUM_CHUNKS == 4) {
+          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
+          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+          this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+        }
+        this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]);
+        this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1];
 
-simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
-  return utf16::utf32_length_from_utf16(input, length);
-}
+      }
+    }
 
-simdutf_warn_unused size_t implementation::utf16_length_from_utf8(const char * input, size_t length) const noexcept {
-  return utf8::utf16_length_from_utf8(input, length);
-}
+    // do not forget to call check_eof!
+    simdutf_really_inline bool errors() const {
+      return this->error.any_bits_set_anywhere();
+    }
 
-simdutf_warn_unused size_t implementation::utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept {
-  const uint32x4_t v_7f = vmovq_n_u32((uint32_t)0x7f);
-  const uint32x4_t v_7ff = vmovq_n_u32((uint32_t)0x7ff);
-  const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff);
-  const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1);
-  size_t pos = 0;
-  size_t count = 0;
-  for(;pos + 4 <= length; pos += 4) {
-    uint32x4_t in = vld1q_u32(reinterpret_cast(input + pos));
-    const uint32x4_t ascii_bytes_bytemask = vcleq_u32(in, v_7f);
-    const uint32x4_t one_two_bytes_bytemask = vcleq_u32(in, v_7ff);
-    const uint32x4_t two_bytes_bytemask = veorq_u32(one_two_bytes_bytemask, ascii_bytes_bytemask);
-    const uint32x4_t three_bytes_bytemask = veorq_u32(vcleq_u32(in, v_ffff), one_two_bytes_bytemask);
+  }; // struct utf8_checker
+} // namespace utf8_validation
 
-    const uint16x8_t reduced_ascii_bytes_bytemask = vreinterpretq_u16_u32(vandq_u32(ascii_bytes_bytemask, v_1));
-    const uint16x8_t reduced_two_bytes_bytemask = vreinterpretq_u16_u32(vandq_u32(two_bytes_bytemask, v_1));
-    const uint16x8_t reduced_three_bytes_bytemask = vreinterpretq_u16_u32(vandq_u32(three_bytes_bytemask, v_1));
+using utf8_validation::utf8_checker;
 
-    const uint16x8_t compressed_bytemask0 = vpaddq_u16(reduced_ascii_bytes_bytemask, reduced_two_bytes_bytemask);
-    const uint16x8_t compressed_bytemask1 = vpaddq_u16(reduced_three_bytes_bytemask, reduced_three_bytes_bytemask);
+} // unnamed namespace
+} // namespace arm64
+} // namespace simdutf
+/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
+/* begin file src/generic/utf8_validation/utf8_validator.h */
+namespace simdutf {
+namespace arm64 {
+namespace {
+namespace utf8_validation {
 
-    size_t ascii_count = count_ones(vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask0), 0));
-    size_t two_bytes_count = count_ones(vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask0), 1));
-    size_t three_bytes_count = count_ones(vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask1), 0));
-
-    count += 16 - 3*ascii_count - 2*two_bytes_count - three_bytes_count;
-  }
-  return count + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos);
+/**
+ * Validates that the string is actual UTF-8.
+ */
+template
+bool generic_validate_utf8(const uint8_t * input, size_t length) {
+    checker c{};
+    buf_block_reader<64> reader(input, length);
+    while (reader.has_full_block()) {
+      simd::simd8x64 in(reader.full_block());
+      c.check_next_input(in);
+      reader.advance();
+    }
+    uint8_t block[64]{};
+    reader.get_remainder(block);
+    simd::simd8x64 in(block);
+    c.check_next_input(in);
+    reader.advance();
+    c.check_eof();
+    return !c.errors();
 }
 
-simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept {
-  const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff);
-  const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1);
-  size_t pos = 0;
-  size_t count = 0;
-  for(;pos + 4 <= length; pos += 4) {
-    uint32x4_t in = vld1q_u32(reinterpret_cast(input + pos));
-    const uint32x4_t surrogate_bytemask = vcgtq_u32(in, v_ffff);
-    const uint16x8_t reduced_bytemask = vreinterpretq_u16_u32(vandq_u32(surrogate_bytemask, v_1));
-    const uint16x8_t compressed_bytemask = vpaddq_u16(reduced_bytemask, reduced_bytemask);
-    size_t surrogate_count = count_ones(vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask), 0));
-    count += 4 + surrogate_count;
-  }
-  return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos);
+bool generic_validate_utf8(const char * input, size_t length) {
+  return generic_validate_utf8(reinterpret_cast(input),length);
 }
 
-simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept {
-  return utf8::utf32_length_from_utf8(input, length);
+/**
+ * Validates that the string is actual UTF-8 and stops on errors.
+ */
+template
+result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
+    checker c{};
+    buf_block_reader<64> reader(input, length);
+    size_t count{0};
+    while (reader.has_full_block()) {
+      simd::simd8x64 in(reader.full_block());
+      c.check_next_input(in);
+      if(c.errors()) {
+        if (count != 0) { count--; } // Sometimes the error is only detected in the next chunk
+        result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast(input), reinterpret_cast(input + count), length - count);
+        res.count += count;
+        return res;
+      }
+      reader.advance();
+      count += 64;
+    }
+    uint8_t block[64]{};
+    reader.get_remainder(block);
+    simd::simd8x64 in(block);
+    c.check_next_input(in);
+    reader.advance();
+    c.check_eof();
+    if (c.errors()) {
+      if (count != 0) { count--; } // Sometimes the error is only detected in the next chunk
+      result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast(input), reinterpret_cast(input) + count, length - count);
+      res.count += count;
+      return res;
+    } else {
+      return result(error_code::SUCCESS, length);
+    }
 }
 
-} // namespace arm64
-} // namespace simdutf
-
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/arm64/end.h
-/* begin file src/simdutf/arm64/end.h */
-/* end file src/simdutf/arm64/end.h */
-/* end file src/arm64/implementation.cpp */
-#endif
-#if SIMDUTF_IMPLEMENTATION_FALLBACK
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=fallback/implementation.cpp
-/* begin file src/fallback/implementation.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/begin.h
-/* begin file src/simdutf/fallback/begin.h */
-// redefining SIMDUTF_IMPLEMENTATION to "fallback"
-// #define SIMDUTF_IMPLEMENTATION fallback
-/* end file src/simdutf/fallback/begin.h */
-
-
-
-
-
+result generic_validate_utf8_with_errors(const char * input, size_t length) {
+  return generic_validate_utf8_with_errors(reinterpret_cast(input),length);
+}
 
+template
+bool generic_validate_ascii(const uint8_t * input, size_t length) {
+    buf_block_reader<64> reader(input, length);
+    uint8_t blocks[64]{};
+    simd::simd8x64 running_or(blocks);
+    while (reader.has_full_block()) {
+      simd::simd8x64 in(reader.full_block());
+      running_or |= in;
+      reader.advance();
+    }
+    uint8_t block[64]{};
+    reader.get_remainder(block);
+    simd::simd8x64 in(block);
+    running_or |= in;
+    return running_or.is_ascii();
+}
 
+bool generic_validate_ascii(const char * input, size_t length) {
+  return generic_validate_ascii(reinterpret_cast(input),length);
+}
 
-namespace simdutf {
-namespace fallback {
+template
+result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
+  buf_block_reader<64> reader(input, length);
+  size_t count{0};
+  while (reader.has_full_block()) {
+    simd::simd8x64 in(reader.full_block());
+    if (!in.is_ascii()) {
+      result res = scalar::ascii::validate_with_errors(reinterpret_cast(input + count), length - count);
+      return result(res.error, count + res.count);
+    }
+    reader.advance();
 
-simdutf_warn_unused int implementation::detect_encodings(const char * input, size_t length) const noexcept {
-  // If there is a BOM, then we trust it.
-  auto bom_encoding = simdutf::BOM::check_bom(input, length);
-  if(bom_encoding != encoding_type::unspecified) { return bom_encoding; }
-  int out = 0;
-  if(validate_utf8(input, length)) { out |= encoding_type::UTF8; }
-  if((length % 2) == 0) {
-    if(validate_utf16le(reinterpret_cast(input), length/2)) { out |= encoding_type::UTF16_LE; }
+    count += 64;
   }
-  if((length % 4) == 0) {
-    if(validate_utf32(reinterpret_cast(input), length/4)) { out |= encoding_type::UTF32_LE; }
+  uint8_t block[64]{};
+  reader.get_remainder(block);
+  simd::simd8x64 in(block);
+  if (!in.is_ascii()) {
+    result res = scalar::ascii::validate_with_errors(reinterpret_cast(input + count), length - count);
+    return result(res.error, count + res.count);
+  } else {
+    return result(error_code::SUCCESS, length);
   }
-
-  return out;
-}
-
-simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
-    return scalar::utf8::validate(buf, len);
 }
 
-simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
-    return scalar::utf8::validate_with_errors(buf, len);
+result generic_validate_ascii_with_errors(const char * input, size_t length) {
+  return generic_validate_ascii_with_errors(reinterpret_cast(input),length);
 }
 
-simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
-    return scalar::ascii::validate(buf, len);
-}
+} // namespace utf8_validation
+} // unnamed namespace
+} // namespace arm64
+} // namespace simdutf
+/* end file src/generic/utf8_validation/utf8_validator.h */
+// transcoding from UTF-8 to UTF-16
+/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
 
-simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
-    return scalar::ascii::validate_with_errors(buf, len);
-}
 
-simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
-    return scalar::utf16::validate(buf, len);
-}
+namespace simdutf {
+namespace arm64 {
+namespace {
+namespace utf8_to_utf16 {
 
-simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
-    return scalar::utf16::validate(buf, len);
-}
+using namespace simd;
 
-simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
-    return scalar::utf16::validate_with_errors(buf, len);
+template 
+simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
+    char16_t* utf16_output) noexcept {
+  // The implementation is not specific to haswell and should be moved to the generic directory.
+  size_t pos = 0;
+  char16_t* start{utf16_output};
+  const size_t safety_margin = 16; // to avoid overruns!
+  while(pos + 64 + safety_margin <= size) {
+    // this loop could be unrolled further. For example, we could process the mask
+    // far more than 64 bytes.
+    simd8x64 in(reinterpret_cast(input + pos));
+    if(in.is_ascii()) {
+      in.store_ascii_as_utf16(utf16_output);
+      utf16_output += 64;
+      pos += 64;
+    } else {
+      // Slow path. We hope that the compiler will recognize that this is a slow path.
+      // Anything that is not a continuation mask is a 'leading byte', that is, the
+      // start of a new code point.
+      uint64_t utf8_continuation_mask = in.lt(-65 + 1);
+      // -65 is 0b10111111 in two-complement's, so largest possible continuation byte
+      uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+      // The *start* of code points is not so useful, rather, we want the *end* of code points.
+      uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+      // We process in blocks of up to 12 bytes except possibly
+      // for fast paths which may process up to 16 bytes. For the
+      // slow path to work, we should have at least 12 input bytes left.
+      size_t max_starting_point = (pos + 64) - 12;
+      // Next loop is going to run at least five times when using solely
+      // the slow/regular path, and at least four times if there are fast paths.
+      while(pos < max_starting_point) {
+        // Performance note: our ability to compute 'consumed' and
+        // then shift and recompute is critical. If there is a
+        // latency of, say, 4 cycles on getting 'consumed', then
+        // the inner loop might have a total latency of about 6 cycles.
+        // Yet we process between 6 to 12 inputs bytes, thus we get
+        // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+        // for this section of the code. Hence, there is a limit
+        // to how much we can further increase this latency before
+        // it seriously harms performance.
+        //
+        // Thus we may allow convert_masked_utf8_to_utf16 to process
+        // more bytes at a time under a fast-path mode where 16 bytes
+        // are consumed at once (e.g., when encountering ASCII).
+        size_t consumed = convert_masked_utf8_to_utf16(input + pos,
+                            utf8_end_of_code_point_mask, utf16_output);
+        pos += consumed;
+        utf8_end_of_code_point_mask >>= consumed;
+      }
+      // At this point there may remain between 0 and 12 bytes in the
+      // 64-byte block. These bytes will be processed again. So we have an
+      // 80% efficiency (in the worst case). In practice we expect an
+      // 85% to 90% efficiency.
+    }
+  }
+  utf16_output += scalar::utf8_to_utf16::convert_valid(input + pos, size - pos, utf16_output);
+  return utf16_output - start;
 }
 
-simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
-    return scalar::utf16::validate_with_errors(buf, len);
-}
+} // namespace utf8_to_utf16
+} // unnamed namespace
+} // namespace arm64
+} // namespace simdutf
+/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
+/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */
 
-simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
-    return scalar::utf32::validate(buf, len);
-}
 
-simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
-    return scalar::utf32::validate_with_errors(buf, len);
-}
+namespace simdutf {
+namespace arm64 {
+namespace {
+namespace utf8_to_utf16 {
+using namespace simd;
 
-simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
-   return scalar::utf8_to_utf16::convert(buf, len, utf16_output);
-}
 
-simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
-   return scalar::utf8_to_utf16::convert(buf, len, utf16_output);
-}
-
-simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
-   return scalar::utf8_to_utf16::convert_with_errors(buf, len, utf16_output);
-}
-
-simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
-   return scalar::utf8_to_utf16::convert_with_errors(buf, len, utf16_output);
-}
-
-simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
-   return scalar::utf8_to_utf16::convert_valid(buf, len, utf16_output);
-}
-
-simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
-   return scalar::utf8_to_utf16::convert_valid(buf, len, utf16_output);
-}
-
-simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
-   return scalar::utf8_to_utf32::convert(buf, len, utf32_output);
-}
-
-simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
-   return scalar::utf8_to_utf32::convert_with_errors(buf, len, utf32_output);
-}
-
-simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const char* input, size_t size,
-    char32_t* utf32_output) const noexcept {
-  return scalar::utf8_to_utf32::convert_valid(input, size,  utf32_output);
-}
-
-simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  return scalar::utf16_to_utf8::convert(buf, len, utf8_output);
-}
-
-simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  return scalar::utf16_to_utf8::convert(buf, len, utf8_output);
-}
-
-simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  return scalar::utf16_to_utf8::convert_with_errors(buf, len, utf8_output);
-}
-
-simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  return scalar::utf16_to_utf8::convert_with_errors(buf, len, utf8_output);
-}
-
-simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  return scalar::utf16_to_utf8::convert_valid(buf, len, utf8_output);
-}
+  simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
+// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
+// Bit 1 = Too Long (ASCII followed by continuation)
+// Bit 2 = Overlong 3-byte
+// Bit 4 = Surrogate
+// Bit 5 = Overlong 2-byte
+// Bit 7 = Two Continuations
+    constexpr const uint8_t TOO_SHORT   = 1<<0; // 11______ 0_______
+                                                // 11______ 11______
+    constexpr const uint8_t TOO_LONG    = 1<<1; // 0_______ 10______
+    constexpr const uint8_t OVERLONG_3  = 1<<2; // 11100000 100_____
+    constexpr const uint8_t SURROGATE   = 1<<4; // 11101101 101_____
+    constexpr const uint8_t OVERLONG_2  = 1<<5; // 1100000_ 10______
+    constexpr const uint8_t TWO_CONTS   = 1<<7; // 10______ 10______
+    constexpr const uint8_t TOO_LARGE   = 1<<3; // 11110100 1001____
+                                                // 11110100 101_____
+                                                // 11110101 1001____
+                                                // 11110101 101_____
+                                                // 1111011_ 1001____
+                                                // 1111011_ 101_____
+                                                // 11111___ 1001____
+                                                // 11111___ 101_____
+    constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
+                                                // 11110101 1000____
+                                                // 1111011_ 1000____
+                                                // 11111___ 1000____
+    constexpr const uint8_t OVERLONG_4  = 1<<6; // 11110000 1000____
 
-simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  return scalar::utf16_to_utf8::convert_valid(buf, len, utf8_output);
-}
+    const simd8 byte_1_high = prev1.shr<4>().lookup_16(
+      // 0_______ ________ 
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      // 10______ ________ 
+      TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
+      // 1100____ ________ 
+      TOO_SHORT | OVERLONG_2,
+      // 1101____ ________ 
+      TOO_SHORT,
+      // 1110____ ________ 
+      TOO_SHORT | OVERLONG_3 | SURROGATE,
+      // 1111____ ________ 
+      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4
+    );
+    constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
+    const simd8 byte_1_low = (prev1 & 0x0F).lookup_16(
+      // ____0000 ________
+      CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
+      // ____0001 ________
+      CARRY | OVERLONG_2,
+      // ____001_ ________
+      CARRY,
+      CARRY,
 
-simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
-  return scalar::utf32_to_utf8::convert(buf, len, utf8_output);
-}
+      // ____0100 ________
+      CARRY | TOO_LARGE,
+      // ____0101 ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      // ____011_ ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
 
-simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
-  return scalar::utf32_to_utf8::convert_with_errors(buf, len, utf8_output);
-}
+      // ____1___ ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      // ____1101 ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000
+    );
+    const simd8 byte_2_high = input.shr<4>().lookup_16(
+      // ________ 0_______ 
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
 
-simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
-  return scalar::utf32_to_utf8::convert_valid(buf, len, utf8_output);
-}
+      // ________ 1000____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
+      // ________ 1001____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
+      // ________ 101_____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
 
-simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  return scalar::utf32_to_utf16::convert(buf, len, utf16_output);
-}
+      // ________ 11______
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
+    );
+    return (byte_1_high & byte_1_low & byte_2_high);
+  }
+  simdutf_really_inline simd8 check_multibyte_lengths(const simd8 input,
+      const simd8 prev_input, const simd8 sc) {
+    simd8 prev2 = input.prev<2>(prev_input);
+    simd8 prev3 = input.prev<3>(prev_input);
+    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
+    simd8 must23_80 = must23 & uint8_t(0x80);
+    return must23_80 ^ sc;
+  }
 
-simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  return scalar::utf32_to_utf16::convert(buf, len, utf16_output);
-}
 
-simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  return scalar::utf32_to_utf16::convert_with_errors(buf, len, utf16_output);
-}
+  struct validating_transcoder {
+    // If this is nonzero, there has been a UTF-8 error.
+    simd8 error;
 
-simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  return scalar::utf32_to_utf16::convert_with_errors(buf, len, utf16_output);
-}
+    validating_transcoder() : error(uint8_t(0)) {}
+    //
+    // Check whether the current bytes are valid UTF-8.
+    //
+    simdutf_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) {
+      // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
+      // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
+      simd8 prev1 = input.prev<1>(prev_input);
+      simd8 sc = check_special_cases(input, prev1);
+      this->error |= check_multibyte_lengths(input, prev_input, sc);
+    }
 
-simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  return scalar::utf32_to_utf16::convert_valid(buf, len, utf16_output);
-}
 
-simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  return scalar::utf32_to_utf16::convert_valid(buf, len, utf16_output);
-}
+    template 
+    simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
+      size_t pos = 0;
+      char16_t* start{utf16_output};
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
+      while(pos + 64 + safety_margin <= size) {
+        simd8x64 input(reinterpret_cast(in + pos));
+        if(input.is_ascii()) {
+          input.store_ascii_as_utf16(utf16_output);
+          utf16_output += 64;
+          pos += 64;
+        } else {
+          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+              "We support either two or four chunks per 64-byte block.");
+          auto zero = simd8{uint8_t(0)};
+          if(simd8x64::NUM_CHUNKS == 2) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          } else if(simd8x64::NUM_CHUNKS == 4) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+          }
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+          // We process in blocks of up to 12 bytes except possibly
+          // for fast paths which may process up to 16 bytes. For the
+          // slow path to work, we should have at least 12 input bytes left.
+          size_t max_starting_point = (pos + 64) - 12;
+          // Next loop is going to run at least five times.
+          while(pos < max_starting_point) {
+            // Performance note: our ability to compute 'consumed' and
+            // then shift and recompute is critical. If there is a
+            // latency of, say, 4 cycles on getting 'consumed', then
+            // the inner loop might have a total latency of about 6 cycles.
+            // Yet we process between 6 to 12 inputs bytes, thus we get
+            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+            // for this section of the code. Hence, there is a limit
+            // to how much we can further increase this latency before
+            // it seriously harms performance.
+            size_t consumed = convert_masked_utf8_to_utf16(in + pos,
+                            utf8_end_of_code_point_mask, utf16_output);
+            pos += consumed;
+            utf8_end_of_code_point_mask >>= consumed;
+          }
+          // At this point there may remain between 0 and 12 bytes in the
+          // 64-byte block. These bytes will be processed again. So we have an
+          // 80% efficiency (in the worst case). In practice we expect an
+          // 85% to 90% efficiency.
+        }
+      }
+      if(errors()) { return 0; }
+      if(pos < size) {
+        size_t howmany  = scalar::utf8_to_utf16::convert(in + pos, size - pos, utf16_output);
+        if(howmany == 0) { return 0; }
+        utf16_output += howmany;
+      }
+      return utf16_output - start;
+    }
 
-simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  return scalar::utf16_to_utf32::convert(buf, len, utf32_output);
-}
-
-simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  return scalar::utf16_to_utf32::convert(buf, len, utf32_output);
-}
-
-simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  return scalar::utf16_to_utf32::convert_with_errors(buf, len, utf32_output);
-}
-
-simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  return scalar::utf16_to_utf32::convert_with_errors(buf, len, utf32_output);
-}
-
-simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  return scalar::utf16_to_utf32::convert_valid(buf, len, utf32_output);
-}
-
-simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  return scalar::utf16_to_utf32::convert_valid(buf, len, utf32_output);
-}
-
-void implementation::change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept {
-  scalar::utf16::change_endianness_utf16(input, length, output);
-}
-
-simdutf_warn_unused size_t implementation::count_utf16le(const char16_t * input, size_t length) const noexcept {
-  return scalar::utf16::count_code_points(input, length);
-}
-
-simdutf_warn_unused size_t implementation::count_utf16be(const char16_t * input, size_t length) const noexcept {
-  return scalar::utf16::count_code_points(input, length);
-}
-
-simdutf_warn_unused size_t implementation::count_utf8(const char * input, size_t length) const noexcept {
-  return scalar::utf8::count_code_points(input, length);
-}
-
-simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
-  return scalar::utf16::utf8_length_from_utf16(input, length);
-}
+    template 
+    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
+      size_t pos = 0;
+      char16_t* start{utf16_output};
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
+      while(pos + 64 + safety_margin <= size) {
+        simd8x64 input(reinterpret_cast(in + pos));
+        if(input.is_ascii()) {
+          input.store_ascii_as_utf16(utf16_output);
+          utf16_output += 64;
+          pos += 64;
+        } else {
+          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+              "We support either two or four chunks per 64-byte block.");
+          auto zero = simd8{uint8_t(0)};
+          if(simd8x64::NUM_CHUNKS == 2) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          } else if(simd8x64::NUM_CHUNKS == 4) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+          }
+          if (errors()) {
+            // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+            // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+            result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+            res.count += pos;
+            return res;
+          }
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+          // We process in blocks of up to 12 bytes except possibly
+          // for fast paths which may process up to 16 bytes. For the
+          // slow path to work, we should have at least 12 input bytes left.
+          size_t max_starting_point = (pos + 64) - 12;
+          // Next loop is going to run at least five times.
+          while(pos < max_starting_point) {
+            // Performance note: our ability to compute 'consumed' and
+            // then shift and recompute is critical. If there is a
+            // latency of, say, 4 cycles on getting 'consumed', then
+            // the inner loop might have a total latency of about 6 cycles.
+            // Yet we process between 6 to 12 inputs bytes, thus we get
+            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+            // for this section of the code. Hence, there is a limit
+            // to how much we can further increase this latency before
+            // it seriously harms performance.
+            size_t consumed = convert_masked_utf8_to_utf16(in + pos,
+                            utf8_end_of_code_point_mask, utf16_output);
+            pos += consumed;
+            utf8_end_of_code_point_mask >>= consumed;
+          }
+          // At this point there may remain between 0 and 12 bytes in the
+          // 64-byte block. These bytes will be processed again. So we have an
+          // 80% efficiency (in the worst case). In practice we expect an
+          // 85% to 90% efficiency.
+        }
+      }
+      if(errors()) {
+        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+        result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+        res.count += pos;
+        return res;
+      }
+      if(pos < size) {
+        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+        result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+        if (res.error) {    // In case of error, we want the error position
+          res.count += pos;
+          return res;
+        } else {    // In case of success, we want the number of word written
+          utf16_output += res.count;
+        }
+      }
+      return result(error_code::SUCCESS, utf16_output - start);
+    }
 
-simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
-  return scalar::utf16::utf8_length_from_utf16(input, length);
-}
+    simdutf_really_inline bool errors() const {
+      return this->error.any_bits_set_anywhere();
+    }
 
-simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
-  return scalar::utf16::utf32_length_from_utf16(input, length);
-}
+  }; // struct utf8_checker
+} // utf8_to_utf16 namespace
+} // unnamed namespace
+} // namespace arm64
+} // namespace simdutf
+/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */
+// transcoding from UTF-8 to UTF-32
+/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
 
-simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
-  return scalar::utf16::utf32_length_from_utf16(input, length);
-}
+namespace simdutf {
+namespace arm64 {
+namespace {
+namespace utf8_to_utf32 {
 
-simdutf_warn_unused size_t implementation::utf16_length_from_utf8(const char * input, size_t length) const noexcept {
-  return scalar::utf8::utf16_length_from_utf8(input, length);
-}
+using namespace simd;
 
-simdutf_warn_unused size_t implementation::utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept {
-  return scalar::utf32::utf8_length_from_utf32(input, length);
-}
 
-simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept {
-  return scalar::utf32::utf16_length_from_utf32(input, length);
+simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
+    char32_t* utf32_output) noexcept {
+  size_t pos = 0;
+  char32_t* start{utf32_output};
+  const size_t safety_margin = 16; // to avoid overruns!
+  while(pos + 64 + safety_margin <= size) {
+    simd8x64 in(reinterpret_cast(input + pos));
+    if(in.is_ascii()) {
+      in.store_ascii_as_utf32(utf32_output);
+      utf32_output += 64;
+      pos += 64;
+    } else {
+    // -65 is 0b10111111 in two-complement's, so largest possible continuation byte
+    uint64_t utf8_continuation_mask = in.lt(-65 + 1);
+    uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+    uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+    size_t max_starting_point = (pos + 64) - 12;
+    while(pos < max_starting_point) {
+      size_t consumed = convert_masked_utf8_to_utf32(input + pos,
+                          utf8_end_of_code_point_mask, utf32_output);
+      pos += consumed;
+      utf8_end_of_code_point_mask >>= consumed;
+      }
+    }
+  }
+  utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
+  return utf32_output - start;
 }
 
-simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept {
-  return scalar::utf8::count_code_points(input, length);
-}
 
-} // namespace fallback
+} // namespace utf8_to_utf32
+} // unnamed namespace
+} // namespace arm64
 } // namespace simdutf
+/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
+/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/fallback/end.h
-/* begin file src/simdutf/fallback/end.h */
-/* end file src/simdutf/fallback/end.h */
-/* end file src/fallback/implementation.cpp */
-#endif
-#if SIMDUTF_IMPLEMENTATION_ICELAKE
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/implementation.cpp
-/* begin file src/icelake/implementation.cpp */
-
-
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/begin.h
-/* begin file src/simdutf/icelake/begin.h */
-// redefining SIMDUTF_IMPLEMENTATION to "icelake"
-// #define SIMDUTF_IMPLEMENTATION icelake
-
-#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE
-// nothing needed.
-#else
-SIMDUTF_TARGET_ICELAKE
-#endif
 
-#if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
-SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized)
-#endif // end of workaround
-/* end file src/simdutf/icelake/begin.h */
 namespace simdutf {
-namespace icelake {
+namespace arm64 {
 namespace {
-#ifndef SIMDUTF_ICELAKE_H
-#error "icelake.h must be included"
-#endif
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_utf8_common.inl.cpp
-/* begin file src/icelake/icelake_utf8_common.inl.cpp */
-// Common procedures for both validating and non-validating conversions from UTF-8.
-enum block_processing_mode { SIMDUTF_FULL, SIMDUTF_TAIL};
-
-using utf8_to_utf16_result = std::pair;
-using utf8_to_utf32_result = std::pair;
-
-/*
-    process_block_utf8_to_utf16 converts up to 64 bytes from 'in' from UTF-8
-    to UTF-16. When tail = SIMDUTF_FULL, then the full input buffer (64 bytes)
-    might be used. When tail = SIMDUTF_TAIL, we take into account 'gap' which
-    indicates how many input bytes are relevant.
+namespace utf8_to_utf32 {
+using namespace simd;
 
-    Returns true when the result is correct, otherwise it returns false.
 
-    The provided in and out pointers are advanced according to how many input
-    bytes have been processed, upon success.
-*/
-template 
-simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t *&out, size_t gap) {
-  // constants
-  __m512i mask_identity = _mm512_set_epi8(63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-  __m512i mask_c0c0c0c0 = _mm512_set1_epi32(0xc0c0c0c0);
-  __m512i mask_80808080 = _mm512_set1_epi32(0x80808080);
-  __m512i mask_f0f0f0f0 = _mm512_set1_epi32(0xf0f0f0f0);
-  __m512i mask_dfdfdfdf_tail = _mm512_set_epi64(0xffffdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf);
-  __m512i mask_c2c2c2c2 = _mm512_set1_epi32(0xc2c2c2c2);
-  __m512i mask_ffffffff = _mm512_set1_epi32(0xffffffff);
-  __m512i mask_d7c0d7c0 = _mm512_set1_epi32(0xd7c0d7c0);
-  __m512i mask_dc00dc00 = _mm512_set1_epi32(0xdc00dc00);
-  __m512i byteflip = _mm512_setr_epi64(
-            0x0607040502030001,
-            0x0e0f0c0d0a0b0809,
-            0x0607040502030001,
-            0x0e0f0c0d0a0b0809,
-            0x0607040502030001,
-            0x0e0f0c0d0a0b0809,
-            0x0607040502030001,
-            0x0e0f0c0d0a0b0809
-        );
-  // Note that 'tail' is a compile-time constant !
-  __mmask64 b = (tail == SIMDUTF_FULL) ? 0xFFFFFFFFFFFFFFFF : (uint64_t(1) << gap) - 1;
-  __m512i input = (tail == SIMDUTF_FULL) ? _mm512_loadu_si512(in) : _mm512_maskz_loadu_epi8(b, in);
-  __mmask64 m1 = (tail == SIMDUTF_FULL) ? _mm512_cmplt_epu8_mask(input, mask_80808080) : _mm512_mask_cmplt_epu8_mask(b, input, mask_80808080);
-  if(_ktestc_mask64_u8(m1, b)) {// NOT(m1) AND b -- if all zeroes, then all ASCII
-  // alternatively, we could do 'if (m1 == b) { '
-    if (tail == SIMDUTF_FULL) {
-      in += 64;          // consumed 64 bytes
-      // we convert a full 64-byte block, writing 128 bytes.
-      __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input));
-      if(big_endian) { input1 = _mm512_shuffle_epi8(input1, byteflip); }
-      _mm512_storeu_si512(out, input1);
-      out += 32;
-      __m512i input2 = _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(input, 1));
-      if(big_endian) { input2 = _mm512_shuffle_epi8(input2, byteflip); }
-      _mm512_storeu_si512(out, input2);
-      out += 32;
-      return true; // we are done
-    } else {
-      in += gap;
-      if (gap <= 32) {
-        __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input));
-        if(big_endian) { input1 = _mm512_shuffle_epi8(input1, byteflip); }
-        _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << (gap)) - 1), input1);
-        out += gap;
-      } else {
-        __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input));
-        if(big_endian) { input1 = _mm512_shuffle_epi8(input1, byteflip); }
-        _mm512_storeu_si512(out, input1);
-        out += 32;
-        __m512i input2 = _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(input, 1));
-        if(big_endian) { input2 = _mm512_shuffle_epi8(input2, byteflip); }
-        _mm512_mask_storeu_epi16(out, __mmask32((uint32_t(1) << (gap - 32)) - 1), input2);
-        out += gap - 32;
-      }
-      return true; // we are done
-    }
-  }
-  // classify characters further
-  __mmask64 m234 = _mm512_cmp_epu8_mask(mask_c0c0c0c0, input,
-                                        _MM_CMPINT_LE); // 0xc0 <= input, 2, 3, or 4 leading byte
-  __mmask64 m34 = _mm512_cmp_epu8_mask(mask_dfdfdfdf_tail, input,
-                                       _MM_CMPINT_LT); // 0xdf < input,  3 or 4 leading byte
+  simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
+// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
+// Bit 1 = Too Long (ASCII followed by continuation)
+// Bit 2 = Overlong 3-byte
+// Bit 4 = Surrogate
+// Bit 5 = Overlong 2-byte
+// Bit 7 = Two Continuations
+    constexpr const uint8_t TOO_SHORT   = 1<<0; // 11______ 0_______
+                                                // 11______ 11______
+    constexpr const uint8_t TOO_LONG    = 1<<1; // 0_______ 10______
+    constexpr const uint8_t OVERLONG_3  = 1<<2; // 11100000 100_____
+    constexpr const uint8_t SURROGATE   = 1<<4; // 11101101 101_____
+    constexpr const uint8_t OVERLONG_2  = 1<<5; // 1100000_ 10______
+    constexpr const uint8_t TWO_CONTS   = 1<<7; // 10______ 10______
+    constexpr const uint8_t TOO_LARGE   = 1<<3; // 11110100 1001____
+                                                // 11110100 101_____
+                                                // 11110101 1001____
+                                                // 11110101 101_____
+                                                // 1111011_ 1001____
+                                                // 1111011_ 101_____
+                                                // 11111___ 1001____
+                                                // 11111___ 101_____
+    constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
+                                                // 11110101 1000____
+                                                // 1111011_ 1000____
+                                                // 11111___ 1000____
+    constexpr const uint8_t OVERLONG_4  = 1<<6; // 11110000 1000____
 
-  __mmask64 milltwobytes = _mm512_mask_cmp_epu8_mask(m234, input, mask_c2c2c2c2,
-                                                     _MM_CMPINT_LT); // 0xc0 <= input < 0xc2 (illegal two byte sequence)
-                                                                     // Overlong 2-byte sequence
-  if (_ktestz_mask64_u8(milltwobytes, milltwobytes) == 0) {
-    // Overlong 2-byte sequence
-    return false;
-  }
-  if (_ktestz_mask64_u8(m34, m34) == 0) {
-    // We have a 3-byte sequence and/or a 2-byte sequence, or possibly even a 4-byte sequence!
-    __mmask64 m4 = _mm512_cmp_epu8_mask(input, mask_f0f0f0f0,
-                                        _MM_CMPINT_NLT); // 0xf0 <= zmm0 (4 byte start bytes)
+    const simd8 byte_1_high = prev1.shr<4>().lookup_16(
+      // 0_______ ________ 
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      // 10______ ________ 
+      TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
+      // 1100____ ________ 
+      TOO_SHORT | OVERLONG_2,
+      // 1101____ ________ 
+      TOO_SHORT,
+      // 1110____ ________ 
+      TOO_SHORT | OVERLONG_3 | SURROGATE,
+      // 1111____ ________ 
+      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4
+    );
+    constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
+    const simd8 byte_1_low = (prev1 & 0x0F).lookup_16(
+      // ____0000 ________
+      CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
+      // ____0001 ________
+      CARRY | OVERLONG_2,
+      // ____001_ ________
+      CARRY,
+      CARRY,
 
-    __mmask64 mask_not_ascii = (tail == SIMDUTF_FULL) ? _knot_mask64(m1) : _kand_mask64(_knot_mask64(m1), b);
+      // ____0100 ________
+      CARRY | TOO_LARGE,
+      // ____0101 ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      // ____011_ ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
 
-    __mmask64 mp1 = _kshiftli_mask64(m234, 1);
-    __mmask64 mp2 = _kshiftli_mask64(m34, 2);
-    // We could do it as follows...
-    // if (_kortestz_mask64_u8(m4,m4)) { // compute the bitwise OR of the 64-bit masks a and b and return 1 if all zeroes
-    // but GCC generates better code when we do:
-    if (m4 == 0) { // compute the bitwise OR of the 64-bit masks a and b and return 1 if all zeroes
-      // Fast path with 1,2,3 bytes
-      __mmask64 mc = _kor_mask64(mp1, mp2); // expected continuation bytes
-      __mmask64 m1234 = _kor_mask64(m1, m234);
-      // mismatched continuation bytes:
-      if (tail == SIMDUTF_FULL) {
-        __mmask64 xnormcm1234 = _kxnor_mask64(mc, m1234); // XNOR of mc and m1234 should be all zero if they differ
-        // the presence of a 1 bit indicates that they overlap.
-        // _kortestz_mask64_u8: compute the bitwise OR of 64-bit masksand return 1 if all zeroes.
-        if (!_kortestz_mask64_u8(xnormcm1234, xnormcm1234)) { return false; }
-      } else {
-        __mmask64 bxorm1234 = _kxor_mask64(b, m1234);
-        if (mc != bxorm1234) { return false; }
-      }
-      // mend: identifying the last bytes of each sequence to be decoded
-      __mmask64 mend = _kshiftri_mask64(m1234, 1);
-      if (tail != SIMDUTF_FULL) {
-        mend = _kor_mask64(mend, (uint64_t(1) << (gap - 1)));
-      }
+      // ____1___ ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      // ____1101 ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000
+    );
+    const simd8 byte_2_high = input.shr<4>().lookup_16(
+      // ________ 0_______ 
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
 
+      // ________ 1000____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
+      // ________ 1001____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
+      // ________ 101_____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
 
-      __m512i last_and_third = _mm512_maskz_compress_epi8(mend, mask_identity);
-      __m512i last_and_thirdu16 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(last_and_third));
+      // ________ 11______
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
+    );
+    return (byte_1_high & byte_1_low & byte_2_high);
+  }
+  simdutf_really_inline simd8 check_multibyte_lengths(const simd8 input,
+      const simd8 prev_input, const simd8 sc) {
+    simd8 prev2 = input.prev<2>(prev_input);
+    simd8 prev3 = input.prev<3>(prev_input);
+    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
+    simd8 must23_80 = must23 & uint8_t(0x80);
+    return must23_80 ^ sc;
+  }
 
-      __m512i nonasciitags = _mm512_maskz_mov_epi8(mask_not_ascii, mask_c0c0c0c0); // ASCII: 00000000  other: 11000000
-      __m512i clearedbytes = _mm512_andnot_si512(nonasciitags, input);             // high two bits cleared where not ASCII
-      __m512i lastbytes = _mm512_maskz_permutexvar_epi8(0x5555555555555555, last_and_thirdu16,
-                                                        clearedbytes); // the last byte of each character
 
-      __mmask64 mask_before_non_ascii = _kshiftri_mask64(mask_not_ascii, 1);               // bytes that precede non-ASCII bytes
-      __m512i indexofsecondlastbytes = _mm512_add_epi16(mask_ffffffff, last_and_thirdu16); // indices of the second last bytes
-      __m512i beforeasciibytes = _mm512_maskz_mov_epi8(mask_before_non_ascii, clearedbytes);
-      __m512i secondlastbytes = _mm512_maskz_permutexvar_epi8(0x5555555555555555, indexofsecondlastbytes,
-                                                              beforeasciibytes); // the second last bytes (of two, three byte seq,
-                                                                                 // surrogates)
-      secondlastbytes = _mm512_slli_epi16(secondlastbytes, 6);                   // shifted into position
+  struct validating_transcoder {
+    // If this is nonzero, there has been a UTF-8 error.
+    simd8 error;
 
-      __m512i indexofthirdlastbytes = _mm512_add_epi16(mask_ffffffff,
-                                                       indexofsecondlastbytes); // indices of the second last bytes
-      __m512i thirdlastbyte = _mm512_maskz_mov_epi8(m34,
-                                                    clearedbytes); // only those that are the third last byte of a sequece
-      __m512i thirdlastbytes = _mm512_maskz_permutexvar_epi8(0x5555555555555555, indexofthirdlastbytes,
-                                                             thirdlastbyte); // the third last bytes (of three byte sequences, hi
-                                                                             // surrogate)
-      thirdlastbytes = _mm512_slli_epi16(thirdlastbytes, 12);                // shifted into position
-      __m512i Wout = _mm512_ternarylogic_epi32(lastbytes, secondlastbytes, thirdlastbytes, 254);
-      // the elements of Wout excluding the last element if it happens to be a high surrogate:
+    validating_transcoder() : error(uint8_t(0)) {}
+    //
+    // Check whether the current bytes are valid UTF-8.
+    //
+    simdutf_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) {
+      // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
+      // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
+      simd8 prev1 = input.prev<1>(prev_input);
+      simd8 sc = check_special_cases(input, prev1);
+      this->error |= check_multibyte_lengths(input, prev_input, sc);
+    }
 
-      __mmask64 mprocessed = (tail == SIMDUTF_FULL) ? _pdep_u64(0xFFFFFFFF, mend) : _pdep_u64(0xFFFFFFFF, _kand_mask64(mend, b)); // we adjust mend at the end of the output.
 
 
-      // Encodings out of range...
-      {
-        // the location of 3-byte sequence start bytes in the input
-        __mmask64 m3 = m34 & (b ^ m4);
-        // words in Wout corresponding to 3-byte sequences.
-        __mmask32 M3 = __mmask32(_pext_u64(m3 << 2, mend));
-        __m512i mask_08000800 = _mm512_set1_epi32(0x08000800);
-        __mmask32 Msmall800 = _mm512_mask_cmplt_epu16_mask(M3, Wout, mask_08000800);
-        __m512i mask_d800d800 = _mm512_set1_epi32(0xd800d800);
-        __m512i Moutminusd800 = _mm512_sub_epi16(Wout, mask_d800d800);
-        __mmask32 M3s = _mm512_mask_cmplt_epu16_mask(M3, Moutminusd800, mask_08000800);
-        if (_kor_mask32(Msmall800, M3s)) { return false; }
+    simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
+      size_t pos = 0;
+      char32_t* start{utf32_output};
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 4; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
       }
-      int64_t nout = _mm_popcnt_u64(mprocessed);
-      in +=  64 - _lzcnt_u64(mprocessed);
-      if(big_endian) { Wout = _mm512_shuffle_epi8(Wout, byteflip); }
-      _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), Wout);
-      out += nout;
-      return true; // ok
-    }
-    //
-    // We have a 4-byte sequence, this is the general case.
+      // If the input is long enough, then we have that margin-1 is the fourth last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
+      while(pos + 64 + safety_margin <= size) {
+        simd8x64 input(reinterpret_cast(in + pos));
+        if(input.is_ascii()) {
+          input.store_ascii_as_utf32(utf32_output);
+          utf32_output += 64;
+          pos += 64;
+        } else {
+          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+              "We support either two or four chunks per 64-byte block.");
+          auto zero = simd8{uint8_t(0)};
+          if(simd8x64::NUM_CHUNKS == 2) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          } else if(simd8x64::NUM_CHUNKS == 4) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+          }
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+          // We process in blocks of up to 12 bytes except possibly
+          // for fast paths which may process up to 16 bytes. For the
+          // slow path to work, we should have at least 12 input bytes left.
+          size_t max_starting_point = (pos + 64) - 12;
+          // Next loop is going to run at least five times.
+          while(pos < max_starting_point) {
+            // Performance note: our ability to compute 'consumed' and
+            // then shift and recompute is critical. If there is a
+            // latency of, say, 4 cycles on getting 'consumed', then
+            // the inner loop might have a total latency of about 6 cycles.
+            // Yet we process between 6 to 12 inputs bytes, thus we get
+            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+            // for this section of the code. Hence, there is a limit
+            // to how much we can further increase this latency before
+            // it seriously harms performance.
+            size_t consumed = convert_masked_utf8_to_utf32(in + pos,
+                            utf8_end_of_code_point_mask, utf32_output);
+            pos += consumed;
+            utf8_end_of_code_point_mask >>= consumed;
+          }
+          // At this point there may remain between 0 and 12 bytes in the
+          // 64-byte block. These bytes will be processed again. So we have an
+          // 80% efficiency (in the worst case). In practice we expect an
+          // 85% to 90% efficiency.
+        }
+      }
+      if(errors()) { return 0; }
+      if(pos < size) {
+        size_t howmany  = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
+        if(howmany == 0) { return 0; }
+        utf32_output += howmany;
+      }
+      return utf32_output - start;
+    }
+
+    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
+      size_t pos = 0;
+      char32_t* start{utf32_output};
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 4; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then we have that margin-1 is the fourth last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
+      while(pos + 64 + safety_margin <= size) {
+        simd8x64 input(reinterpret_cast(in + pos));
+        if(input.is_ascii()) {
+          input.store_ascii_as_utf32(utf32_output);
+          utf32_output += 64;
+          pos += 64;
+        } else {
+          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+              "We support either two or four chunks per 64-byte block.");
+          auto zero = simd8{uint8_t(0)};
+          if(simd8x64::NUM_CHUNKS == 2) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          } else if(simd8x64::NUM_CHUNKS == 4) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+          }
+          if (errors()) {
+            result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
+            res.count += pos;
+            return res;
+          }
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+          // We process in blocks of up to 12 bytes except possibly
+          // for fast paths which may process up to 16 bytes. For the
+          // slow path to work, we should have at least 12 input bytes left.
+          size_t max_starting_point = (pos + 64) - 12;
+          // Next loop is going to run at least five times.
+          while(pos < max_starting_point) {
+            // Performance note: our ability to compute 'consumed' and
+            // then shift and recompute is critical. If there is a
+            // latency of, say, 4 cycles on getting 'consumed', then
+            // the inner loop might have a total latency of about 6 cycles.
+            // Yet we process between 6 to 12 inputs bytes, thus we get
+            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+            // for this section of the code. Hence, there is a limit
+            // to how much we can further increase this latency before
+            // it seriously harms performance.
+            size_t consumed = convert_masked_utf8_to_utf32(in + pos,
+                            utf8_end_of_code_point_mask, utf32_output);
+            pos += consumed;
+            utf8_end_of_code_point_mask >>= consumed;
+          }
+          // At this point there may remain between 0 and 12 bytes in the
+          // 64-byte block. These bytes will be processed again. So we have an
+          // 80% efficiency (in the worst case). In practice we expect an
+          // 85% to 90% efficiency.
+        }
+      }
+      if(errors()) {
+        result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
+        res.count += pos;
+        return res;
+      }
+      if(pos < size) {
+        result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
+        if (res.error) {    // In case of error, we want the error position
+          res.count += pos;
+          return res;
+        } else {    // In case of success, we want the number of word written
+          utf32_output += res.count;
+        }
+      }
+      return result(error_code::SUCCESS, utf32_output - start);
+    }
+
+    simdutf_really_inline bool errors() const {
+      return this->error.any_bits_set_anywhere();
+    }
+
+  }; // struct utf8_checker
+} // utf8_to_utf32 namespace
+} // unnamed namespace
+} // namespace arm64
+} // namespace simdutf
+/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */
+// other functions
+/* begin file src/generic/utf8.h */
+
+namespace simdutf {
+namespace arm64 {
+namespace {
+namespace utf8 {
+
+using namespace simd;
+
+simdutf_really_inline size_t count_code_points(const char* in, size_t size) {
+    size_t pos = 0;
+    size_t count = 0;
+    for(;pos + 64 <= size; pos += 64) {
+      simd8x64 input(reinterpret_cast(in + pos));
+      uint64_t utf8_continuation_mask = input.gt(-65);
+      count += count_ones(utf8_continuation_mask);
+    }
+    return count + scalar::utf8::count_code_points(in + pos, size - pos);
+}
+
+simdutf_really_inline size_t utf16_length_from_utf8(const char* in, size_t size) {
+    size_t pos = 0;
+    size_t count = 0;
+    // This algorithm could no doubt be improved!
+    for(;pos + 64 <= size; pos += 64) {
+      simd8x64 input(reinterpret_cast(in + pos));
+      uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+      // We count one word for anything that is not a continuation (so
+      // leading bytes).
+      count += 64 - count_ones(utf8_continuation_mask);
+      int64_t utf8_4byte = input.gteq_unsigned(240);
+      count += count_ones(utf8_4byte);
+    }
+    return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
+}
+
+
+simdutf_really_inline size_t utf32_length_from_utf8(const char* in, size_t size) {
+    return count_code_points(in, size);
+}
+} // utf8 namespace
+} // unnamed namespace
+} // namespace arm64
+} // namespace simdutf
+/* end file src/generic/utf8.h */
+/* begin file src/generic/utf16.h */
+namespace simdutf {
+namespace arm64 {
+namespace {
+namespace utf16 {
+
+template 
+simdutf_really_inline size_t count_code_points(const char16_t* in, size_t size) {
+    size_t pos = 0;
+    size_t count = 0;
+    for(;pos < size/32*32; pos += 32) {
+      simd16x32 input(reinterpret_cast(in + pos));
+      if (!match_system(big_endian)) { input.swap_bytes(); }
+      uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF);
+      count += count_ones(not_pair) / 2;
+    }
+    return count + scalar::utf16::count_code_points(in + pos, size - pos);
+}
+
+template 
+simdutf_really_inline size_t utf8_length_from_utf16(const char16_t* in, size_t size) {
+    size_t pos = 0;
+    size_t count = 0;
+    // This algorithm could no doubt be improved!
+    for(;pos < size/32*32; pos += 32) {
+      simd16x32 input(reinterpret_cast(in + pos));
+      if (!match_system(big_endian)) { input.swap_bytes(); }
+      uint64_t ascii_mask = input.lteq(0x7F);
+      uint64_t twobyte_mask = input.lteq(0x7FF);
+      uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF);
+
+      size_t ascii_count = count_ones(ascii_mask) / 2;
+      size_t twobyte_count = count_ones(twobyte_mask & ~ ascii_mask) / 2;
+      size_t threebyte_count = count_ones(not_pair_mask & ~ twobyte_mask) / 2;
+      size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2;
+      count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + ascii_count;
+    }
+    return count + scalar::utf16::utf8_length_from_utf16(in + pos, size - pos);
+}
+
+template 
+simdutf_really_inline size_t utf32_length_from_utf16(const char16_t* in, size_t size) {
+    return count_code_points(in, size);
+}
+
+simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* output) {
+  size_t pos = 0;
+
+  while (pos < size/32*32) {
+    simd16x32 input(reinterpret_cast(in + pos));
+    input.swap_bytes();
+    input.store(reinterpret_cast(output));
+    pos += 32;
+    output += 32;
+  }
+
+  scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
+}
+
+} // utf16
+} // unnamed namespace
+} // namespace arm64
+} // namespace simdutf
+/* end file src/generic/utf16.h */
+// transcoding from UTF-8 to Latin 1
+/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */
+
+
+namespace simdutf {
+namespace arm64 {
+namespace {
+namespace utf8_to_latin1 {
+using namespace simd;
+
+
+  simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
+// For UTF-8 to Latin 1, we can allow any ASCII character, and any continuation byte,
+// but the non-ASCII leading bytes must be 0b11000011 or 0b11000010 and nothing else.
+//
+// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
+// Bit 1 = Too Long (ASCII followed by continuation)
+// Bit 2 = Overlong 3-byte
+// Bit 4 = Surrogate
+// Bit 5 = Overlong 2-byte
+// Bit 7 = Two Continuations
+    constexpr const uint8_t TOO_SHORT   = 1<<0; // 11______ 0_______
+                                                // 11______ 11______
+    constexpr const uint8_t TOO_LONG    = 1<<1; // 0_______ 10______
+    constexpr const uint8_t OVERLONG_3  = 1<<2; // 11100000 100_____
+    constexpr const uint8_t SURROGATE   = 1<<4; // 11101101 101_____
+    constexpr const uint8_t OVERLONG_2  = 1<<5; // 1100000_ 10______
+    constexpr const uint8_t TWO_CONTS   = 1<<7; // 10______ 10______
+    constexpr const uint8_t TOO_LARGE   = 1<<3; // 11110100 1001____
+                                                // 11110100 101_____
+                                                // 11110101 1001____
+                                                // 11110101 101_____
+                                                // 1111011_ 1001____
+                                                // 1111011_ 101_____
+                                                // 11111___ 1001____
+                                                // 11111___ 101_____
+    constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
+                                                // 11110101 1000____
+                                                // 1111011_ 1000____
+                                                // 11111___ 1000____
+    constexpr const uint8_t OVERLONG_4  = 1<<6; // 11110000 1000____
+    constexpr const uint8_t FORBIDDEN  = 0xff;
+
+    const simd8 byte_1_high = prev1.shr<4>().lookup_16(
+      // 0_______ ________ 
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      // 10______ ________ 
+      TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
+      // 1100____ ________ 
+      TOO_SHORT | OVERLONG_2,
+      // 1101____ ________ 
+      FORBIDDEN,
+      // 1110____ ________ 
+      FORBIDDEN,
+      // 1111____ ________ 
+      FORBIDDEN
+    );
+    constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
+    const simd8 byte_1_low = (prev1 & 0x0F).lookup_16(
+      // ____0000 ________
+      CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
+      // ____0001 ________
+      CARRY | OVERLONG_2,
+      // ____001_ ________
+      CARRY,
+      CARRY,
+
+      // ____0100 ________
+      FORBIDDEN,
+      // ____0101 ________
+      FORBIDDEN,
+      // ____011_ ________
+      FORBIDDEN,
+      FORBIDDEN,
+
+      // ____1___ ________
+      FORBIDDEN,
+      FORBIDDEN,
+      FORBIDDEN,
+      FORBIDDEN,
+      FORBIDDEN,
+      // ____1101 ________
+      FORBIDDEN,
+      FORBIDDEN,
+      FORBIDDEN
+    );
+    const simd8 byte_2_high = input.shr<4>().lookup_16(
+      // ________ 0_______ 
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+
+      // ________ 1000____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
+      // ________ 1001____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
+      // ________ 101_____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
+
+      // ________ 11______
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
+    );
+    return (byte_1_high & byte_1_low & byte_2_high);
+  }
+
+  struct validating_transcoder {
+    // If this is nonzero, there has been a UTF-8 error.
+    simd8 error;
+
+    validating_transcoder() : error(uint8_t(0)) {}
+    //
+    // Check whether the current bytes are valid UTF-8.
+    //
+    simdutf_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) {
+      // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
+      // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
+      simd8 prev1 = input.prev<1>(prev_input);
+      this->error |= check_special_cases(input, prev1);
+    }
+
+
+    simdutf_really_inline size_t convert(const char* in, size_t size, char* latin1_output) {
+      size_t pos = 0;
+      char* start{latin1_output};
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65); //twos complement of -65 is 1011 1111 ...
+      }
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
+      while(pos + 64 + safety_margin <= size) {
+        simd8x64 input(reinterpret_cast(in + pos));
+        if(input.is_ascii()) {
+          input.store((int8_t*)latin1_output);
+          latin1_output += 64;
+          pos += 64;
+        } else {
+          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+              "We support either two or four chunks per 64-byte block.");
+          auto zero = simd8{uint8_t(0)};
+          if(simd8x64::NUM_CHUNKS == 2) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          } else if(simd8x64::NUM_CHUNKS == 4) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+          }
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in this case, we also have ASCII to account for.
+          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+          // We process in blocks of up to 12 bytes except possibly
+          // for fast paths which may process up to 16 bytes. For the
+          // slow path to work, we should have at least 12 input bytes left.
+          size_t max_starting_point = (pos + 64) - 12;
+          // Next loop is going to run at least five times.
+          while(pos < max_starting_point) {
+            // Performance note: our ability to compute 'consumed' and
+            // then shift and recompute is critical. If there is a
+            // latency of, say, 4 cycles on getting 'consumed', then
+            // the inner loop might have a total latency of about 6 cycles.
+            // Yet we process between 6 to 12 inputs bytes, thus we get
+            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+            // for this section of the code. Hence, there is a limit
+            // to how much we can further increase this latency before
+            // it seriously harms performance.
+            size_t consumed = convert_masked_utf8_to_latin1(in + pos,
+                            utf8_end_of_code_point_mask, latin1_output);
+            pos += consumed;
+            utf8_end_of_code_point_mask >>= consumed;
+          }
+          // At this point there may remain between 0 and 12 bytes in the
+          // 64-byte block. These bytes will be processed again. So we have an
+          // 80% efficiency (in the worst case). In practice we expect an
+          // 85% to 90% efficiency.
+        }
+      }
+      if(errors()) { return 0; }
+      if(pos < size) {
+        size_t howmany  = scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output);
+        if(howmany == 0) { return 0; }
+        latin1_output += howmany;
+      }
+      return latin1_output - start;
+    }
+
+    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char* latin1_output) {
+      size_t pos = 0;
+      char* start{latin1_output};
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
+      while(pos + 64 + safety_margin <= size) {
+        simd8x64 input(reinterpret_cast(in + pos));
+        if(input.is_ascii()) {
+          input.store((int8_t*)latin1_output);
+          latin1_output += 64;
+          pos += 64;
+        } else {
+          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+              "We support either two or four chunks per 64-byte block.");
+          auto zero = simd8{uint8_t(0)};
+          if(simd8x64::NUM_CHUNKS == 2) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          } else if(simd8x64::NUM_CHUNKS == 4) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+          }
+          if (errors()) {
+            // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+            // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+            result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
+            res.count += pos;
+            return res;
+          }
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+          // We process in blocks of up to 12 bytes except possibly
+          // for fast paths which may process up to 16 bytes. For the
+          // slow path to work, we should have at least 12 input bytes left.
+          size_t max_starting_point = (pos + 64) - 12;
+          // Next loop is going to run at least five times.
+          while(pos < max_starting_point) {
+            // Performance note: our ability to compute 'consumed' and
+            // then shift and recompute is critical. If there is a
+            // latency of, say, 4 cycles on getting 'consumed', then
+            // the inner loop might have a total latency of about 6 cycles.
+            // Yet we process between 6 to 12 inputs bytes, thus we get
+            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+            // for this section of the code. Hence, there is a limit
+            // to how much we can further increase this latency before
+            // it seriously harms performance.
+            size_t consumed = convert_masked_utf8_to_latin1(in + pos,
+                            utf8_end_of_code_point_mask, latin1_output);
+            pos += consumed;
+            utf8_end_of_code_point_mask >>= consumed;
+          }
+          // At this point there may remain between 0 and 12 bytes in the
+          // 64-byte block. These bytes will be processed again. So we have an
+          // 80% efficiency (in the worst case). In practice we expect an
+          // 85% to 90% efficiency.
+        }
+      }
+      if(errors()) {
+        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+        result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
+        res.count += pos;
+        return res;
+      }
+      if(pos < size) {
+        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+        result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
+        if (res.error) {    // In case of error, we want the error position
+          res.count += pos;
+          return res;
+        } else {    // In case of success, we want the number of word written
+          latin1_output += res.count;
+        }
+      }
+      return result(error_code::SUCCESS, latin1_output - start);
+    }
+
+    simdutf_really_inline bool errors() const {
+      return this->error.any_bits_set_anywhere();
+    }
+
+  }; // struct utf8_checker
+} // utf8_to_latin1 namespace
+} // unnamed namespace
+} // namespace arm64
+} // namespace simdutf
+/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */
+/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */
+
+
+namespace simdutf {
+namespace arm64 {
+namespace {
+namespace utf8_to_latin1 {
+using namespace simd;
+
+
+    simdutf_really_inline size_t convert_valid(const char* in, size_t size, char* latin1_output) {
+      size_t pos = 0;
+      char* start{latin1_output};
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65); //twos complement of -65 is 1011 1111 ...
+      }
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
+      while(pos + 64 + safety_margin <= size) {
+        simd8x64 input(reinterpret_cast(in + pos));
+        if(input.is_ascii()) {
+          input.store((int8_t*)latin1_output);
+          latin1_output += 64;
+          pos += 64;
+        } else {
+          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in this case, we also have ASCII to account for.
+          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+          // We process in blocks of up to 12 bytes except possibly
+          // for fast paths which may process up to 16 bytes. For the
+          // slow path to work, we should have at least 12 input bytes left.
+          size_t max_starting_point = (pos + 64) - 12;
+          // Next loop is going to run at least five times.
+          while(pos < max_starting_point) {
+            // Performance note: our ability to compute 'consumed' and
+            // then shift and recompute is critical. If there is a
+            // latency of, say, 4 cycles on getting 'consumed', then
+            // the inner loop might have a total latency of about 6 cycles.
+            // Yet we process between 6 to 12 inputs bytes, thus we get
+            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+            // for this section of the code. Hence, there is a limit
+            // to how much we can further increase this latency before
+            // it seriously harms performance.
+            size_t consumed = convert_masked_utf8_to_latin1(in + pos,
+                            utf8_end_of_code_point_mask, latin1_output);
+            pos += consumed;
+            utf8_end_of_code_point_mask >>= consumed;
+          }
+          // At this point there may remain between 0 and 12 bytes in the
+          // 64-byte block. These bytes will be processed again. So we have an
+          // 80% efficiency (in the worst case). In practice we expect an
+          // 85% to 90% efficiency.
+        }
+      }
+      if(pos < size) {
+        size_t howmany  = scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output);
+        if(howmany == 0) { return 0; }
+        latin1_output += howmany;
+      }
+      return latin1_output - start;
+    }
+
+  }; 
+}   // utf8_to_latin1 namespace
+}   // unnamed namespace
+}   // namespace arm64
+ // namespace simdutf
+/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */
+
+// placeholder scalars
+
+//
+// Implementation-specific overrides
+//
+namespace simdutf {
+namespace arm64 {
+
+simdutf_warn_unused int implementation::detect_encodings(const char * input, size_t length) const noexcept {
+  // If there is a BOM, then we trust it.
+  auto bom_encoding = simdutf::BOM::check_bom(input, length);
+  if(bom_encoding != encoding_type::unspecified) { return bom_encoding; }
+  if (length % 2 == 0) {
+    return arm_detect_encodings(input, length);
+  } else {
+    if (implementation::validate_utf8(input, length)) {
+      return simdutf::encoding_type::UTF8;
+    } else {
+      return simdutf::encoding_type::unspecified;
+    }
+  }
+}
+
+simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
+  return arm64::utf8_validation::generic_validate_utf8(buf,len);
+}
+
+simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
+  return arm64::utf8_validation::generic_validate_utf8_with_errors(buf,len);
+}
+
+simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
+  return arm64::utf8_validation::generic_validate_ascii(buf,len);
+}
+
+simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
+  return arm64::utf8_validation::generic_validate_ascii_with_errors(buf,len);
+}
+
+simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
+  const char16_t* tail = arm_validate_utf16(buf, len);
+  if (tail) {
+    return scalar::utf16::validate(tail, len - (tail - buf));
+  } else {
+    return false;
+  }
+}
+
+simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
+  const char16_t* tail = arm_validate_utf16(buf, len);
+  if (tail) {
+    return scalar::utf16::validate(tail, len - (tail - buf));
+  } else {
+    return false;
+  }
+}
+
+simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
+  result res = arm_validate_utf16_with_errors(buf, len);
+  if (res.count != len) {
+    result scalar_res = scalar::utf16::validate_with_errors(buf + res.count, len - res.count);
+    return result(scalar_res.error, res.count + scalar_res.count);
+  } else {
+    return res;
+  }
+}
+
+simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
+  result res = arm_validate_utf16_with_errors(buf, len);
+  if (res.count != len) {
+    result scalar_res = scalar::utf16::validate_with_errors(buf + res.count, len - res.count);
+    return result(scalar_res.error, res.count + scalar_res.count);
+  } else {
+    return res;
+  }
+}
+
+simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
+  const char32_t* tail = arm_validate_utf32le(buf, len);
+  if (tail) {
+    return scalar::utf32::validate(tail, len - (tail - buf));
+  } else {
+    return false;
+  }
+}
+
+simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
+  result res = arm_validate_utf32le_with_errors(buf, len);
+  if (res.count != len) {
+    result scalar_res = scalar::utf32::validate_with_errors(buf + res.count, len - res.count);
+    return result(scalar_res.error, res.count + scalar_res.count);
+  } else {
+    return res;
+  }
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
+  std::pair ret = arm_convert_latin1_to_utf8(buf, len, utf8_output);
+  size_t converted_chars = ret.second - utf8_output;
+
+  if (ret.first != buf + len) {
+    const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert(
+      ret.first, len - (ret.first - buf), ret.second);
+    converted_chars += scalar_converted_chars;
+  }
+
+  return converted_chars;
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  std::pair ret = arm_convert_latin1_to_utf16(buf, len, utf16_output);
+  size_t converted_chars = ret.second - utf16_output;
+  if (ret.first != buf + len) {
+    const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert(
+      ret.first, len - (ret.first - buf), ret.second);
+    converted_chars += scalar_converted_chars;
+  }
+  return converted_chars;
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  std::pair ret = arm_convert_latin1_to_utf16(buf, len, utf16_output);
+  size_t converted_chars = ret.second - utf16_output;
+  if (ret.first != buf + len) {
+    const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert(
+      ret.first, len - (ret.first - buf), ret.second);
+    converted_chars += scalar_converted_chars;
+  }
+  return converted_chars;
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
+  std::pair ret = arm_convert_latin1_to_utf32(buf, len, utf32_output);
+  size_t converted_chars = ret.second - utf32_output;
+  if (ret.first != buf + len) {
+    const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert(
+      ret.first, len - (ret.first - buf), ret.second);
+    converted_chars += scalar_converted_chars;
+  }
+  return converted_chars;
+}
+
+simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
+  utf8_to_latin1::validating_transcoder converter;
+  return converter.convert(buf, len, latin1_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
+  utf8_to_latin1::validating_transcoder converter;
+  return converter.convert_with_errors(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
+  return arm64::utf8_to_latin1::convert_valid(buf,len,latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  utf8_to_utf16::validating_transcoder converter;
+  return converter.convert(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  utf8_to_utf16::validating_transcoder converter;
+  return converter.convert(buf, len, utf16_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  utf8_to_utf16::validating_transcoder converter;
+  return converter.convert_with_errors(buf, len, utf16_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  utf8_to_utf16::validating_transcoder converter;
+  return converter.convert_with_errors(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(const char* input, size_t size,
+    char16_t* utf16_output) const noexcept {
+  return utf8_to_utf16::convert_valid(input, size,  utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(const char* input, size_t size,
+    char16_t* utf16_output) const noexcept {
+  return utf8_to_utf16::convert_valid(input, size,  utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
+  utf8_to_utf32::validating_transcoder converter;
+  return converter.convert(buf, len, utf32_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
+  utf8_to_utf32::validating_transcoder converter;
+  return converter.convert_with_errors(buf, len, utf32_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const char* input, size_t size,
+    char32_t* utf32_output) const noexcept {
+  return utf8_to_utf32::convert_valid(input, size,  utf32_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = arm_convert_utf16_to_latin1(buf, len, latin1_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - latin1_output;
+
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = arm_convert_utf16_to_latin1(buf, len, latin1_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - latin1_output;
+
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = arm_convert_utf16_to_latin1_with_errors(buf, len, latin1_output);
+  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
+  if (ret.first.count != len) { // All good so far, but not finished
+    result scalar_res = scalar::utf16_to_latin1::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - latin1_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = arm_convert_utf16_to_latin1_with_errors(buf, len, latin1_output);
+  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
+  if (ret.first.count != len) { // All good so far, but not finished
+    result scalar_res = scalar::utf16_to_latin1::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - latin1_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  // optimization opportunity: implement a custom function.
+  return convert_utf16be_to_latin1(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  // optimization opportunity: implement a custom function.
+  return convert_utf16le_to_latin1(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
+  std::pair ret = arm_convert_utf16_to_utf8(buf, len, utf8_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - utf8_output;
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf16_to_utf8::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
+  std::pair ret = arm_convert_utf16_to_utf8(buf, len, utf8_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - utf8_output;
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf16_to_utf8::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
+  std::pair ret = arm_convert_utf16_to_utf8_with_errors(buf, len, utf8_output);
+  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
+  if (ret.first.count != len) { // All good so far, but not finished
+    result scalar_res = scalar::utf16_to_utf8::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
+  std::pair ret = arm_convert_utf16_to_utf8_with_errors(buf, len, utf8_output);
+  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
+  if (ret.first.count != len) { // All good so far, but not finished
+    result scalar_res = scalar::utf16_to_utf8::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
+  return convert_utf16le_to_utf8(buf, len, utf8_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
+  return convert_utf16be_to_utf8(buf, len, utf8_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
+  std::pair ret = arm_convert_utf32_to_utf8(buf, len, utf8_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - utf8_output;
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf32_to_utf8::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
+  std::pair ret = arm_convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
+  if (ret.first.count != len) {
+    result scalar_res = scalar::utf32_to_utf8::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
+  std::pair ret = arm_convert_utf16_to_utf32(buf, len, utf32_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - utf32_output;
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
+  std::pair ret = arm_convert_utf16_to_utf32(buf, len, utf32_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - utf32_output;
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf16_to_utf32::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
+  std::pair ret = arm_convert_utf16_to_utf32_with_errors(buf, len, utf32_output);
+  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
+  if (ret.first.count != len) { // All good so far, but not finished
+    result scalar_res = scalar::utf16_to_utf32::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - utf32_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
+  std::pair ret = arm_convert_utf16_to_utf32_with_errors(buf, len, utf32_output);
+  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
+  if (ret.first.count != len) { // All good so far, but not finished
+    result scalar_res = scalar::utf16_to_utf32::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - utf32_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = arm_convert_utf32_to_latin1(buf, len, latin1_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - latin1_output;
+
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = arm_convert_utf32_to_latin1_with_errors(buf, len, latin1_output);
+  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
+  if (ret.first.count != len) { // All good so far, but not finished
+    result scalar_res = scalar::utf32_to_latin1::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - latin1_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  // optimization opportunity: implement a custom function.
+  return convert_utf32_to_latin1(buf,len,latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
+  // optimization opportunity: implement a custom function.
+  return convert_utf32_to_utf8(buf, len, utf8_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
+  std::pair ret = arm_convert_utf32_to_utf16(buf, len, utf16_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - utf16_output;
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf32_to_utf16::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
+  std::pair ret = arm_convert_utf32_to_utf16(buf, len, utf16_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - utf16_output;
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf32_to_utf16::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
+  std::pair ret = arm_convert_utf32_to_utf16_with_errors(buf, len, utf16_output);
+  if (ret.first.count != len) {
+    result scalar_res = scalar::utf32_to_utf16::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
+  std::pair ret = arm_convert_utf32_to_utf16_with_errors(buf, len, utf16_output);
+  if (ret.first.count != len) {
+    result scalar_res = scalar::utf32_to_utf16::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return convert_utf32_to_utf16le(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return convert_utf32_to_utf16be(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
+  return convert_utf16le_to_utf32(buf, len, utf32_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
+  return convert_utf16be_to_utf32(buf, len, utf32_output);
+}
+
+void implementation::change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept {
+  utf16::change_endianness_utf16(input, length, output);
+}
+
+simdutf_warn_unused size_t implementation::count_utf16le(const char16_t * input, size_t length) const noexcept {
+  return utf16::count_code_points(input, length);
+}
+
+simdutf_warn_unused size_t implementation::count_utf16be(const char16_t * input, size_t length) const noexcept {
+  return utf16::count_code_points(input, length);
+}
+
+simdutf_warn_unused size_t implementation::count_utf8(const char * input, size_t length) const noexcept {
+  return utf8::count_code_points(input, length);
+}
+
+simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
+  return count_utf8(buf,len);
+}
+
+simdutf_warn_unused size_t implementation::latin1_length_from_utf16(size_t length) const noexcept {
+  return scalar::utf16::latin1_length_from_utf16(length);
+}
+
+simdutf_warn_unused size_t implementation::latin1_length_from_utf32(size_t length) const noexcept {
+  return scalar::utf32::latin1_length_from_utf32(length);
+}
+
+simdutf_warn_unused size_t implementation::utf8_length_from_latin1(const char * input, size_t length) const noexcept {
+  // See https://lemire.me/blog/2023/05/15/computing-the-utf-8-size-of-a-latin-1-string-quickly-arm-neon-edition/
+  // credit to Pete Cawley
+  const uint8_t *data = reinterpret_cast(input);
+  uint64_t result = 0;
+  const int lanes = sizeof(uint8x16_t);
+  uint8_t rem = length % lanes;
+  const uint8_t *simd_end = data + (length / lanes) * lanes;
+  const uint8x16_t threshold = vdupq_n_u8(0x80);
+  for (; data < simd_end; data += lanes) {
+    // load 16 bytes
+    uint8x16_t input_vec = vld1q_u8(data);
+    // compare to threshold (0x80)
+    uint8x16_t withhighbit = vcgeq_u8(input_vec, threshold);
+    // vertical addition
+    result -= vaddvq_s8(vreinterpretq_s8_u8(withhighbit));
+  }
+  // scalar tail
+  for (uint8_t j = 0; j < rem; j++) {
+    result += (simd_end[j] >> 7);
+  }
+  return result + length;
+}
+
+simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
+  return utf16::utf8_length_from_utf16(input, length);
+}
+
+simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
+  return utf16::utf8_length_from_utf16(input, length);
+}
+
+
+simdutf_warn_unused size_t implementation::utf16_length_from_latin1(size_t length) const noexcept {
+  return scalar::latin1::utf16_length_from_latin1(length);
+}
+
+
+simdutf_warn_unused size_t implementation::utf32_length_from_latin1(size_t length) const noexcept {
+  return scalar::latin1::utf32_length_from_latin1(length);
+}
+
+
+
+simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
+  return utf16::utf32_length_from_utf16(input, length);
+}
+
+simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
+  return utf16::utf32_length_from_utf16(input, length);
+}
+
+simdutf_warn_unused size_t implementation::utf16_length_from_utf8(const char * input, size_t length) const noexcept {
+  return utf8::utf16_length_from_utf8(input, length);
+}
+
+simdutf_warn_unused size_t implementation::utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept {
+  const uint32x4_t v_7f = vmovq_n_u32((uint32_t)0x7f);
+  const uint32x4_t v_7ff = vmovq_n_u32((uint32_t)0x7ff);
+  const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff);
+  const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1);
+  size_t pos = 0;
+  size_t count = 0;
+  for(;pos + 4 <= length; pos += 4) {
+    uint32x4_t in = vld1q_u32(reinterpret_cast(input + pos));
+    const uint32x4_t ascii_bytes_bytemask = vcleq_u32(in, v_7f);
+    const uint32x4_t one_two_bytes_bytemask = vcleq_u32(in, v_7ff);
+    const uint32x4_t two_bytes_bytemask = veorq_u32(one_two_bytes_bytemask, ascii_bytes_bytemask);
+    const uint32x4_t three_bytes_bytemask = veorq_u32(vcleq_u32(in, v_ffff), one_two_bytes_bytemask);
+
+    const uint16x8_t reduced_ascii_bytes_bytemask = vreinterpretq_u16_u32(vandq_u32(ascii_bytes_bytemask, v_1));
+    const uint16x8_t reduced_two_bytes_bytemask = vreinterpretq_u16_u32(vandq_u32(two_bytes_bytemask, v_1));
+    const uint16x8_t reduced_three_bytes_bytemask = vreinterpretq_u16_u32(vandq_u32(three_bytes_bytemask, v_1));
+
+    const uint16x8_t compressed_bytemask0 = vpaddq_u16(reduced_ascii_bytes_bytemask, reduced_two_bytes_bytemask);
+    const uint16x8_t compressed_bytemask1 = vpaddq_u16(reduced_three_bytes_bytemask, reduced_three_bytes_bytemask);
+
+    size_t ascii_count = count_ones(vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask0), 0));
+    size_t two_bytes_count = count_ones(vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask0), 1));
+    size_t three_bytes_count = count_ones(vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask1), 0));
+
+    count += 16 - 3*ascii_count - 2*two_bytes_count - three_bytes_count;
+  }
+  return count + scalar::utf32::utf8_length_from_utf32(input + pos, length - pos);
+}
+
+simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept {
+  const uint32x4_t v_ffff = vmovq_n_u32((uint32_t)0xffff);
+  const uint32x4_t v_1 = vmovq_n_u32((uint32_t)0x1);
+  size_t pos = 0;
+  size_t count = 0;
+  for(;pos + 4 <= length; pos += 4) {
+    uint32x4_t in = vld1q_u32(reinterpret_cast(input + pos));
+    const uint32x4_t surrogate_bytemask = vcgtq_u32(in, v_ffff);
+    const uint16x8_t reduced_bytemask = vreinterpretq_u16_u32(vandq_u32(surrogate_bytemask, v_1));
+    const uint16x8_t compressed_bytemask = vpaddq_u16(reduced_bytemask, reduced_bytemask);
+    size_t surrogate_count = count_ones(vgetq_lane_u64(vreinterpretq_u64_u16(compressed_bytemask), 0));
+    count += 4 + surrogate_count;
+  }
+  return count + scalar::utf32::utf16_length_from_utf32(input + pos, length - pos);
+}
+
+simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept {
+  return utf8::utf32_length_from_utf8(input, length);
+}
+
+} // namespace arm64
+} // namespace simdutf
+
+/* begin file src/simdutf/arm64/end.h */
+/* end file src/simdutf/arm64/end.h */
+/* end file src/arm64/implementation.cpp */
+#endif
+#if SIMDUTF_IMPLEMENTATION_FALLBACK
+/* begin file src/fallback/implementation.cpp */
+/* begin file src/simdutf/fallback/begin.h */
+// redefining SIMDUTF_IMPLEMENTATION to "fallback"
+// #define SIMDUTF_IMPLEMENTATION fallback
+/* end file src/simdutf/fallback/begin.h */
+
+
+
+
+
+
+
+
+
+namespace simdutf {
+namespace fallback {
+
+simdutf_warn_unused int implementation::detect_encodings(const char * input, size_t length) const noexcept {
+  // If there is a BOM, then we trust it.
+  auto bom_encoding = simdutf::BOM::check_bom(input, length);
+  if(bom_encoding != encoding_type::unspecified) { return bom_encoding; }
+  int out = 0;
+  if(validate_utf8(input, length)) { out |= encoding_type::UTF8; }
+  if((length % 2) == 0) {
+    if(validate_utf16le(reinterpret_cast(input), length/2)) { out |= encoding_type::UTF16_LE; }
+  }
+  if((length % 4) == 0) {
+    if(validate_utf32(reinterpret_cast(input), length/4)) { out |= encoding_type::UTF32_LE; }
+  }
+
+  return out;
+}
+
+simdutf_warn_unused bool implementation::validate_utf8(const char *buf, size_t len) const noexcept {
+  return scalar::utf8::validate(buf, len);
+}
+
+simdutf_warn_unused result implementation::validate_utf8_with_errors(const char *buf, size_t len) const noexcept {
+  return scalar::utf8::validate_with_errors(buf, len);
+}
+
+simdutf_warn_unused bool implementation::validate_ascii(const char *buf, size_t len) const noexcept {
+  return scalar::ascii::validate(buf, len);
+}
+
+simdutf_warn_unused result implementation::validate_ascii_with_errors(const char *buf, size_t len) const noexcept {
+  return scalar::ascii::validate_with_errors(buf, len);
+}
+
+simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, size_t len) const noexcept {
+  return scalar::utf16::validate(buf, len);
+}
+
+simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, size_t len) const noexcept {
+  return scalar::utf16::validate(buf, len);
+}
+
+simdutf_warn_unused result implementation::validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept {
+  return scalar::utf16::validate_with_errors(buf, len);
+}
+
+simdutf_warn_unused result implementation::validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept {
+  return scalar::utf16::validate_with_errors(buf, len);
+}
+
+simdutf_warn_unused bool implementation::validate_utf32(const char32_t *buf, size_t len) const noexcept {
+  return scalar::utf32::validate(buf, len);
+}
+
+simdutf_warn_unused result implementation::validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept {
+  return scalar::utf32::validate_with_errors(buf, len);
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
+  return scalar::latin1_to_utf8::convert(buf,len,utf8_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::latin1_to_utf16::convert(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::latin1_to_utf16::convert(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char * buf, size_t len, char32_t* utf32_output) const noexcept {
+  return scalar::latin1_to_utf32::convert(buf,len,utf32_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
+  return scalar::utf8_to_latin1::convert(buf, len, latin1_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
+  return scalar::utf8_to_latin1::convert_with_errors(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
+  return scalar::utf8_to_latin1::convert_valid(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::utf8_to_utf16::convert(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::utf8_to_utf16::convert(buf, len, utf16_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf8_to_utf16le_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::utf8_to_utf16::convert_with_errors(buf, len, utf16_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf8_to_utf16be_with_errors(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::utf8_to_utf16::convert_with_errors(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::utf8_to_utf16::convert_valid(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::utf8_to_utf16::convert_valid(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
+  return scalar::utf8_to_utf32::convert(buf, len, utf32_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf8_to_utf32_with_errors(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
+  return scalar::utf8_to_utf32::convert_with_errors(buf, len, utf32_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const char* input, size_t size,
+    char32_t* utf32_output) const noexcept {
+  return scalar::utf8_to_utf32::convert_valid(input, size,  utf32_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  return scalar::utf16_to_latin1::convert(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  return scalar::utf16_to_latin1::convert(buf, len, latin1_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  return scalar::utf16_to_latin1::convert_with_errors(buf, len, latin1_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  return scalar::utf16_to_latin1::convert_with_errors(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  return scalar::utf16_to_latin1::convert_valid(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  return scalar::utf16_to_latin1::convert_valid(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
+  return scalar::utf16_to_utf8::convert(buf, len, utf8_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
+  return scalar::utf16_to_utf8::convert(buf, len, utf8_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
+  return scalar::utf16_to_utf8::convert_with_errors(buf, len, utf8_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
+  return scalar::utf16_to_utf8::convert_with_errors(buf, len, utf8_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
+  return scalar::utf16_to_utf8::convert_valid(buf, len, utf8_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
+  return scalar::utf16_to_utf8::convert_valid(buf, len, utf8_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  return scalar::utf32_to_latin1::convert(buf, len, latin1_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  return scalar::utf32_to_latin1::convert_with_errors(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  return scalar::utf32_to_latin1::convert_valid(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
+  return scalar::utf32_to_utf8::convert(buf, len, utf8_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
+  return scalar::utf32_to_utf8::convert_with_errors(buf, len, utf8_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
+  return scalar::utf32_to_utf8::convert_valid(buf, len, utf8_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::utf32_to_utf16::convert(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::utf32_to_utf16::convert(buf, len, utf16_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::utf32_to_utf16::convert_with_errors(buf, len, utf16_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::utf32_to_utf16::convert_with_errors(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16le(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::utf32_to_utf16::convert_valid(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf32_to_utf16be(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return scalar::utf32_to_utf16::convert_valid(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
+  return scalar::utf16_to_utf32::convert(buf, len, utf32_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
+  return scalar::utf16_to_utf32::convert(buf, len, utf32_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
+  return scalar::utf16_to_utf32::convert_with_errors(buf, len, utf32_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
+  return scalar::utf16_to_utf32::convert_with_errors(buf, len, utf32_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
+  return scalar::utf16_to_utf32::convert_valid(buf, len, utf32_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf32(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
+  return scalar::utf16_to_utf32::convert_valid(buf, len, utf32_output);
+}
+
+void implementation::change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept {
+  scalar::utf16::change_endianness_utf16(input, length, output);
+}
+
+simdutf_warn_unused size_t implementation::count_utf16le(const char16_t * input, size_t length) const noexcept {
+  return scalar::utf16::count_code_points(input, length);
+}
+
+simdutf_warn_unused size_t implementation::count_utf16be(const char16_t * input, size_t length) const noexcept {
+  return scalar::utf16::count_code_points(input, length);
+}
+
+simdutf_warn_unused size_t implementation::count_utf8(const char * input, size_t length) const noexcept {
+  return scalar::utf8::count_code_points(input, length);
+}
+
+simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
+  return scalar::utf8::latin1_length_from_utf8(buf,len);
+}
+
+simdutf_warn_unused size_t implementation::latin1_length_from_utf16(size_t length) const noexcept {
+  return scalar::utf16::latin1_length_from_utf16(length);
+}
+
+simdutf_warn_unused size_t implementation::latin1_length_from_utf32(size_t length) const noexcept {
+  return length;
+}
+
+simdutf_warn_unused size_t implementation::utf8_length_from_latin1(const char * input, size_t length) const noexcept {
+  return scalar::latin1::utf8_length_from_latin1(input,length);
+}
+
+simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
+  return scalar::utf16::utf8_length_from_utf16(input, length);
+}
+
+simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
+  return scalar::utf16::utf8_length_from_utf16(input, length);
+}
+
+simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
+  return scalar::utf16::utf32_length_from_utf16(input, length);
+}
+
+simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept {
+  return scalar::utf16::utf32_length_from_utf16(input, length);
+}
+
+simdutf_warn_unused size_t implementation::utf16_length_from_latin1(size_t length) const noexcept {
+  return scalar::latin1::utf16_length_from_latin1(length);
+}
+
+simdutf_warn_unused size_t implementation::utf16_length_from_utf8(const char * input, size_t length) const noexcept {
+  return scalar::utf8::utf16_length_from_utf8(input, length);
+}
+
+simdutf_warn_unused size_t implementation::utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept {
+  return scalar::utf32::utf8_length_from_utf32(input, length);
+}
+
+simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept {
+  return scalar::utf32::utf16_length_from_utf32(input, length);
+}
+
+simdutf_warn_unused size_t implementation::utf32_length_from_latin1(size_t length) const noexcept {
+  return scalar::latin1::utf32_length_from_latin1(length);
+}
+
+simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept {
+  return scalar::utf8::count_code_points(input, length);
+}
+
+} // namespace fallback
+} // namespace simdutf
+
+/* begin file src/simdutf/fallback/end.h */
+/* end file src/simdutf/fallback/end.h */
+/* end file src/fallback/implementation.cpp */
+#endif
+#if SIMDUTF_IMPLEMENTATION_ICELAKE
+/* begin file src/icelake/implementation.cpp */
+
+
+/* begin file src/simdutf/icelake/begin.h */
+// redefining SIMDUTF_IMPLEMENTATION to "icelake"
+// #define SIMDUTF_IMPLEMENTATION icelake
+
+#if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE
+// nothing needed.
+#else
+SIMDUTF_TARGET_ICELAKE
+#endif
+
+#if SIMDUTF_GCC11ORMORE // workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105593
+SIMDUTF_DISABLE_GCC_WARNING(-Wmaybe-uninitialized)
+#endif // end of workaround
+/* end file src/simdutf/icelake/begin.h */
+namespace simdutf {
+namespace icelake {
+namespace {
+#ifndef SIMDUTF_ICELAKE_H
+#error "icelake.h must be included"
+#endif
+/* begin file src/icelake/icelake_utf8_common.inl.cpp */
+// Common procedures for both validating and non-validating conversions from UTF-8.
+enum block_processing_mode { SIMDUTF_FULL, SIMDUTF_TAIL};
+
+using utf8_to_utf16_result = std::pair;
+using utf8_to_utf32_result = std::pair;
+
+/*
+    process_block_utf8_to_utf16 converts up to 64 bytes from 'in' from UTF-8
+    to UTF-16. When tail = SIMDUTF_FULL, then the full input buffer (64 bytes)
+    might be used. When tail = SIMDUTF_TAIL, we take into account 'gap' which
+    indicates how many input bytes are relevant.
+
+    Returns true when the result is correct, otherwise it returns false.
+
+    The provided in and out pointers are advanced according to how many input
+    bytes have been processed, upon success.
+*/
+template 
+simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t *&out, size_t gap) {
+  // constants
+  __m512i mask_identity = _mm512_set_epi8(63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+  __m512i mask_c0c0c0c0 = _mm512_set1_epi32(0xc0c0c0c0);
+  __m512i mask_80808080 = _mm512_set1_epi32(0x80808080);
+  __m512i mask_f0f0f0f0 = _mm512_set1_epi32(0xf0f0f0f0);
+  __m512i mask_dfdfdfdf_tail = _mm512_set_epi64(0xffffdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf, 0xdfdfdfdfdfdfdfdf);
+  __m512i mask_c2c2c2c2 = _mm512_set1_epi32(0xc2c2c2c2);
+  __m512i mask_ffffffff = _mm512_set1_epi32(0xffffffff);
+  __m512i mask_d7c0d7c0 = _mm512_set1_epi32(0xd7c0d7c0);
+  __m512i mask_dc00dc00 = _mm512_set1_epi32(0xdc00dc00);
+  __m512i byteflip = _mm512_setr_epi64(
+            0x0607040502030001,
+            0x0e0f0c0d0a0b0809,
+            0x0607040502030001,
+            0x0e0f0c0d0a0b0809,
+            0x0607040502030001,
+            0x0e0f0c0d0a0b0809,
+            0x0607040502030001,
+            0x0e0f0c0d0a0b0809
+        );
+  // Note that 'tail' is a compile-time constant !
+  __mmask64 b = (tail == SIMDUTF_FULL) ? 0xFFFFFFFFFFFFFFFF : (uint64_t(1) << gap) - 1;
+  __m512i input = (tail == SIMDUTF_FULL) ? _mm512_loadu_si512(in) : _mm512_maskz_loadu_epi8(b, in);
+  __mmask64 m1 = (tail == SIMDUTF_FULL) ? _mm512_cmplt_epu8_mask(input, mask_80808080) : _mm512_mask_cmplt_epu8_mask(b, input, mask_80808080);
+  if(_ktestc_mask64_u8(m1, b)) {// NOT(m1) AND b -- if all zeroes, then all ASCII
+  // alternatively, we could do 'if (m1 == b) { '
+    if (tail == SIMDUTF_FULL) {
+      in += 64;          // consumed 64 bytes
+      // we convert a full 64-byte block, writing 128 bytes.
+      __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input));
+      if(big_endian) { input1 = _mm512_shuffle_epi8(input1, byteflip); }
+      _mm512_storeu_si512(out, input1);
+      out += 32;
+      __m512i input2 = _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(input, 1));
+      if(big_endian) { input2 = _mm512_shuffle_epi8(input2, byteflip); }
+      _mm512_storeu_si512(out, input2);
+      out += 32;
+      return true; // we are done
+    } else {
+      in += gap;
+      if (gap <= 32) {
+        __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input));
+        if(big_endian) { input1 = _mm512_shuffle_epi8(input1, byteflip); }
+        _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << (gap)) - 1), input1);
+        out += gap;
+      } else {
+        __m512i input1 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(input));
+        if(big_endian) { input1 = _mm512_shuffle_epi8(input1, byteflip); }
+        _mm512_storeu_si512(out, input1);
+        out += 32;
+        __m512i input2 = _mm512_cvtepu8_epi16(_mm512_extracti64x4_epi64(input, 1));
+        if(big_endian) { input2 = _mm512_shuffle_epi8(input2, byteflip); }
+        _mm512_mask_storeu_epi16(out, __mmask32((uint32_t(1) << (gap - 32)) - 1), input2);
+        out += gap - 32;
+      }
+      return true; // we are done
+    }
+  }
+  // classify characters further
+  __mmask64 m234 = _mm512_cmp_epu8_mask(mask_c0c0c0c0, input,
+                                        _MM_CMPINT_LE); // 0xc0 <= input, 2, 3, or 4 leading byte
+  __mmask64 m34 = _mm512_cmp_epu8_mask(mask_dfdfdfdf_tail, input,
+                                       _MM_CMPINT_LT); // 0xdf < input,  3 or 4 leading byte
+
+  __mmask64 milltwobytes = _mm512_mask_cmp_epu8_mask(m234, input, mask_c2c2c2c2,
+                                                     _MM_CMPINT_LT); // 0xc0 <= input < 0xc2 (illegal two byte sequence)
+                                                                     // Overlong 2-byte sequence
+  if (_ktestz_mask64_u8(milltwobytes, milltwobytes) == 0) {
+    // Overlong 2-byte sequence
+    return false;
+  }
+  if (_ktestz_mask64_u8(m34, m34) == 0) {
+    // We have a 3-byte sequence and/or a 2-byte sequence, or possibly even a 4-byte sequence!
+    __mmask64 m4 = _mm512_cmp_epu8_mask(input, mask_f0f0f0f0,
+                                        _MM_CMPINT_NLT); // 0xf0 <= zmm0 (4 byte start bytes)
+
+    __mmask64 mask_not_ascii = (tail == SIMDUTF_FULL) ? _knot_mask64(m1) : _kand_mask64(_knot_mask64(m1), b);
+
+    __mmask64 mp1 = _kshiftli_mask64(m234, 1);
+    __mmask64 mp2 = _kshiftli_mask64(m34, 2);
+    // We could do it as follows...
+    // if (_kortestz_mask64_u8(m4,m4)) { // compute the bitwise OR of the 64-bit masks a and b and return 1 if all zeroes
+    // but GCC generates better code when we do:
+    if (m4 == 0) { // compute the bitwise OR of the 64-bit masks a and b and return 1 if all zeroes
+      // Fast path with 1,2,3 bytes
+      __mmask64 mc = _kor_mask64(mp1, mp2); // expected continuation bytes
+      __mmask64 m1234 = _kor_mask64(m1, m234);
+      // mismatched continuation bytes:
+      if (tail == SIMDUTF_FULL) {
+        __mmask64 xnormcm1234 = _kxnor_mask64(mc, m1234); // XNOR of mc and m1234 should be all zero if they differ
+        // the presence of a 1 bit indicates that they overlap.
+        // _kortestz_mask64_u8: compute the bitwise OR of 64-bit masksand return 1 if all zeroes.
+        if (!_kortestz_mask64_u8(xnormcm1234, xnormcm1234)) { return false; }
+      } else {
+        __mmask64 bxorm1234 = _kxor_mask64(b, m1234);
+        if (mc != bxorm1234) { return false; }
+      }
+      // mend: identifying the last bytes of each sequence to be decoded
+      __mmask64 mend = _kshiftri_mask64(m1234, 1);
+      if (tail != SIMDUTF_FULL) {
+        mend = _kor_mask64(mend, (uint64_t(1) << (gap - 1)));
+      }
+
+
+      __m512i last_and_third = _mm512_maskz_compress_epi8(mend, mask_identity);
+      __m512i last_and_thirdu16 = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(last_and_third));
+
+      __m512i nonasciitags = _mm512_maskz_mov_epi8(mask_not_ascii, mask_c0c0c0c0); // ASCII: 00000000  other: 11000000
+      __m512i clearedbytes = _mm512_andnot_si512(nonasciitags, input);             // high two bits cleared where not ASCII
+      __m512i lastbytes = _mm512_maskz_permutexvar_epi8(0x5555555555555555, last_and_thirdu16,
+                                                        clearedbytes); // the last byte of each character
+
+      __mmask64 mask_before_non_ascii = _kshiftri_mask64(mask_not_ascii, 1);               // bytes that precede non-ASCII bytes
+      __m512i indexofsecondlastbytes = _mm512_add_epi16(mask_ffffffff, last_and_thirdu16); // indices of the second last bytes
+      __m512i beforeasciibytes = _mm512_maskz_mov_epi8(mask_before_non_ascii, clearedbytes);
+      __m512i secondlastbytes = _mm512_maskz_permutexvar_epi8(0x5555555555555555, indexofsecondlastbytes,
+                                                              beforeasciibytes); // the second last bytes (of two, three byte seq,
+                                                                                 // surrogates)
+      secondlastbytes = _mm512_slli_epi16(secondlastbytes, 6);                   // shifted into position
+
+      __m512i indexofthirdlastbytes = _mm512_add_epi16(mask_ffffffff,
+                                                       indexofsecondlastbytes); // indices of the second last bytes
+      __m512i thirdlastbyte = _mm512_maskz_mov_epi8(m34,
+                                                    clearedbytes); // only those that are the third last byte of a sequece
+      __m512i thirdlastbytes = _mm512_maskz_permutexvar_epi8(0x5555555555555555, indexofthirdlastbytes,
+                                                             thirdlastbyte); // the third last bytes (of three byte sequences, hi
+                                                                             // surrogate)
+      thirdlastbytes = _mm512_slli_epi16(thirdlastbytes, 12);                // shifted into position
+      __m512i Wout = _mm512_ternarylogic_epi32(lastbytes, secondlastbytes, thirdlastbytes, 254);
+      // the elements of Wout excluding the last element if it happens to be a high surrogate:
+
+      __mmask64 mprocessed = (tail == SIMDUTF_FULL) ? _pdep_u64(0xFFFFFFFF, mend) : _pdep_u64(0xFFFFFFFF, _kand_mask64(mend, b)); // we adjust mend at the end of the output.
+
+
+      // Encodings out of range...
+      {
+        // the location of 3-byte sequence start bytes in the input
+        __mmask64 m3 = m34 & (b ^ m4);
+        // code units in Wout corresponding to 3-byte sequences.
+        __mmask32 M3 = __mmask32(_pext_u64(m3 << 2, mend));
+        __m512i mask_08000800 = _mm512_set1_epi32(0x08000800);
+        __mmask32 Msmall800 = _mm512_mask_cmplt_epu16_mask(M3, Wout, mask_08000800);
+        __m512i mask_d800d800 = _mm512_set1_epi32(0xd800d800);
+        __m512i Moutminusd800 = _mm512_sub_epi16(Wout, mask_d800d800);
+        __mmask32 M3s = _mm512_mask_cmplt_epu16_mask(M3, Moutminusd800, mask_08000800);
+        if (_kor_mask32(Msmall800, M3s)) { return false; }
+      }
+      int64_t nout = _mm_popcnt_u64(mprocessed);
+      in +=  64 - _lzcnt_u64(mprocessed);
+      if(big_endian) { Wout = _mm512_shuffle_epi8(Wout, byteflip); }
+      _mm512_mask_storeu_epi16(out, __mmask32((uint64_t(1) << nout) - 1), Wout);
+      out += nout;
+      return true; // ok
+    }
+    //
+    // We have a 4-byte sequence, this is the general case.
     // Slow!
     __mmask64 mp3 = _kshiftli_mask64(m4, 3);
     __mmask64 mc = _kor_mask64(_kor_mask64(mp1, mp2), mp3); // expected continuation bytes
@@ -16025,7 +18112,7 @@ simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t
     {
       // the location of 3-byte sequence start bytes in the input
       __mmask64 m3 = m34 & (b ^ m4);
-      // words in Wout corresponding to 3-byte sequences.
+      // code units in Wout corresponding to 3-byte sequences.
       __mmask32 M3 = __mmask32(_pext_u64(m3 << 2, mend));
       __m512i mask_08000800 = _mm512_set1_epi32(0x08000800);
       __mmask32 Msmall800 = _mm512_mask_cmplt_epu16_mask(M3, Wout, mask_08000800);
@@ -16075,9 +18162,9 @@ simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t
     in += 64 - _lzcnt_u64(_pdep_u64(0xFFFFFFFF, continuation_or_ascii));
   }
   __m512i lead = _mm512_maskz_compress_epi8(leading, leading2byte);          // will contain zero for ascii, and the data
-  lead = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(lead));                 // ... zero extended into words
+  lead = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(lead));                 // ... zero extended into code units
   __m512i follow = _mm512_maskz_compress_epi8(continuation_or_ascii, input); // the last bytes of each sequence
-  follow = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(follow));             // ... zero extended into words
+  follow = _mm512_cvtepu8_epi16(_mm512_castsi512_si256(follow));             // ... zero extended into code units
   lead = _mm512_slli_epi16(lead, 6);                                         // shifted into position
   __m512i final = _mm512_add_epi16(follow, lead);                            // combining lead and follow
 
@@ -16100,13 +18187,13 @@ simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t
 
 
 /*
-    utf32_to_utf16_masked converts `count` lower UTF-32 words
+    utf32_to_utf16_masked converts `count` lower UTF-32 code units
     from input `utf32` into UTF-16. It differs from utf32_to_utf16
     in that it 'masks' the writes.
 
-    Returns how many 16-bit words were stored.
+    Returns how many 16-bit code units were stored.
 
-    byteflip is used for flipping 16-bit words, and it should be
+    byteflip is used for flipping 16-bit code units, and it should be
         __m512i byteflip = _mm512_setr_epi64(
             0x0607040502030001,
             0x0e0f0c0d0a0b0809,
@@ -16139,7 +18226,7 @@ simdutf_really_inline size_t utf32_to_utf16_masked(const __m512i byteflip, __m51
     }
 
     {
-        // build surrogate pair words in 32-bit lanes
+        // build surrogate pair code units in 32-bit lanes
 
         //    t0 = 8 x [000000000000aaaa|aaaaaabbbbbbbbbb]
         const __m512i v_0001_0000 = _mm512_set1_epi32(0x00010000);
@@ -16160,7 +18247,7 @@ simdutf_really_inline size_t utf32_to_utf16_masked(const __m512i byteflip, __m51
         const __m512i t3 = _mm512_ternarylogic_epi32(t2, v_fc00_fc00, v_d800_dc00, 0xba);
         const __m512i t4 = _mm512_mask_blend_epi32(sp_mask, utf32, t3);
         __m512i t5 = _mm512_ror_epi32(t4, 16);
-        // Here we want to trim all of the upper 16-bit words from the 2-byte
+        // Here we want to trim all of the upper 16-bit code units from the 2-byte
         // characters represented as 4-byte values. We can compute it from
         // sp_mask or the following... It can be more optimized!
         const  __mmask32 nonzero = _kor_mask32(0xaaaaaaaa,_mm512_cmpneq_epi16_mask(t5, _mm512_setzero_si512()));
@@ -16176,12 +18263,12 @@ simdutf_really_inline size_t utf32_to_utf16_masked(const __m512i byteflip, __m51
 }
 
 /*
-    utf32_to_utf16 converts `count` lower UTF-32 words
+    utf32_to_utf16 converts `count` lower UTF-32 code units
     from input `utf32` into UTF-16. It may overflow.
 
-    Returns how many 16-bit words were stored.
+    Returns how many 16-bit code units were stored.
 
-    byteflip is used for flipping 16-bit words, and it should be
+    byteflip is used for flipping 16-bit code units, and it should be
         __m512i byteflip = _mm512_setr_epi64(
             0x0607040502030001,
             0x0e0f0c0d0a0b0809,
@@ -16212,7 +18299,7 @@ simdutf_really_inline size_t utf32_to_utf16(const __m512i byteflip, __m512i utf3
     }
 
     {
-        // build surrogate pair words in 32-bit lanes
+        // build surrogate pair code units in 32-bit lanes
 
         //    t0 = 8 x [000000000000aaaa|aaaaaabbbbbbbbbb]
         const __m512i v_0001_0000 = _mm512_set1_epi32(0x00010000);
@@ -16299,8 +18386,8 @@ __m512i rotate_by_N_epi8(const __m512i input) {
 simdutf_really_inline __m512i expanded_utf8_to_utf32(__m512i char_class, __m512i utf8) {
     /*
         Input:
-        - utf8: bytes stored at separate 32-bit words
-        - valid: which words have valid UTF-8 characters
+        - utf8: bytes stored at separate 32-bit code units
+        - valid: which code units have valid UTF-8 characters
 
         Bit layout of single word. We show 4 cases for each possible
         UTF-8 character encoding. The `?` denotes bits we must not
@@ -16448,7 +18535,6 @@ simdutf_really_inline __m512i expand_utf8_to_utf32(__m512i input) {
     return expanded_utf8_to_utf32(char_class, input);
 }
 /* end file src/icelake/icelake_utf8_common.inl.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_macros.inl.cpp
 /* begin file src/icelake/icelake_macros.inl.cpp */
 
 /*
@@ -16584,7 +18670,6 @@ simdutf_really_inline __m512i expand_utf8_to_utf32(__m512i input) {
                 }                                                                         \
         }
 /* end file src/icelake/icelake_macros.inl.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_from_valid_utf8.inl.cpp
 /* begin file src/icelake/icelake_from_valid_utf8.inl.cpp */
 // file included directly
 
@@ -16723,7 +18808,6 @@ std::pair valid_utf8_to_fixed_length(const char* str, size
 
 using utf8_to_utf16_result = std::pair;
 /* end file src/icelake/icelake_from_valid_utf8.inl.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_utf8_validation.inl.cpp
 /* begin file src/icelake/icelake_utf8_validation.inl.cpp */
 // file included directly
 
@@ -16853,14 +18937,13 @@ simdutf_really_inline __m512i check_special_cases(__m512i input, const __m512i p
 
   }; // struct avx512_utf8_checker
 /* end file src/icelake/icelake_utf8_validation.inl.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_from_utf8.inl.cpp
 /* begin file src/icelake/icelake_from_utf8.inl.cpp */
 // file included directly
 
 // File contains conversion procedure from possibly invalid UTF-8 strings.
 
 /**
- * Attempts to convert up to len 1-byte words from in (in UTF-8 format) to
+ * Attempts to convert up to len 1-byte code units from in (in UTF-8 format) to
  * out.
  * Returns the position of the input and output after the processing is
  * completed. Upon error, the output is set to null.
@@ -17138,25 +19221,496 @@ std::tuple validating_utf8_to_fixed_length_with_cons
                 SIMDUTF_ICELAKE_WRITE_UTF16_OR_UTF32(vec1, valid_count1, true)
             }
 
-            const __m512i lane3 = broadcast_epi128<3>(utf8);
-            SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true)
+            const __m512i lane3 = broadcast_epi128<3>(utf8);
+            SIMDUTF_ICELAKE_TRANSCODE16(lane2, lane3, true)
+
+            ptr += 3*16;
+        }
+        validatedptr += 4*16;
+    }
+    {
+       const __m512i utf8 = _mm512_maskz_loadu_epi8((1ULL<<(end - validatedptr))-1, (const __m512i*)validatedptr);
+       checker.check_next_input(utf8);
+    }
+    checker.check_eof();
+    if(checker.errors()) {
+        return {ptr, output, false}; // We found an error.
+    }
+    return {ptr, output, true};
+}
+/* end file src/icelake/icelake_from_utf8.inl.cpp */
+/* begin file src/icelake/icelake_convert_utf8_to_latin1.inl.cpp */
+// file included directly
+
+// File contains conversion procedure from possibly invalid UTF-8 strings.
+
+// template 
+template 
+simdutf_really_inline size_t process_block_from_utf8_to_latin1(const char *buf, size_t len,
+                                           char *latin_output, __m512i minus64,
+                                           __m512i one,
+                                           __mmask64 *next_leading_ptr,
+                                           __mmask64 *next_bit6_ptr) {
+  __mmask64 load_mask =
+      is_remaining ? _bzhi_u64(~0ULL, (unsigned int)len) : ~0ULL;
+  __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)buf);
+  __mmask64 nonascii = _mm512_movepi8_mask(input);
+
+  if (nonascii == 0) {
+    is_remaining
+        ? _mm512_mask_storeu_epi8((__m512i *)latin_output, load_mask, input)
+        : _mm512_storeu_si512((__m512i *)latin_output, input);
+    return len;
+  }
+
+  __mmask64 leading = _mm512_cmpge_epu8_mask(input, minus64);
+
+  __m512i highbits = _mm512_xor_si512(input, _mm512_set1_epi8(-62));
+  __mmask64 invalid_leading_bytes =
+      _mm512_mask_cmpgt_epu8_mask(leading, highbits, one);
+
+  if (invalid_leading_bytes) {
+    return 0; // Indicates error
+  }
+
+  __mmask64 leading_shift = (leading << 1) | *next_leading_ptr;
+  *next_leading_ptr = leading >> 63;
+
+  if ((nonascii ^ leading) != leading_shift) {
+    return 0; // Indicates error
+  }
+
+  __mmask64 bit6 = _mm512_cmpeq_epi8_mask(highbits, one);
+  input =
+      _mm512_mask_sub_epi8(input, (bit6 << 1) | *next_bit6_ptr, input, minus64);
+  *next_bit6_ptr = bit6 >> 63;
+
+  __mmask64 retain = ~leading & load_mask;
+  __m512i output = _mm512_maskz_compress_epi8(retain, input);
+  int64_t written_out = count_ones(retain);
+  __mmask64 store_mask = (1ULL << written_out) - 1;
+
+  // ***************************
+  //  Possible optimization? (Nick Nuon)
+  //  This commented out line is 5% faster but sadly it'll also write past
+  //  memory bounds for latin1_output: is_remaining ?
+  //  _mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output) :
+  //  _mm512_storeu_si512((__m512i *)latin_output, output); I tried using
+  //  _mm512_storeu_si512 and have the next process_block start from the
+  //  "written_out" point but the compiler shuffles memory in such a way that it
+  //  is signifcantly slower...
+  // ****************************
+  _mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output);
+
+  return written_out;
+}
+
+size_t utf8_to_latin1_avx512(const char *buf, size_t len, char *latin_output) {
+  char *start = latin_output;
+  size_t pos = 0;
+  __m512i minus64 = _mm512_set1_epi8(-64); // 11111111111 ... 1100 0000
+  __m512i one = _mm512_set1_epi8(1);
+  __mmask64 next_leading = 0;
+  __mmask64 next_bit6 = 0;
+
+  while (pos + 64 <= len) {
+    size_t written = process_block_from_utf8_to_latin1(buf + pos, 64, latin_output, minus64,
+                                          one, &next_leading, &next_bit6);
+    if (written == 0) {
+      return 0; // Indicates error
+    }
+    latin_output += written;
+    pos += 64;
+  }
+
+  if (pos < len) {
+    size_t remaining = len - pos;
+    size_t written =
+        process_block_from_utf8_to_latin1(buf + pos, remaining, latin_output, minus64, one,
+                            &next_leading, &next_bit6);
+    if (written == 0) {
+      return 0; // Indicates error
+    }
+    latin_output += written;
+  }
+
+  return (size_t)(latin_output - start);
+}
+/* end file src/icelake/icelake_convert_utf8_to_latin1.inl.cpp */
+/* begin file src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp */
+// file included directly
+
+// File contains conversion procedure from valid UTF-8 strings.
+
+template 
+simdutf_really_inline size_t process_valid_block_from_utf8_to_latin1(const char *buf, size_t len,
+                                                 char *latin_output,
+                                                 __m512i minus64, __m512i one,
+                                                 __mmask64 *next_leading_ptr,
+                                                 __mmask64 *next_bit6_ptr) {
+  __mmask64 load_mask =
+      is_remaining ? _bzhi_u64(~0ULL, (unsigned int)len) : ~0ULL;
+  __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)buf);
+  __mmask64 nonascii = _mm512_movepi8_mask(input);
+
+  if (nonascii == 0) {
+    is_remaining
+        ? _mm512_mask_storeu_epi8((__m512i *)latin_output, load_mask, input)
+        : _mm512_storeu_si512((__m512i *)latin_output, input);
+    return len;
+  }
+
+  __mmask64 leading = _mm512_cmpge_epu8_mask(input, minus64);
+
+  __m512i highbits = _mm512_xor_si512(input, _mm512_set1_epi8(-62));
+
+  *next_leading_ptr = leading >> 63;
+
+  __mmask64 bit6 = _mm512_cmpeq_epi8_mask(highbits, one);
+  input =
+      _mm512_mask_sub_epi8(input, (bit6 << 1) | *next_bit6_ptr, input, minus64);
+  *next_bit6_ptr = bit6 >> 63;
+
+  __mmask64 retain = ~leading & load_mask;
+  __m512i output = _mm512_maskz_compress_epi8(retain, input);
+  int64_t written_out = count_ones(retain);
+  __mmask64 store_mask = (1ULL << written_out) - 1;
+  // Optimization opportunity: sometimes, masked writes are not needed.
+  _mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output);
+  return written_out;
+}
+
+size_t valid_utf8_to_latin1_avx512(const char *buf, size_t len,
+                                   char *latin_output) {
+  char *start = latin_output;
+  size_t pos = 0;
+  __m512i minus64 = _mm512_set1_epi8(-64); // 11111111111 ... 1100 0000
+  __m512i one = _mm512_set1_epi8(1);
+  __mmask64 next_leading = 0;
+  __mmask64 next_bit6 = 0;
+
+  while (pos + 64 <= len) {
+    size_t written = process_valid_block_from_utf8_to_latin1(
+        buf + pos, 64, latin_output, minus64, one, &next_leading, &next_bit6);
+    latin_output += written;
+    pos += 64;
+  }
+
+  if (pos < len) {
+    size_t remaining = len - pos;
+    size_t written =
+        process_valid_block_from_utf8_to_latin1(buf + pos, remaining, latin_output, minus64,
+                                  one, &next_leading, &next_bit6);
+    latin_output += written;
+  }
+
+  return (size_t)(latin_output - start);
+}
+/* end file src/icelake/icelake_convert_valid_utf8_to_latin1.inl.cpp */
+/* begin file src/icelake/icelake_convert_utf16_to_latin1.inl.cpp */
+// file included directly
+template 
+size_t icelake_convert_utf16_to_latin1(const char16_t *buf, size_t len,
+                                       char *latin1_output) {
+  const char16_t *end = buf + len;
+  __m512i v_0xFF = _mm512_set1_epi16(0xff);
+  __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809,
+                                       0x0607040502030001, 0x0e0f0c0d0a0b0809,
+                                       0x0607040502030001, 0x0e0f0c0d0a0b0809,
+                                       0x0607040502030001, 0x0e0f0c0d0a0b0809);
+  __m512i shufmask = _mm512_set_epi8(
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38,
+      36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0);
+  while (buf + 32 <= end) {
+    __m512i in = _mm512_loadu_si512((__m512i *)buf);
+    if (big_endian) {
+      in = _mm512_shuffle_epi8(in, byteflip);
+    }
+    if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) {
+      return 0;
+    }
+    _mm256_storeu_si256(
+        (__m256i *)latin1_output,
+        _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in)));
+    latin1_output += 32;
+    buf += 32;
+  }
+  if (buf < end) {
+    uint32_t mask(uint32_t(1 << (end - buf)) - 1);
+    __m512i in = _mm512_maskz_loadu_epi16(mask, buf);
+    if (big_endian) {
+      in = _mm512_shuffle_epi8(in, byteflip);
+    }
+    if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) {
+      return 0;
+    }
+    _mm256_mask_storeu_epi8(
+        latin1_output, mask,
+        _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in)));
+  }
+  return len;
+}
+
+template 
+std::pair
+icelake_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len,
+                                            char *latin1_output) {
+  const char16_t *end = buf + len;
+  const char16_t *start = buf;
+  __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809,
+                                       0x0607040502030001, 0x0e0f0c0d0a0b0809,
+                                       0x0607040502030001, 0x0e0f0c0d0a0b0809,
+                                       0x0607040502030001, 0x0e0f0c0d0a0b0809);
+  __m512i v_0xFF = _mm512_set1_epi16(0xff);
+  __m512i shufmask = _mm512_set_epi8(
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38,
+      36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0);
+  while (buf + 32 <= end) {
+    __m512i in = _mm512_loadu_si512((__m512i *)buf);
+    if (big_endian) {
+      in = _mm512_shuffle_epi8(in, byteflip);
+    }
+    if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) {
+      uint16_t word;
+      while ((word = (big_endian ? scalar::utf16::swap_bytes(uint16_t(*buf))
+                                 : uint16_t(*buf))) <= 0xff) {
+        *latin1_output++ = uint8_t(word);
+        buf++;
+      }
+      return std::make_pair(result(error_code::TOO_LARGE, buf - start),
+                            latin1_output);
+    }
+    _mm256_storeu_si256(
+        (__m256i *)latin1_output,
+        _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in)));
+    latin1_output += 32;
+    buf += 32;
+  }
+  if (buf < end) {
+    uint32_t mask(uint32_t(1 << (end - buf)) - 1);
+    __m512i in = _mm512_maskz_loadu_epi16(mask, buf);
+    if (big_endian) {
+      in = _mm512_shuffle_epi8(in, byteflip);
+    }
+    if (_mm512_cmpgt_epu16_mask(in, v_0xFF)) {
+
+      uint16_t word;
+      while ((word = (big_endian ? scalar::utf16::swap_bytes(uint16_t(*buf))
+                                 : uint16_t(*buf))) <= 0xff) {
+        *latin1_output++ = uint8_t(word);
+        buf++;
+      }
+      return std::make_pair(result(error_code::TOO_LARGE, buf - start),
+                            latin1_output);
+    }
+    _mm256_mask_storeu_epi8(
+        latin1_output, mask,
+        _mm512_castsi512_si256(_mm512_permutexvar_epi8(shufmask, in)));
+  }
+  return std::make_pair(result(error_code::SUCCESS, len), latin1_output);
+}
+/* end file src/icelake/icelake_convert_utf16_to_latin1.inl.cpp */
+/* begin file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */
+// file included directly
+
+/**
+ * This function converts the input (inbuf, inlen), assumed to be valid
+ * UTF16 (little endian) into UTF-8 (to outbuf). The number of code units written
+ * is written to 'outlen' and the function reports the number of input word
+ * consumed.
+ */
+template 
+size_t utf16_to_utf8_avx512i(const char16_t *inbuf, size_t inlen,
+                               unsigned char *outbuf, size_t *outlen) {
+  __m512i in;
+  __mmask32 inmask = _cvtu32_mask32(0x7fffffff);
+  __m512i byteflip = _mm512_setr_epi64(
+            0x0607040502030001,
+            0x0e0f0c0d0a0b0809,
+            0x0607040502030001,
+            0x0e0f0c0d0a0b0809,
+            0x0607040502030001,
+            0x0e0f0c0d0a0b0809,
+            0x0607040502030001,
+            0x0e0f0c0d0a0b0809
+        );
+  const char16_t * const inbuf_orig = inbuf;
+  const unsigned char * const outbuf_orig = outbuf;
+  size_t adjust = 0;
+  int carry = 0;
+
+  while (inlen >= 32) {
+    in = _mm512_loadu_si512(inbuf);
+    if(big_endian) { in = _mm512_shuffle_epi8(in, byteflip); }
+    inlen -= 31;
+  lastiteration:
+    inbuf += 31;
+
+  failiteration:
+    const __mmask32 is234byte = _mm512_mask_cmp_epu16_mask(
+      inmask, in, _mm512_set1_epi16(0x0080), _MM_CMPINT_NLT);
+
+    if (_ktestz_mask32_u8(inmask, is234byte)) {
+      // fast path for ASCII only
+      _mm512_mask_cvtepi16_storeu_epi8(outbuf, inmask, in);
+      outbuf += 31;
+      carry = 0;
+
+      if (inlen < 32) {
+        goto tail;
+      } else {
+        continue;
+      }
+    }
+
+    const __mmask32 is12byte =
+        _mm512_cmp_epu16_mask(in, _mm512_set1_epi16(0x0800), _MM_CMPINT_LT);
+
+    if (_ktestc_mask32_u8(is12byte, inmask)) {
+      // fast path for 1 and 2 byte only
+
+      const __m512i twobytes = _mm512_ternarylogic_epi32(
+          _mm512_slli_epi16(in, 8), _mm512_srli_epi16(in, 6),
+          _mm512_set1_epi16(0x3f3f), 0xa8); // (A|B)&C
+      in = _mm512_mask_add_epi16(in, is234byte, twobytes,
+                                 _mm512_set1_epi16(int16_t(0x80c0)));
+      const __m512i cmpmask =
+          _mm512_mask_blend_epi16(inmask, _mm512_set1_epi16(int16_t(0xffff)),
+                                  _mm512_set1_epi16(0x0800));
+      const __mmask64 smoosh = _mm512_cmp_epu8_mask(in, cmpmask, _MM_CMPINT_NLT);
+      const __m512i out = _mm512_maskz_compress_epi8(smoosh, in);
+      _mm512_mask_storeu_epi8(outbuf, _cvtu64_mask64(_pext_u64(_cvtmask64_u64(smoosh), _cvtmask64_u64(smoosh))),
+                              out);
+      outbuf += 31 + _mm_popcnt_u32(_cvtmask32_u32(is234byte));
+      carry = 0;
+
+      if (inlen < 32) {
+        goto tail;
+      } else {
+        continue;
+      }
+    }
+    __m512i lo = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in));
+    __m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1));
+
+
+    __m512i taglo = _mm512_set1_epi32(0x8080e000);
+    __m512i taghi = taglo;
+
+    const __m512i fc00masked = _mm512_and_epi32(in, _mm512_set1_epi16(int16_t(0xfc00)));
+    const __mmask32 hisurr = _mm512_mask_cmp_epu16_mask(
+        inmask, fc00masked, _mm512_set1_epi16(int16_t(0xd800)), _MM_CMPINT_EQ);
+    const __mmask32 losurr = _mm512_cmp_epu16_mask(
+        fc00masked, _mm512_set1_epi16(int16_t(0xdc00)), _MM_CMPINT_EQ);
+
+    int carryout = 0;
+    if (!_kortestz_mask32_u8(hisurr, losurr)) {
+      // handle surrogates
+
+      __m512i los = _mm512_alignr_epi32(hi, lo, 1);
+      __m512i his = _mm512_alignr_epi32(lo, hi, 1);
+
+      const __mmask32 hisurrhi = _kshiftri_mask32(hisurr, 16);
+      taglo =
+          _mm512_mask_mov_epi32(taglo,__mmask16(hisurr), _mm512_set1_epi32(0x808080f0));
+      taghi =
+          _mm512_mask_mov_epi32(taghi, __mmask16(hisurrhi), _mm512_set1_epi32(0x808080f0));
+
+      lo = _mm512_mask_slli_epi32(lo, __mmask16(hisurr), lo, 10);
+      hi = _mm512_mask_slli_epi32(hi, __mmask16(hisurrhi), hi, 10);
+      los = _mm512_add_epi32(los, _mm512_set1_epi32(0xfca02400));
+      his = _mm512_add_epi32(his, _mm512_set1_epi32(0xfca02400));
+      lo = _mm512_mask_add_epi32(lo, __mmask16(hisurr), lo, los);
+      hi = _mm512_mask_add_epi32(hi, __mmask16(hisurrhi), hi, his);
+
+      carryout = _cvtu32_mask32(_kshiftri_mask32(hisurr, 30));
+
+      const uint32_t  h = _cvtmask32_u32(hisurr);
+      const uint32_t  l = _cvtmask32_u32(losurr);
+      // check for mismatched surrogates
+      if ((h + h + carry) ^ l) {
+        const uint32_t lonohi = l & ~(h + h + carry);
+        const uint32_t hinolo = h & ~(l >> 1);
+        inlen = _tzcnt_u32(hinolo | lonohi);
+        inmask = __mmask32(0x7fffffff & ((1 << inlen) - 1));
+        in = _mm512_maskz_mov_epi16(inmask, in);
+        adjust = (int)inlen - 31;
+        inlen = 0;
+        goto failiteration;
+      }
+    }
+
+    hi = _mm512_maskz_mov_epi32(_cvtu32_mask16(0x7fff),hi);
+    carry = carryout;
+
+    __m512i mslo =
+        _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), lo);
+
+    __m512i mshi =
+        _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), hi);
+
+    const __mmask32 outmask = __mmask32(_kandn_mask64(losurr, inmask));
+    const __mmask64 outmhi = _kshiftri_mask64(outmask, 16);
+
+    const __mmask32 is1byte = __mmask32(_knot_mask64(is234byte));
+    const __mmask64 is1bhi = _kshiftri_mask64(is1byte, 16);
+    const __mmask64 is12bhi = _kshiftri_mask64(is12byte, 16);
+
+    taglo =
+        _mm512_mask_mov_epi32(taglo, __mmask16(is12byte), _mm512_set1_epi32(0x80c00000));
+    taghi =
+        _mm512_mask_mov_epi32(taghi, __mmask16(is12bhi), _mm512_set1_epi32(0x80c00000));
+    __m512i magiclo = _mm512_mask_blend_epi32(__mmask16(outmask), _mm512_set1_epi32(0xffffffff),
+                                      _mm512_set1_epi32(0x00010101));
+    __m512i magichi = _mm512_mask_blend_epi32(__mmask16(outmhi), _mm512_set1_epi32(0xffffffff),
+                                      _mm512_set1_epi32(0x00010101));
+
 
-            ptr += 3*16;
-        }
-        validatedptr += 4*16;
-    }
-    {
-       const __m512i utf8 = _mm512_maskz_loadu_epi8((1ULL<<(end - validatedptr))-1, (const __m512i*)validatedptr);
-       checker.check_next_input(utf8);
-    }
-    checker.check_eof();
-    if(checker.errors()) {
-        return {ptr, output, false}; // We found an error.
-    }
-    return {ptr, output, true};
+    magiclo = _mm512_mask_blend_epi32(__mmask16(outmask), _mm512_set1_epi32(0xffffffff),
+                                      _mm512_set1_epi32(0x00010101));
+    magichi = _mm512_mask_blend_epi32(__mmask16(outmhi), _mm512_set1_epi32(0xffffffff),
+                                      _mm512_set1_epi32(0x00010101));
+
+    mslo = _mm512_ternarylogic_epi32(mslo, _mm512_set1_epi32(0x3f3f3f3f), taglo,
+                                     0xea); // A&B|C
+    mshi = _mm512_ternarylogic_epi32(mshi, _mm512_set1_epi32(0x3f3f3f3f), taghi,
+                                     0xea);
+    mslo = _mm512_mask_slli_epi32(mslo, __mmask16(is1byte), lo, 24);
+
+    mshi = _mm512_mask_slli_epi32(mshi, __mmask16(is1bhi), hi, 24);
+
+    const __mmask64 wantlo = _mm512_cmp_epu8_mask(mslo, magiclo, _MM_CMPINT_NLT);
+    const __mmask64 wanthi = _mm512_cmp_epu8_mask(mshi, magichi, _MM_CMPINT_NLT);
+    const __m512i outlo = _mm512_maskz_compress_epi8(wantlo, mslo);
+    const __m512i outhi = _mm512_maskz_compress_epi8(wanthi, mshi);
+    const uint64_t wantlo_uint64 = _cvtmask64_u64(wantlo);
+    const uint64_t wanthi_uint64 = _cvtmask64_u64(wanthi);
+
+    uint64_t advlo = _mm_popcnt_u64(wantlo_uint64);
+    uint64_t advhi = _mm_popcnt_u64(wanthi_uint64);
+
+    _mm512_mask_storeu_epi8(outbuf, _cvtu64_mask64(_pext_u64(wantlo_uint64, wantlo_uint64)), outlo);
+    _mm512_mask_storeu_epi8(outbuf + advlo, _cvtu64_mask64(_pext_u64(wanthi_uint64, wanthi_uint64)), outhi);
+    outbuf += advlo + advhi;
+  }
+  outbuf -= adjust;
+
+tail:
+  if (inlen != 0) {
+    // We must have inlen < 31.
+    inmask = _cvtu32_mask32((1 << inlen) - 1);
+    in = _mm512_maskz_loadu_epi16(inmask, inbuf);
+    if(big_endian) { in = _mm512_shuffle_epi8(in, byteflip); }
+    adjust = inlen - 31;
+    inlen = 0;
+    goto lastiteration;
+  }
+  *outlen = (outbuf - outbuf_orig) + adjust;
+  return ((inbuf - inbuf_orig) + adjust);
 }
-/* end file src/icelake/icelake_from_utf8.inl.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_convert_utf16_to_utf32.inl.cpp
+/* end file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */
 /* begin file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */
 // file included directly
 
@@ -17181,7 +19735,7 @@ std::tuple convert_utf16_to_utf32(const char16
             0x0607040502030001,
             0x0e0f0c0d0a0b0809
         );
-  while (buf + 32 <= end) {
+  while (std::distance(buf,end) >= 32) {
     // Always safe because buf + 32 <= end so that end - buf >= 32 bytes:
     __m512i in = _mm512_loadu_si512((__m512i*)buf);
     if(big_endian) { in = _mm512_shuffle_epi8(in, byteflip); }
@@ -17203,7 +19757,7 @@ std::tuple convert_utf16_to_utf32(const char16
             |1101.11aa.aaaa.aaaa|1101.10bb.bbbb.bbbb|
                 low surrogate      high surrogate
         */
-        /*  1. Expand all words to 32-bit words
+        /*  1. Expand all code units to 32-bit code units
             in  |0000.0000.0000.0000.1101.11aa.aaaa.aaaa|0000.0000.0000.0000.1101.10bb.bbbb.bbbb|
         */
         const __m512i first = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in));
@@ -17234,7 +19788,7 @@ std::tuple convert_utf16_to_utf32(const char16
         const __m512i added_second = _mm512_mask_add_epi32(aligned_second, (__mmask16)(H>>16), aligned_second, shifted_second);
         const __m512i utf32_second = _mm512_mask_add_epi32(added_second, (__mmask16)(H>>16), added_second, constant);
 
-        //  5. Store all valid UTF-32 words (low surrogate positions and 32nd word are invalid)
+        //  5. Store all valid UTF-32 code units (low surrogate positions and 32nd word are invalid)
         const __mmask32 valid = ~L & 0x7fffffff;
         // We deliberately do a _mm512_maskz_compress_epi32 followed by storeu_epi32
         // to ease performance portability to Zen 4.
@@ -17248,7 +19802,7 @@ std::tuple convert_utf16_to_utf32(const char16
         //_mm512_storeu_epi32((__m512i *) utf32_output, compressed_second);
         _mm512_mask_storeu_epi32((__m512i *) utf32_output, __mmask16((1<> 30) & 0x1;
       } else {
@@ -17257,7 +19811,7 @@ std::tuple convert_utf16_to_utf32(const char16
       }
     } else {
       // no surrogates
-      // extend all thirty-two 16-bit words to thirty-two 32-bit words
+      // extend all thirty-two 16-bit code units to thirty-two 32-bit code units
       _mm512_storeu_si512((__m512i *)(utf32_output), _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in)));
       _mm512_storeu_si512((__m512i *)(utf32_output) + 1, _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in,1)));
       utf32_output += 32;
@@ -17268,7 +19822,80 @@ std::tuple convert_utf16_to_utf32(const char16
   return std::make_tuple(buf+carry, utf32_output, true);
 }
 /* end file src/icelake/icelake_convert_utf16_to_utf32.inl.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_convert_utf32_to_utf8.inl.cpp
+/* begin file src/icelake/icelake_convert_utf32_to_latin1.inl.cpp */
+// file included directly
+size_t icelake_convert_utf32_to_latin1(const char32_t *buf, size_t len,
+                                       char *latin1_output) {
+  const char32_t *end = buf + len;
+  __m512i v_0xFF = _mm512_set1_epi32(0xff);
+  __m512i shufmask = _mm512_set_epi8(
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60,
+      56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0);
+  while (buf + 16 <= end) {
+    __m512i in = _mm512_loadu_si512((__m512i *)buf);
+    if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) {
+      return 0;
+    }
+    _mm_storeu_si128((__m128i *)latin1_output,
+                     _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in)));
+    latin1_output += 16;
+    buf += 16;
+  }
+  if (buf < end) {
+    uint16_t mask = uint16_t((1 << (end - buf)) - 1);
+    __m512i in = _mm512_maskz_loadu_epi32(mask, buf);
+    if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) {
+      return 0;
+    }
+    _mm_mask_storeu_epi8(
+        latin1_output, mask,
+        _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in)));
+  }
+  return len;
+}
+
+std::pair
+icelake_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len,
+                                            char *latin1_output) {
+  const char32_t *end = buf + len;
+  const char32_t *start = buf;
+  __m512i v_0xFF = _mm512_set1_epi32(0xff);
+  __m512i shufmask = _mm512_set_epi8(
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60,
+      56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0);
+  while (buf + 16 <= end) {
+    __m512i in = _mm512_loadu_si512((__m512i *)buf);
+    if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) {
+      while (uint32_t(*buf) <= 0xff) {
+        *latin1_output++ = uint8_t(*buf++);
+      }
+      return std::make_pair(result(error_code::TOO_LARGE, buf - start),
+                            latin1_output);
+    }
+    _mm_storeu_si128((__m128i *)latin1_output,
+                     _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in)));
+    latin1_output += 16;
+    buf += 16;
+  }
+  if (buf < end) {
+    uint16_t mask = uint16_t((1 << (end - buf)) - 1);
+    __m512i in = _mm512_maskz_loadu_epi32(mask, buf);
+    if (_mm512_cmpgt_epu32_mask(in, v_0xFF)) {
+      while (uint32_t(*buf) <= 0xff) {
+        *latin1_output++ = uint8_t(*buf++);
+      }
+      return std::make_pair(result(error_code::TOO_LARGE, buf - start),
+                            latin1_output);
+    }
+    _mm_mask_storeu_epi8(
+        latin1_output, mask,
+        _mm512_castsi512_si128(_mm512_permutexvar_epi8(shufmask, in)));
+  }
+  return std::make_pair(result(error_code::SUCCESS, len), latin1_output);
+}
+/* end file src/icelake/icelake_convert_utf32_to_latin1.inl.cpp */
 /* begin file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */
 // file included directly
 
@@ -17291,7 +19918,7 @@ std::pair avx512_convert_utf32_to_utf8(const char32_t* b
     __m256i nextin = _mm256_loadu_si256((__m256i*)buf+1);
     running_max = _mm256_max_epu32(_mm256_max_epu32(in, running_max), nextin);
 
-    // Pack 32-bit UTF-32 words to 16-bit UTF-16 words with unsigned saturation
+    // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned saturation
     __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), _mm256_and_si256(nextin, v_7fffffff));
     in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000);
 
@@ -17362,7 +19989,7 @@ std::pair avx512_convert_utf32_to_utf8(const char32_t* b
     const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000);
     const uint32_t saturation_bitmask = static_cast(_mm256_movemask_epi8(saturation_bytemask));
     if (saturation_bitmask == 0xffffffff) {
-      // case: words from register produce either 1, 2 or 3 UTF-8 bytes
+      // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
       const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
       forbidden_bytemask = _mm256_or_si256(forbidden_bytemask, _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800));
 
@@ -17376,7 +20003,7 @@ std::pair avx512_convert_utf32_to_utf8(const char32_t* b
         2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
         3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-        We expand the input word (16-bit) into two words (32-bit), thus
+        We expand the input word (16-bit) into two code units (32-bit), thus
         we have room for four bytes. However, we need five distinct bit
         layouts. Note that the last byte in cases #2 and #3 is the same.
 
@@ -17387,7 +20014,7 @@ std::pair avx512_convert_utf32_to_utf8(const char32_t* b
         either byte 1 for case #2 or byte 2 for case #3. Note that they
         differ by exactly one bit.
 
-        Finally from these two words we build proper UTF-8 sequence, taking
+        Finally from these two code units we build proper UTF-8 sequence, taking
         into account the case (i.e, the number of bytes to write).
       */
       /**
@@ -17415,16 +20042,16 @@ std::pair avx512_convert_utf32_to_utf8(const char32_t* b
       const __m256i s4 = _mm256_xor_si256(s3, m0);
 #undef simdutf_vec
 
-      // 4. expand words 16-bit => 32-bit
+      // 4. expand code units 16-bit => 32-bit
       const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
       const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
 
-      // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
+      // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
       const uint32_t mask = (one_byte_bitmask & 0x55555555) |
                             (one_or_two_bytes_bitmask & 0xaaaaaaaa);
       // Due to the wider registers, the following path is less likely to be useful.
       /*if(mask == 0) {
-        // We only have three-byte words. Use fast path.
+        // We only have three-byte code units. Use fast path.
         const __m256i shuffle = _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
         const __m256i utf8_0 = _mm256_shuffle_epi8(out0, shuffle);
         const __m256i utf8_1 = _mm256_shuffle_epi8(out1, shuffle);
@@ -17536,7 +20163,7 @@ std::pair avx512_convert_utf32_to_utf8_with_errors(const char32_t
       return std::make_pair(result(error_code::TOO_LARGE, buf - start), utf8_output);
     }
 
-    // Pack 32-bit UTF-32 words to 16-bit UTF-16 words with unsigned saturation
+    // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned saturation
     __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), _mm256_and_si256(nextin, v_7fffffff));
     in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000);
 
@@ -17607,9 +20234,9 @@ std::pair avx512_convert_utf32_to_utf8_with_errors(const char32_t
     const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000);
     const uint32_t saturation_bitmask = static_cast(_mm256_movemask_epi8(saturation_bytemask));
     if (saturation_bitmask == 0xffffffff) {
-      // case: words from register produce either 1, 2 or 3 UTF-8 bytes
+      // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
 
-      // Check for illegal surrogate words
+      // Check for illegal surrogate code units
       const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
       const __m256i forbidden_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800);
       if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0x0) {
@@ -17626,7 +20253,7 @@ std::pair avx512_convert_utf32_to_utf8_with_errors(const char32_t
         2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
         3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-        We expand the input word (16-bit) into two words (32-bit), thus
+        We expand the input word (16-bit) into two code units (32-bit), thus
         we have room for four bytes. However, we need five distinct bit
         layouts. Note that the last byte in cases #2 and #3 is the same.
 
@@ -17637,7 +20264,7 @@ std::pair avx512_convert_utf32_to_utf8_with_errors(const char32_t
         either byte 1 for case #2 or byte 2 for case #3. Note that they
         differ by exactly one bit.
 
-        Finally from these two words we build proper UTF-8 sequence, taking
+        Finally from these two code units we build proper UTF-8 sequence, taking
         into account the case (i.e, the number of bytes to write).
       */
       /**
@@ -17665,16 +20292,16 @@ std::pair avx512_convert_utf32_to_utf8_with_errors(const char32_t
       const __m256i s4 = _mm256_xor_si256(s3, m0);
 #undef simdutf_vec
 
-      // 4. expand words 16-bit => 32-bit
+      // 4. expand code units 16-bit => 32-bit
       const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
       const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
 
-      // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
+      // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
       const uint32_t mask = (one_byte_bitmask & 0x55555555) |
                             (one_or_two_bytes_bitmask & 0xaaaaaaaa);
       // Due to the wider registers, the following path is less likely to be useful.
       /*if(mask == 0) {
-        // We only have three-byte words. Use fast path.
+        // We only have three-byte code units. Use fast path.
         const __m256i shuffle = _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
         const __m256i utf8_0 = _mm256_shuffle_epi8(out0, shuffle);
         const __m256i utf8_1 = _mm256_shuffle_epi8(out1, shuffle);
@@ -17754,7 +20381,6 @@ std::pair avx512_convert_utf32_to_utf8_with_errors(const char32_t
   return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output);
 }
 /* end file src/icelake/icelake_convert_utf32_to_utf8.inl.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_convert_utf32_to_utf16.inl.cpp
 /* begin file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */
 // file included directly
 
@@ -17889,7 +20515,6 @@ std::pair avx512_convert_utf32_to_utf16_with_errors(const cha
   return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output);
 }
 /* end file src/icelake/icelake_convert_utf32_to_utf16.inl.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_ascii_validation.inl.cpp
 /* begin file src/icelake/icelake_ascii_validation.inl.cpp */
 // file included directly
 
@@ -17908,7 +20533,6 @@ bool validate_ascii(const char* buf, size_t len) {
   return (_mm512_test_epi8_mask(running_or, running_or) == 0);
 }
 /* end file src/icelake/icelake_ascii_validation.inl.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_utf32_validation.inl.cpp
 /* begin file src/icelake/icelake_utf32_validation.inl.cpp */
 // file included directly
 
@@ -17935,211 +20559,178 @@ const char32_t* validate_utf32(const char32_t* buf, size_t len) {
     is_zero = _mm512_xor_si512(_mm512_max_epu32(currentoffsetmax, standardoffsetmax), standardoffsetmax);
     if (_mm512_test_epi8_mask(is_zero, is_zero) != 0) {
       return nullptr;
-    }
-
-    return buf;
-}
-/* end file src/icelake/icelake_utf32_validation.inl.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=icelake/icelake_convert_utf16_to_utf8.inl.cpp
-/* begin file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */
-// file included directly
-
-/**
- * This function converts the input (inbuf, inlen), assumed to be valid
- * UTF16 (little endian) into UTF-8 (to outbuf). The number of words written
- * is written to 'outlen' and the function reports the number of input word
- * consumed.
- */
-template 
-size_t utf16_to_utf8_avx512i(const char16_t *inbuf, size_t inlen,
-                               unsigned char *outbuf, size_t *outlen) {
-  __m512i in;
-  __mmask32 inmask = _cvtu32_mask32(0x7fffffff);
-  __m512i byteflip = _mm512_setr_epi64(
-            0x0607040502030001,
-            0x0e0f0c0d0a0b0809,
-            0x0607040502030001,
-            0x0e0f0c0d0a0b0809,
-            0x0607040502030001,
-            0x0e0f0c0d0a0b0809,
-            0x0607040502030001,
-            0x0e0f0c0d0a0b0809
-        );
-  const char16_t * const inbuf_orig = inbuf;
-  const unsigned char * const outbuf_orig = outbuf;
-  size_t adjust = 0;
-  int carry = 0;
-
-  while (inlen >= 32) {
-    in = _mm512_loadu_si512(inbuf);
-    if(big_endian) { in = _mm512_shuffle_epi8(in, byteflip); }
-    inlen -= 31;
-  lastiteration:
-    inbuf += 31;
-
-  failiteration:
-    const __mmask32 is234byte = _mm512_mask_cmp_epu16_mask(
-      inmask, in, _mm512_set1_epi16(0x0080), _MM_CMPINT_NLT);
-
-    if (_ktestz_mask32_u8(inmask, is234byte)) {
-      // fast path for ASCII only
-      _mm512_mask_cvtepi16_storeu_epi8(outbuf, inmask, in);
-      outbuf += 31;
-      carry = 0;
-
-      if (inlen < 32) {
-        goto tail;
-      } else {
-        continue;
-      }
-    }
-
-    const __mmask32 is12byte =
-        _mm512_cmp_epu16_mask(in, _mm512_set1_epi16(0x0800), _MM_CMPINT_LT);
-
-    if (_ktestc_mask32_u8(is12byte, inmask)) {
-      // fast path for 1 and 2 byte only
-
-      const __m512i twobytes = _mm512_ternarylogic_epi32(
-          _mm512_slli_epi16(in, 8), _mm512_srli_epi16(in, 6),
-          _mm512_set1_epi16(0x3f3f), 0xa8); // (A|B)&C
-      in = _mm512_mask_add_epi16(in, is234byte, twobytes,
-                                 _mm512_set1_epi16(int16_t(0x80c0)));
-      const __m512i cmpmask =
-          _mm512_mask_blend_epi16(inmask, _mm512_set1_epi16(int16_t(0xffff)),
-                                  _mm512_set1_epi16(0x0800));
-      const __mmask64 smoosh = _mm512_cmp_epu8_mask(in, cmpmask, _MM_CMPINT_NLT);
-      const __m512i out = _mm512_maskz_compress_epi8(smoosh, in);
-      _mm512_mask_storeu_epi8(outbuf, _cvtu64_mask64(_pext_u64(_cvtmask64_u64(smoosh), _cvtmask64_u64(smoosh))),
-                              out);
-      outbuf += 31 + _mm_popcnt_u32(_cvtmask32_u32(is234byte));
-      carry = 0;
-
-      if (inlen < 32) {
-        goto tail;
-      } else {
-        continue;
-      }
-    }
-    __m512i lo = _mm512_cvtepu16_epi32(_mm512_castsi512_si256(in));
-    __m512i hi = _mm512_cvtepu16_epi32(_mm512_extracti32x8_epi32(in, 1));
-
-
-    __m512i taglo = _mm512_set1_epi32(0x8080e000);
-    __m512i taghi = taglo;
-
-    const __m512i fc00masked = _mm512_and_epi32(in, _mm512_set1_epi16(int16_t(0xfc00)));
-    const __mmask32 hisurr = _mm512_mask_cmp_epu16_mask(
-        inmask, fc00masked, _mm512_set1_epi16(int16_t(0xd800)), _MM_CMPINT_EQ);
-    const __mmask32 losurr = _mm512_cmp_epu16_mask(
-        fc00masked, _mm512_set1_epi16(int16_t(0xdc00)), _MM_CMPINT_EQ);
-
-    int carryout = 0;
-    if (!_kortestz_mask32_u8(hisurr, losurr)) {
-      // handle surrogates
-
-      __m512i los = _mm512_alignr_epi32(hi, lo, 1);
-      __m512i his = _mm512_alignr_epi32(lo, hi, 1);
-
-      const __mmask32 hisurrhi = _kshiftri_mask32(hisurr, 16);
-      taglo =
-          _mm512_mask_mov_epi32(taglo,__mmask16(hisurr), _mm512_set1_epi32(0x808080f0));
-      taghi =
-          _mm512_mask_mov_epi32(taghi, __mmask16(hisurrhi), _mm512_set1_epi32(0x808080f0));
-
-      lo = _mm512_mask_slli_epi32(lo, __mmask16(hisurr), lo, 10);
-      hi = _mm512_mask_slli_epi32(hi, __mmask16(hisurrhi), hi, 10);
-      los = _mm512_add_epi32(los, _mm512_set1_epi32(0xfca02400));
-      his = _mm512_add_epi32(his, _mm512_set1_epi32(0xfca02400));
-      lo = _mm512_mask_add_epi32(lo, __mmask16(hisurr), lo, los);
-      hi = _mm512_mask_add_epi32(hi, __mmask16(hisurrhi), hi, his);
-
-      carryout = _cvtu32_mask32(_kshiftri_mask32(hisurr, 30));
-
-      const uint32_t  h = _cvtmask32_u32(hisurr);
-      const uint32_t  l = _cvtmask32_u32(losurr);
-      // check for mismatched surrogates
-      if ((h + h + carry) ^ l) {
-        const uint32_t lonohi = l & ~(h + h + carry);
-        const uint32_t hinolo = h & ~(l >> 1);
-        inlen = _tzcnt_u32(hinolo | lonohi);
-        inmask = __mmask32(0x7fffffff & ((1 << inlen) - 1));
-        in = _mm512_maskz_mov_epi16(inmask, in);
-        adjust = (int)inlen - 31;
-        inlen = 0;
-        goto failiteration;
-      }
-    }
-
-    hi = _mm512_maskz_mov_epi32(_cvtu32_mask16(0x7fff),hi);
-    carry = carryout;
-
-    __m512i mslo =
-        _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), lo);
-
-    __m512i mshi =
-        _mm512_multishift_epi64_epi8(_mm512_set1_epi64(0x20262c3200060c12), hi);
-
-    const __mmask32 outmask = __mmask32(_kandn_mask64(losurr, inmask));
-    const __mmask64 outmhi = _kshiftri_mask64(outmask, 16);
-
-    const __mmask32 is1byte = __mmask32(_knot_mask64(is234byte));
-    const __mmask64 is1bhi = _kshiftri_mask64(is1byte, 16);
-    const __mmask64 is12bhi = _kshiftri_mask64(is12byte, 16);
-
-    taglo =
-        _mm512_mask_mov_epi32(taglo, __mmask16(is12byte), _mm512_set1_epi32(0x80c00000));
-    taghi =
-        _mm512_mask_mov_epi32(taghi, __mmask16(is12bhi), _mm512_set1_epi32(0x80c00000));
-    __m512i magiclo = _mm512_mask_blend_epi32(__mmask16(outmask), _mm512_set1_epi32(0xffffffff),
-                                      _mm512_set1_epi32(0x00010101));
-    __m512i magichi = _mm512_mask_blend_epi32(__mmask16(outmhi), _mm512_set1_epi32(0xffffffff),
-                                      _mm512_set1_epi32(0x00010101));
-
-
-    magiclo = _mm512_mask_blend_epi32(__mmask16(outmask), _mm512_set1_epi32(0xffffffff),
-                                      _mm512_set1_epi32(0x00010101));
-    magichi = _mm512_mask_blend_epi32(__mmask16(outmhi), _mm512_set1_epi32(0xffffffff),
-                                      _mm512_set1_epi32(0x00010101));
-
-    mslo = _mm512_ternarylogic_epi32(mslo, _mm512_set1_epi32(0x3f3f3f3f), taglo,
-                                     0xea); // A&B|C
-    mshi = _mm512_ternarylogic_epi32(mshi, _mm512_set1_epi32(0x3f3f3f3f), taghi,
-                                     0xea);
-    mslo = _mm512_mask_slli_epi32(mslo, __mmask16(is1byte), lo, 24);
-
-    mshi = _mm512_mask_slli_epi32(mshi, __mmask16(is1bhi), hi, 24);
-
-    const __mmask64 wantlo = _mm512_cmp_epu8_mask(mslo, magiclo, _MM_CMPINT_NLT);
-    const __mmask64 wanthi = _mm512_cmp_epu8_mask(mshi, magichi, _MM_CMPINT_NLT);
-    const __m512i outlo = _mm512_maskz_compress_epi8(wantlo, mslo);
-    const __m512i outhi = _mm512_maskz_compress_epi8(wanthi, mshi);
-    const uint64_t wantlo_uint64 = _cvtmask64_u64(wantlo);
-    const uint64_t wanthi_uint64 = _cvtmask64_u64(wanthi);
+    }
 
-    uint64_t advlo = _mm_popcnt_u64(wantlo_uint64);
-    uint64_t advhi = _mm_popcnt_u64(wanthi_uint64);
+    return buf;
+}
+/* end file src/icelake/icelake_utf32_validation.inl.cpp */
+/* begin file src/icelake/icelake_convert_latin1_to_utf8.inl.cpp */
+// file included directly
 
-    _mm512_mask_storeu_epi8(outbuf, _cvtu64_mask64(_pext_u64(wantlo_uint64, wantlo_uint64)), outlo);
-    _mm512_mask_storeu_epi8(outbuf + advlo, _cvtu64_mask64(_pext_u64(wanthi_uint64, wanthi_uint64)), outhi);
-    outbuf += advlo + advhi;
+static inline size_t latin1_to_utf8_avx512_vec(__m512i input, size_t input_len, char *utf8_output, int mask_output) {
+  __mmask64 nonascii = _mm512_movepi8_mask(input);
+  size_t output_size = input_len + (size_t)count_ones(nonascii);
+  
+  // Mask to denote whether the byte is a leading byte that is not ascii
+  __mmask64 sixth =
+      _mm512_cmpge_epu8_mask(input, _mm512_set1_epi8(-64)); //binary representation of -64: 1100 0000
+  
+  const uint64_t alternate_bits = UINT64_C(0x5555555555555555);
+  uint64_t ascii = ~nonascii;
+  // the bits in ascii are inverted and zeros are interspersed in between them
+  uint64_t maskA = ~_pdep_u64(ascii, alternate_bits);
+  uint64_t maskB = ~_pdep_u64(ascii>>32, alternate_bits);
+  
+  // interleave bytes from top and bottom halves (abcd...ABCD -> aAbBcCdD)
+  __m512i input_interleaved = _mm512_permutexvar_epi8(_mm512_set_epi32(
+    0x3f1f3e1e, 0x3d1d3c1c, 0x3b1b3a1a, 0x39193818,
+    0x37173616, 0x35153414, 0x33133212, 0x31113010,
+    0x2f0f2e0e, 0x2d0d2c0c, 0x2b0b2a0a, 0x29092808,
+    0x27072606, 0x25052404, 0x23032202, 0x21012000
+  ), input);
+  
+  // double size of each byte, and insert the leading byte 1100 0010
+
+/* 
+upscale the bytes to 16-bit value, adding the 0b11000000 leading byte in the process.
+We adjust for the bytes that have their two most significant bits. This takes care of the first 32 bytes, assuming we interleaved the bytes. */
+  __m512i outputA = _mm512_shldi_epi16(input_interleaved, _mm512_set1_epi8(-62), 8); 
+  outputA = _mm512_mask_add_epi16(
+                                  outputA, 
+                                 (__mmask32)sixth, 
+                                  outputA, 
+                                  _mm512_set1_epi16(1 - 0x4000)); // 1- 0x4000 = 1100 0000 0000 0001????
+  
+  // in the second 32-bit half, set first or second option based on whether original input is leading byte (second case) or not (first case)
+  __m512i leadingB = _mm512_mask_blend_epi16(
+                                              (__mmask32)(sixth>>32), 
+                                              _mm512_set1_epi16(0x00c2), // 0000 0000 1101 0010
+                                              _mm512_set1_epi16(0x40c3));// 0100 0000 1100 0011
+  __m512i outputB = _mm512_ternarylogic_epi32(
+                                              input_interleaved, 
+                                              leadingB, 
+                                              _mm512_set1_epi16((short)0xff00), 
+                                              (240 & 170) ^ 204); // (input_interleaved & 0xff00) ^ leadingB
+  
+  // prune redundant bytes
+  outputA = _mm512_maskz_compress_epi8(maskA, outputA);
+  outputB = _mm512_maskz_compress_epi8(maskB, outputB);
+  
+  
+  size_t output_sizeA = (size_t)count_ones((uint32_t)nonascii) + 32;
+
+  if(mask_output) {
+    if(input_len > 32) { // is the second half of the input vector used?
+      __mmask64 write_mask = _bzhi_u64(~0ULL, (unsigned int)output_sizeA);
+      _mm512_mask_storeu_epi8(utf8_output, write_mask, outputA);
+      utf8_output += output_sizeA;
+      write_mask = _bzhi_u64(~0ULL, (unsigned int)(output_size - output_sizeA));
+      _mm512_mask_storeu_epi8(utf8_output, write_mask, outputB);
+    } else {
+      __mmask64 write_mask = _bzhi_u64(~0ULL, (unsigned int)output_size);
+      _mm512_mask_storeu_epi8(utf8_output, write_mask, outputA);
+    }
+  } else {
+    _mm512_storeu_si512(utf8_output, outputA);
+    utf8_output += output_sizeA;
+    _mm512_storeu_si512(utf8_output, outputB);
   }
-  outbuf -= adjust;
+  return output_size;
+}
+ 
+static inline size_t latin1_to_utf8_avx512_branch(__m512i input, char *utf8_output) {
+  __mmask64 nonascii = _mm512_movepi8_mask(input);
+  if(nonascii) {
+    return latin1_to_utf8_avx512_vec(input, 64, utf8_output, 0);
+  } else {
+    _mm512_storeu_si512(utf8_output, input);
+    return 64;
+  }
+}
+ 
+size_t latin1_to_utf8_avx512_start(const char *buf, size_t len, char *utf8_output) {
+  char *start = utf8_output;
+  size_t pos = 0;
+  // if there's at least 128 bytes remaining, we don't need to mask the output
+  for (; pos + 128 <= len; pos += 64) {
+    __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos));
+    utf8_output += latin1_to_utf8_avx512_branch(input, utf8_output);
+  }
+  // in the last 128 bytes, the first 64 may require masking the output
+  if (pos + 64 <= len) {
+    __m512i input = _mm512_loadu_si512((__m512i *)(buf + pos));
+    utf8_output += latin1_to_utf8_avx512_vec(input, 64, utf8_output, 1);
+    pos += 64;
+  }
+  // with the last 64 bytes, the input also needs to be masked
+  if (pos < len) {
+    __mmask64 load_mask = _bzhi_u64(~0ULL, (unsigned int)(len - pos));
+    __m512i input = _mm512_maskz_loadu_epi8(load_mask, (__m512i *)(buf + pos));
+    utf8_output += latin1_to_utf8_avx512_vec(input, len - pos, utf8_output, 1);
+  }
+  return (size_t)(utf8_output - start);
+}
+/* end file src/icelake/icelake_convert_latin1_to_utf8.inl.cpp */
+/* begin file src/icelake/icelake_convert_latin1_to_utf16.inl.cpp */
+// file included directly
+template 
+size_t icelake_convert_latin1_to_utf16(const char *latin1_input, size_t len,
+                                       char16_t *utf16_output) {
+  size_t rounded_len = len & ~0x1F; // Round down to nearest multiple of 32
+
+  __m512i byteflip = _mm512_setr_epi64(0x0607040502030001, 0x0e0f0c0d0a0b0809,
+                                       0x0607040502030001, 0x0e0f0c0d0a0b0809,
+                                       0x0607040502030001, 0x0e0f0c0d0a0b0809,
+                                       0x0607040502030001, 0x0e0f0c0d0a0b0809);
+  for (size_t i = 0; i < rounded_len; i += 32) {
+    // Load 32 Latin1 characters into a 256-bit register
+    __m256i in = _mm256_loadu_si256((__m256i *)&latin1_input[i]);
+    // Zero extend each set of 8 Latin1 characters to 32 16-bit integers
+    __m512i out = _mm512_cvtepu8_epi16(in);
+    if (big_endian) {
+      out = _mm512_shuffle_epi8(out, byteflip);
+    }
+    // Store the results back to memory
+    _mm512_storeu_si512((__m512i *)&utf16_output[i], out);
+  }
+  if (rounded_len != len) {
+    uint32_t mask = uint32_t(1 << (len - rounded_len)) - 1;
+    __m256i in = _mm256_maskz_loadu_epi8(mask, latin1_input + rounded_len);
 
-tail:
-  if (inlen != 0) {
-    // We must have inlen < 31.
-    inmask = _cvtu32_mask32((1 << inlen) - 1);
-    in = _mm512_maskz_loadu_epi16(inmask, inbuf);
-    if(big_endian) { in = _mm512_shuffle_epi8(in, byteflip); }
-    adjust = inlen - 31;
-    inlen = 0;
-    goto lastiteration;
+    // Zero extend each set of 8 Latin1 characters to 32 16-bit integers
+    __m512i out = _mm512_cvtepu8_epi16(in);
+    if (big_endian) {
+      out = _mm512_shuffle_epi8(out, byteflip);
+    }
+    // Store the results back to memory
+    _mm512_mask_storeu_epi16(utf16_output + rounded_len, mask, out);
   }
-  *outlen = (outbuf - outbuf_orig) + adjust;
-  return ((inbuf - inbuf_orig) + adjust);
+
+  return len;
 }
-/* end file src/icelake/icelake_convert_utf16_to_utf8.inl.cpp */
+/* end file src/icelake/icelake_convert_latin1_to_utf16.inl.cpp */
+/* begin file src/icelake/icelake_convert_latin1_to_utf32.inl.cpp */
+std::pair avx512_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
+    size_t rounded_len = len & ~0xF;  // Round down to nearest multiple of 16
+    
+    for (size_t i = 0; i < rounded_len; i += 16) { 
+        // Load 16 Latin1 characters into a 128-bit register
+        __m128i in = _mm_loadu_si128((__m128i*)&buf[i]);
+        
+        // Zero extend each set of 8 Latin1 characters to 16 32-bit integers using vpmovzxbd
+        __m512i out = _mm512_cvtepu8_epi32(in);
+        
+        // Store the results back to memory
+        _mm512_storeu_si512((__m512i*)&utf32_output[i], out);
+    }
+
+    // Return pointers pointing to where we left off
+    return std::make_pair(buf + rounded_len, utf32_output + rounded_len);
+}
+/* end file src/icelake/icelake_convert_latin1_to_utf32.inl.cpp */
+
+
+#include 
 
 } // namespace
 } // namespace icelake
@@ -18148,7 +20739,6 @@ size_t utf16_to_utf8_avx512i(const char16_t *inbuf, size_t inlen,
 namespace simdutf {
 namespace icelake {
 
-
 simdutf_warn_unused int
 implementation::detect_encodings(const char *input,
                                  size_t length) const noexcept {
@@ -18183,7 +20773,7 @@ implementation::detect_encodings(const char *input,
         // be valid UTF-16LE, at least one surrogate must be in the two most
         // significant bytes of a 32-bit word since they always come in pairs in
         // UTF-16LE. Note that we always proceed in multiple of 4 before this
-        // point so there is no offset in 32-bit words.
+        // point so there is no offset in 32-bit code units.
 
         if ((surrogates & 0xaaaaaaaa) != 0) {
           is_utf32 = false;
@@ -18199,7 +20789,7 @@ implementation::detect_encodings(const char *input,
           if (ends_with_high) {
             buf +=
                 31 *
-                sizeof(char16_t); // advance only by 31 words so that we start
+                sizeof(char16_t); // advance only by 31 code units so that we start
                                   // with the high surrogate on the next round.
           } else {
             buf += 32 * sizeof(char16_t);
@@ -18368,7 +20958,7 @@ simdutf_warn_unused bool implementation::validate_utf16le(const char16_t *buf, s
         }
         bool ends_with_high = ((highsurrogates & 0x80000000) != 0);
         if(ends_with_high) {
-          buf += 31; // advance only by 31 words so that we start with the high surrogate on the next round.
+          buf += 31; // advance only by 31 code units so that we start with the high surrogate on the next round.
         } else {
           buf += 32;
         }
@@ -18417,7 +21007,7 @@ simdutf_warn_unused bool implementation::validate_utf16be(const char16_t *buf, s
         }
         bool ends_with_high = ((highsurrogates & 0x80000000) != 0);
         if(ends_with_high) {
-          buf += 31; // advance only by 31 words so that we start with the high surrogate on the next round.
+          buf += 31; // advance only by 31 code units so that we start with the high surrogate on the next round.
         } else {
           buf += 32;
         }
@@ -18459,7 +21049,7 @@ simdutf_warn_unused result implementation::validate_utf16le_with_errors(const ch
         }
         bool ends_with_high = ((highsurrogates & 0x80000000) != 0);
         if(ends_with_high) {
-          buf += 31; // advance only by 31 words so that we start with the high surrogate on the next round.
+          buf += 31; // advance only by 31 code units so that we start with the high surrogate on the next round.
         } else {
           buf += 32;
         }
@@ -18513,7 +21103,7 @@ simdutf_warn_unused result implementation::validate_utf16be_with_errors(const ch
         }
         bool ends_with_high = ((highsurrogates & 0x80000000) != 0);
         if(ends_with_high) {
-          buf += 31; // advance only by 31 words so that we start with the high surrogate on the next round.
+          buf += 31; // advance only by 31 code units so that we start with the high surrogate on the next round.
         } else {
           buf += 32;
         }
@@ -18588,6 +21178,63 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors(const char
     return result(error_code::SUCCESS, len);
 }
 
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
+  return icelake::latin1_to_utf8_avx512_start(buf, len, utf8_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return icelake_convert_latin1_to_utf16(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+  return icelake_convert_latin1_to_utf16(buf, len, utf16_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
+    std::pair ret = avx512_convert_latin1_to_utf32(buf, len, utf32_output);
+    if (ret.first == nullptr) { return 0; }
+    size_t converted_chars = ret.second - utf32_output;
+    if (ret.first != buf + len) {
+        const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert(
+                                              ret.first, len - (ret.first - buf), ret.second);
+        if (scalar_converted_chars == 0) { return 0; }
+        converted_chars += scalar_converted_chars;
+    }
+    return converted_chars;
+}
+
+simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
+  return icelake::utf8_to_latin1_avx512(buf, len, latin1_output);
+}
+
+
+simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
+  // Initialize output length and input length counters
+  size_t inlen = 0;
+
+  // First, try to convert as much as possible using the SIMD implementation.
+  inlen = icelake::utf8_to_latin1_avx512(buf, len, latin1_output);
+  
+  // If we have completely converted the string
+  if(inlen == len) {
+    return {simdutf::SUCCESS, len};
+  }
+  
+  // Else if there are remaining bytes, use the scalar function to process them.
+  // Note: This is assuming scalar::utf8_to_latin1::convert_with_errors is a function that takes 
+  // the input buffer, length, and output buffer, and returns a result object with an error code 
+  // and the number of characters processed.
+  result res = scalar::utf8_to_latin1::convert_with_errors(buf + inlen, len - inlen, latin1_output + inlen);
+  res.count += inlen; // Add the number of characters processed by the SIMD implementation
+
+  return res;
+}
+
+
+simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
+  return icelake::valid_utf8_to_latin1_avx512(buf, len, latin1_output);
+}
+
 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
   utf8_to_utf16_result ret = fast_avx512_convert_utf8_to_utf16(buf, len, utf16_output);
   if (ret.second == nullptr) {
@@ -18767,6 +21414,33 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const cha
   return saved_bytes;
 }
 
+
+simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  return icelake_convert_utf16_to_latin1(buf,len,latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  return icelake_convert_utf16_to_latin1(buf,len,latin1_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  return icelake_convert_utf16_to_latin1_with_errors(buf,len,latin1_output).first;
+}
+
+simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  return icelake_convert_utf16_to_latin1_with_errors(buf,len,latin1_output).first;
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  // optimization opportunity: implement custom function
+  return convert_utf16be_to_latin1(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  // optimization opportunity: implement custom function
+  return convert_utf16le_to_latin1(buf, len, latin1_output);
+}
+
 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
   size_t outlen;
   size_t inlen = utf16_to_utf8_avx512i(buf, len, (unsigned char*)utf8_output, &outlen);
@@ -18811,6 +21485,18 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const c
   return convert_utf16be_to_utf8(buf, len, utf8_output);
 }
 
+simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  return icelake_convert_utf32_to_latin1(buf,len,latin1_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  return icelake_convert_utf32_to_latin1_with_errors(buf,len,latin1_output).first;
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  return icelake_convert_utf32_to_latin1(buf,len,latin1_output);
+}
+
 
 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
   std::pair ret = avx512_convert_utf32_to_utf8(buf, len, utf8_output);
@@ -18826,7 +21512,7 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t*
 }
 
 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = icelake::avx512_convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
   if (ret.first.count != len) {
     result scalar_res = scalar::utf32_to_utf8::convert_with_errors(
@@ -18838,7 +21524,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(con
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
@@ -18873,7 +21559,7 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32
 }
 
 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = avx512_convert_utf32_to_utf16_with_errors(buf, len, utf16_output);
   if (ret.first.count != len) {
     result scalar_res = scalar::utf32_to_utf16::convert_with_errors(
@@ -18885,12 +21571,12 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = avx512_convert_utf32_to_utf16_with_errors(buf, len, utf16_output);
   if (ret.first.count != len) {
     result scalar_res = scalar::utf32_to_utf16::convert_with_errors(
@@ -18902,7 +21588,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
@@ -19088,23 +21774,75 @@ simdutf_warn_unused size_t implementation::count_utf16be(const char16_t * input,
 
 
 simdutf_warn_unused size_t implementation::count_utf8(const char * input, size_t length) const noexcept {
-  const char* end = length >= 64 ? input + length - 64 : nullptr;
-  const char* ptr = input;
+  const uint8_t *str = reinterpret_cast(input);
+  size_t answer =  length / sizeof(__m512i) * sizeof(__m512i); // Number of 512-bit chunks that fits into the length.
+  size_t i = 0;
+  __m512i unrolled_popcount{0}; 
 
   const __m512i continuation = _mm512_set1_epi8(char(0b10111111));
 
-  size_t count{0};
+  while (i + sizeof(__m512i) <= length) {
+    size_t iterations = (length - i) / sizeof(__m512i);
 
-  while (ptr <= end) {
-    __m512i utf8 = _mm512_loadu_si512((const __m512i*)ptr);
-    ptr += 64;
-    uint64_t continuation_bitmask = static_cast(_mm512_cmple_epi8_mask(utf8, continuation));
-    count += 64 - count_ones(continuation_bitmask);
+    size_t max_i = i + iterations * sizeof(__m512i) - sizeof(__m512i);
+    for (; i + 8*sizeof(__m512i) <= max_i; i += 8*sizeof(__m512i)) {
+        __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i));
+        __m512i input2 = _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i)));
+        __m512i input3 = _mm512_loadu_si512((const __m512i *)(str + i + 2*sizeof(__m512i)));
+        __m512i input4 = _mm512_loadu_si512((const __m512i *)(str + i + 3*sizeof(__m512i)));
+        __m512i input5 = _mm512_loadu_si512((const __m512i *)(str + i + 4*sizeof(__m512i)));
+        __m512i input6 = _mm512_loadu_si512((const __m512i *)(str + i + 5*sizeof(__m512i)));
+        __m512i input7 = _mm512_loadu_si512((const __m512i *)(str + i + 6*sizeof(__m512i)));
+        __m512i input8 = _mm512_loadu_si512((const __m512i *)(str + i + 7*sizeof(__m512i)));
+
+
+        __mmask64 mask1 = _mm512_cmple_epi8_mask(input1, continuation);
+        __mmask64 mask2 = _mm512_cmple_epi8_mask(input2, continuation);
+        __mmask64 mask3 = _mm512_cmple_epi8_mask(input3, continuation);
+        __mmask64 mask4 = _mm512_cmple_epi8_mask(input4, continuation);
+        __mmask64 mask5 = _mm512_cmple_epi8_mask(input5, continuation);
+        __mmask64 mask6 = _mm512_cmple_epi8_mask(input6, continuation);
+        __mmask64 mask7 = _mm512_cmple_epi8_mask(input7, continuation);
+        __mmask64 mask8 = _mm512_cmple_epi8_mask(input8, continuation);
+
+        __m512i mask_register = _mm512_set_epi64(mask8, mask7, mask6, mask5, mask4, mask3, mask2, mask1);
+
+
+        unrolled_popcount = _mm512_add_epi64(unrolled_popcount, _mm512_popcnt_epi64(mask_register));
+    }
+
+    for (; i <= max_i; i += sizeof(__m512i)) {
+      __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i));
+      uint64_t continuation_bitmask = static_cast(_mm512_cmple_epi8_mask(more_input, continuation));
+      answer -= count_ones(continuation_bitmask);
+    }
   }
 
-  return count + scalar::utf8::count_code_points(ptr, length - (ptr - input));
+  __m256i first_half = _mm512_extracti64x4_epi64(unrolled_popcount, 0);
+  __m256i second_half = _mm512_extracti64x4_epi64(unrolled_popcount, 1);
+  answer -= (size_t)_mm256_extract_epi64(first_half, 0) +
+            (size_t)_mm256_extract_epi64(first_half, 1) +
+            (size_t)_mm256_extract_epi64(first_half, 2) +
+            (size_t)_mm256_extract_epi64(first_half, 3) +
+            (size_t)_mm256_extract_epi64(second_half, 0) +
+            (size_t)_mm256_extract_epi64(second_half, 1) +
+            (size_t)_mm256_extract_epi64(second_half, 2) +
+            (size_t)_mm256_extract_epi64(second_half, 3);
+
+  return answer + scalar::utf8::count_code_points(reinterpret_cast(str + i), length - i);
+}
+
+simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
+  return count_utf8(buf,len);
+}
+
+simdutf_warn_unused size_t implementation::latin1_length_from_utf16(size_t length) const noexcept {
+  return scalar::utf16::latin1_length_from_utf16(length);
 }
 
+simdutf_warn_unused size_t implementation::latin1_length_from_utf32(size_t length) const noexcept {
+  return scalar::utf32::latin1_length_from_utf32(length);
+}
 
 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
   const char16_t* end = length >= 32 ? input + length - 32 : nullptr;
@@ -19183,6 +21921,76 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(const char1
   return implementation::count_utf16be(input, length);
 }
 
+simdutf_warn_unused size_t implementation::utf16_length_from_latin1(size_t length) const noexcept {
+  return scalar::latin1::utf16_length_from_latin1(length);
+}
+
+
+simdutf_warn_unused size_t implementation::utf32_length_from_latin1(size_t length) const noexcept {
+  return scalar::latin1::utf32_length_from_latin1(length);
+}
+
+simdutf_warn_unused size_t implementation::utf8_length_from_latin1(const char * input, size_t length) const noexcept {
+  const uint8_t *str = reinterpret_cast(input);
+  size_t answer = length / sizeof(__m512i) * sizeof(__m512i);
+  size_t i = 0;
+  unsigned char v_0xFF = 0xff;
+  __m512i eight_64bits = _mm512_setzero_si512();
+  while (i + sizeof(__m512i) <= length) {
+    __m512i runner = _mm512_setzero_si512();
+    size_t iterations = (length - i) / sizeof(__m512i);
+    if (iterations > 255) {
+      iterations = 255;
+    }
+    size_t max_i = i + iterations * sizeof(__m512i) - sizeof(__m512i);
+    for (; i + 4*sizeof(__m512i) <= max_i; i += 4*sizeof(__m512i)) {
+            // Load four __m512i vectors
+            __m512i input1 = _mm512_loadu_si512((const __m512i *)(str + i));
+            __m512i input2 = _mm512_loadu_si512((const __m512i *)(str + i + sizeof(__m512i)));
+            __m512i input3 = _mm512_loadu_si512((const __m512i *)(str + i + 2*sizeof(__m512i)));
+            __m512i input4 = _mm512_loadu_si512((const __m512i *)(str + i + 3*sizeof(__m512i)));
+
+            // Generate four masks
+            __mmask64 mask1 = _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input1);
+            __mmask64 mask2 = _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input2);
+            __mmask64 mask3 = _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input3);
+            __mmask64 mask4 = _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), input4);
+            // Apply the masks and subtract from the runner
+            __m512i not_ascii1 = _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask1, v_0xFF);
+            __m512i not_ascii2 = _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask2, v_0xFF);
+            __m512i not_ascii3 = _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask3, v_0xFF);
+            __m512i not_ascii4 = _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask4, v_0xFF);
+
+            runner = _mm512_sub_epi8(runner, not_ascii1);
+            runner = _mm512_sub_epi8(runner, not_ascii2);
+            runner = _mm512_sub_epi8(runner, not_ascii3);
+            runner = _mm512_sub_epi8(runner, not_ascii4);
+    }
+
+    for (; i <= max_i; i += sizeof(__m512i)) {
+      __m512i more_input = _mm512_loadu_si512((const __m512i *)(str + i));
+
+      __mmask64 mask = _mm512_cmpgt_epi8_mask(_mm512_setzero_si512(), more_input);
+      __m512i not_ascii = _mm512_mask_set1_epi8(_mm512_setzero_si512(), mask, v_0xFF);
+      runner = _mm512_sub_epi8(runner, not_ascii);
+    }
+
+    eight_64bits = _mm512_add_epi64(eight_64bits, _mm512_sad_epu8(runner, _mm512_setzero_si512()));
+  }
+
+  __m256i first_half = _mm512_extracti64x4_epi64(eight_64bits, 0);
+  __m256i second_half = _mm512_extracti64x4_epi64(eight_64bits, 1);
+  answer += (size_t)_mm256_extract_epi64(first_half, 0) +
+            (size_t)_mm256_extract_epi64(first_half, 1) +
+            (size_t)_mm256_extract_epi64(first_half, 2) +
+            (size_t)_mm256_extract_epi64(first_half, 3) +
+            (size_t)_mm256_extract_epi64(second_half, 0) +
+            (size_t)_mm256_extract_epi64(second_half, 1) +
+            (size_t)_mm256_extract_epi64(second_half, 2) +
+            (size_t)_mm256_extract_epi64(second_half, 3);
+  return answer + scalar::latin1::utf8_length_from_latin1(reinterpret_cast(str + i), length - i);
+}
+
 simdutf_warn_unused size_t implementation::utf16_length_from_utf8(const char * input, size_t length) const noexcept {
     size_t pos = 0;
     size_t count = 0;
@@ -19252,7 +22060,6 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * i
 } // namespace icelake
 } // namespace simdutf
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/icelake/end.h
 /* begin file src/simdutf/icelake/end.h */
 #if SIMDUTF_CAN_ALWAYS_RUN_ICELAKE
 // nothing needed.
@@ -19268,10 +22075,8 @@ SIMDUTF_POP_DISABLE_WARNINGS
 /* end file src/icelake/implementation.cpp */
 #endif
 #if SIMDUTF_IMPLEMENTATION_HASWELL
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/implementation.cpp
 /* begin file src/haswell/implementation.cpp */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/begin.h
 /* begin file src/simdutf/haswell/begin.h */
 // redefining SIMDUTF_IMPLEMENTATION to "haswell"
 // #define SIMDUTF_IMPLEMENTATION haswell
@@ -19314,7 +22119,6 @@ simdutf_really_inline simd8 must_be_2_3_continuation(const simd8
   return simd8(is_third_byte | is_fourth_byte) > int8_t(0);
 }
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_detect_encodings.cpp
 /* begin file src/haswell/avx2_detect_encodings.cpp */
 template
 // len is known to be a multiple of 2 when this is called
@@ -19358,7 +22162,7 @@ int avx2_detect_encodings(const char * buf, size_t len) {
             // To be valid UTF-32, a surrogate cannot be in the two most significant bytes of any 32-bit word.
             // On the other hand, to be valid UTF-16LE, at least one surrogate must be in the two most significant
             // bytes of a 32-bit word since they always come in pairs in UTF-16LE.
-            // Note that we always proceed in multiple of 4 before this point so there is no offset in 32-bit words.
+            // Note that we always proceed in multiple of 4 before this point so there is no offset in 32-bit code units.
 
             if ((surrogates_bitmask0 & 0xaaaaaaaa) != 0) {
                 is_utf32 = false;
@@ -19504,10 +22308,9 @@ int avx2_detect_encodings(const char * buf, size_t len) {
 }
 /* end file src/haswell/avx2_detect_encodings.cpp */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_validate_utf16.cpp
 /* begin file src/haswell/avx2_validate_utf16.cpp */
 /*
-    In UTF-16 words in range 0xD800 to 0xDFFF have special meaning.
+    In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning.
 
     In a vectorized algorithm we want to examine the most significant
     nibble in order to select a fast path. If none of highest nibbles
@@ -19543,7 +22346,7 @@ int avx2_detect_encodings(const char * buf, size_t len) {
       0   0   1   0   1   0   0   0   b = a << 1
       1   1   1   1   1   1   1   0   c = V | a | b
                                   ^
-                                  the last bit can be zero, we just consume 7 words
+                                  the last bit can be zero, we just consume 7 code units
                                   and recheck this word in the next iteration
 */
 
@@ -19589,7 +22392,7 @@ const char16_t* avx2_validate_utf16(const char16_t* input, size_t size) {
             //
             //    Fact: high surrogate has 11th bit set (3rd bit in the higher word)
 
-            // V - non-surrogate words
+            // V - non-surrogate code units
             //     V = not surrogates_wordmask
             const uint32_t V = ~surrogates_bitmask;
 
@@ -19610,10 +22413,10 @@ const char16_t* avx2_validate_utf16(const char16_t* input, size_t size) {
 
             if (c == 0xffffffff) {
                 // The whole input register contains valid UTF-16, i.e.,
-                // either single words or proper surrogate pairs.
+                // either single code units or proper surrogate pairs.
                 input += simd16::ELEMENTS * 2;
             } else if (c == 0x7fffffff) {
-                // The 31 lower words of the input register contains valid UTF-16.
+                // The 31 lower code units of the input register contains valid UTF-16.
                 // The 31 word may be either a low or high surrogate. It the next
                 // iteration we 1) check if the low surrogate is followed by a high
                 // one, 2) reject sole high surrogate.
@@ -19667,7 +22470,7 @@ const result avx2_validate_utf16_with_errors(const char16_t* input, size_t size)
             //
             //    Fact: high surrogate has 11th bit set (3rd bit in the higher word)
 
-            // V - non-surrogate words
+            // V - non-surrogate code units
             //     V = not surrogates_wordmask
             const uint32_t V = ~surrogates_bitmask;
 
@@ -19688,10 +22491,10 @@ const result avx2_validate_utf16_with_errors(const char16_t* input, size_t size)
 
             if (c == 0xffffffff) {
                 // The whole input register contains valid UTF-16, i.e.,
-                // either single words or proper surrogate pairs.
+                // either single code units or proper surrogate pairs.
                 input += simd16::ELEMENTS * 2;
             } else if (c == 0x7fffffff) {
-                // The 31 lower words of the input register contains valid UTF-16.
+                // The 31 lower code units of the input register contains valid UTF-16.
                 // The 31 word may be either a low or high surrogate. It the next
                 // iteration we 1) check if the low surrogate is followed by a high
                 // one, 2) reject sole high surrogate.
@@ -19705,7 +22508,6 @@ const result avx2_validate_utf16_with_errors(const char16_t* input, size_t size)
     return result(error_code::SUCCESS, input - start);
 }
 /* end file src/haswell/avx2_validate_utf16.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_validate_utf32le.cpp
 /* begin file src/haswell/avx2_validate_utf32le.cpp */
 /* Returns:
    - pointer to the last unprocessed character (a scalar fallback should check the rest);
@@ -19771,7 +22573,145 @@ const result avx2_validate_utf32le_with_errors(const char32_t* input, size_t siz
 }
 /* end file src/haswell/avx2_validate_utf32le.cpp */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf8_to_utf16.cpp
+/* begin file src/haswell/avx2_convert_latin1_to_utf8.cpp */
+std::pair avx2_convert_latin1_to_utf8(const char *latin1_input, size_t len,
+                           char *utf8_output) {
+  const char *end = latin1_input + len;
+  const __m256i v_0000 = _mm256_setzero_si256();
+  const __m256i v_c080 = _mm256_set1_epi16((int16_t)0xc080);
+  const __m256i v_ff80 = _mm256_set1_epi16((int16_t)0xff80);
+  const size_t safety_margin = 12;
+
+  while (latin1_input + 16 + safety_margin <= end) {
+    __m128i in8 = _mm_loadu_si128((__m128i *)latin1_input);
+    // a single 16-bit UTF-16 word can yield 1, 2 or 3 UTF-8 bytes
+    const __m128i v_80 = _mm_set1_epi8((char)0x80);
+    if (_mm_testz_si128(in8, v_80)) { // ASCII fast path!!!!
+      // 1. store (16 bytes)
+      _mm_storeu_si128((__m128i *)utf8_output, in8);
+      // 2. adjust pointers
+      latin1_input += 16;
+      utf8_output += 16;
+      continue; // we are done for this round!
+    }
+    // We proceed only with the first 16 bytes.
+    const __m256i in = _mm256_cvtepu8_epi16((in8));
+
+    // 1. prepare 2-byte values
+    // input 16-bit word : [0000|0000|aabb|bbbb] x 8
+    // expected output   : [1100|00aa|10bb|bbbb] x 8
+    const __m256i v_1f00 = _mm256_set1_epi16((int16_t)0x1f00);
+    const __m256i v_003f = _mm256_set1_epi16((int16_t)0x003f);
+
+    // t0 = [0000|00aa|bbbb|bb00]
+    const __m256i t0 = _mm256_slli_epi16(in, 2);
+    // t1 = [0000|00aa|0000|0000]
+    const __m256i t1 = _mm256_and_si256(t0, v_1f00);
+    // t2 = [0000|0000|00bb|bbbb]
+    const __m256i t2 = _mm256_and_si256(in, v_003f);
+    // t3 = [000a|aaaa|00bb|bbbb]
+    const __m256i t3 = _mm256_or_si256(t1, t2);
+    // t4 = [1100|00aa|10bb|bbbb]
+    const __m256i t4 = _mm256_or_si256(t3, v_c080);
+
+    // 2. merge ASCII and 2-byte codewords
+
+    // no bits set above 7th bit
+    const __m256i one_byte_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in, v_ff80), v_0000);
+    const uint32_t one_byte_bitmask = static_cast(_mm256_movemask_epi8(one_byte_bytemask));
+
+    const __m256i utf8_unpacked = _mm256_blendv_epi8(t4, in, one_byte_bytemask);
+
+    // 3. prepare bitmask for 8-bit lookup
+    const uint32_t M0 = one_byte_bitmask & 0x55555555;
+    const uint32_t M1 = M0 >> 7;
+    const uint32_t M2 = (M1 | M0) & 0x00ff00ff;
+    // 4. pack the bytes
+
+    const uint8_t *row =
+        &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2)][0];
+    const uint8_t *row_2 =
+        &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[uint8_t(M2 >> 16)]
+                                                            [0];
+
+    const __m128i shuffle = _mm_loadu_si128((__m128i *)(row + 1));
+    const __m128i shuffle_2 = _mm_loadu_si128((__m128i *)(row_2 + 1));
+
+    const __m256i utf8_packed = _mm256_shuffle_epi8(
+        utf8_unpacked, _mm256_setr_m128i(shuffle, shuffle_2));
+    // 5. store bytes
+    _mm_storeu_si128((__m128i *)utf8_output,
+                     _mm256_castsi256_si128(utf8_packed));
+    utf8_output += row[0];
+    _mm_storeu_si128((__m128i *)utf8_output,
+                     _mm256_extractf128_si256(utf8_packed, 1));
+    utf8_output += row_2[0];
+
+    // 6. adjust pointers
+    latin1_input += 16;
+    continue;
+
+  } // while
+  return std::make_pair(latin1_input, utf8_output);
+}
+/* end file src/haswell/avx2_convert_latin1_to_utf8.cpp */
+/* begin file src/haswell/avx2_convert_latin1_to_utf16.cpp */
+template 
+std::pair avx2_convert_latin1_to_utf16(const char* latin1_input, size_t len, char16_t* utf16_output) {
+    size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 32
+
+    size_t i = 0;
+    for (; i < rounded_len; i += 16) {
+        // Load 16 bytes from the address (input + i) into a xmm register
+        __m128i xmm0 = _mm_loadu_si128(reinterpret_cast(latin1_input + i));
+
+        // Zero extend each byte in xmm0 to word and put it in another xmm register
+        __m128i xmm1 = _mm_cvtepu8_epi16(xmm0);
+        
+        // Shift xmm0 to the right by 8 bytes
+        xmm0 = _mm_srli_si128(xmm0, 8);
+        
+        // Zero extend each byte in the shifted xmm0 to word in xmm0
+        xmm0 = _mm_cvtepu8_epi16(xmm0);
+
+        if (big_endian) {
+            const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+            xmm0 = _mm_shuffle_epi8(xmm0, swap);
+            xmm1 = _mm_shuffle_epi8(xmm1, swap);
+        }
+        
+        // Store the contents of xmm1 into the address pointed by (output + i)
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(utf16_output + i), xmm1);
+        
+        // Store the contents of xmm0 into the address pointed by (output + i + 8)
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(utf16_output + i + 8), xmm0);
+    }
+
+    return std::make_pair(latin1_input + rounded_len, utf16_output + rounded_len);
+
+}
+/* end file src/haswell/avx2_convert_latin1_to_utf16.cpp */
+/* begin file src/haswell/avx2_convert_latin1_to_utf32.cpp */
+std::pair avx2_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
+    size_t rounded_len = ((len | 7) ^ 7);  // Round down to nearest multiple of 8
+    
+    for (size_t i = 0; i < rounded_len; i += 8) { 
+        // Load 8 Latin1 characters into a 64-bit register
+        __m128i in = _mm_loadl_epi64((__m128i*)&buf[i]);
+        
+        // Zero extend each set of 8 Latin1 characters to 8 32-bit integers using vpmovzxbd
+        __m256i out = _mm256_cvtepu8_epi32(in);
+        
+        // Store the results back to memory
+        _mm256_storeu_si256((__m256i*)&utf32_output[i], out);
+    }
+
+    // return pointers pointing to where we left off
+    return std::make_pair(buf + rounded_len, utf32_output + rounded_len);
+}
+
+/* end file src/haswell/avx2_convert_latin1_to_utf32.cpp */
+
 /* begin file src/haswell/avx2_convert_utf8_to_utf16.cpp */
 // depends on "tables/utf8_to_utf16_tables.h"
 
@@ -19811,7 +22751,7 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     return 16; // We consumed 16 bytes.
   }
   if(((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) {
-    // We want to take 8 2-byte UTF-8 words and turn them into 8 2-byte UTF-16 words.
+    // We want to take 8 2-byte UTF-8 code units and turn them into 8 2-byte UTF-16 code units.
     // There is probably a more efficient sequence, but the following might do.
     const __m128i sh = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -19824,7 +22764,7 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     return 16;
   }
   if(input_utf8_end_of_code_point_mask == 0x924) {
-    // We want to take 4 3-byte UTF-8 words and turn them into 4 2-byte UTF-16 words.
+    // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte UTF-16 code units.
     // There is probably a more efficient sequence, but the following might do.
     const __m128i sh = _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -19850,10 +22790,10 @@ size_t convert_masked_utf8_to_utf16(const char *input,
   const uint8_t consumed =
       simdutf::tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
   if (idx < 64) {
-    // SIX (6) input code-words
+    // SIX (6) input code-code units
     // this is a relatively easy scenario
-    // we process SIX (6) input code-words. The max length in bytes of six code
-    // words spanning between 1 and 2 bytes each is 12 bytes. On processors
+    // we process SIX (6) input code-code units. The max length in bytes of six code
+    // code units spanning between 1 and 2 bytes each is 12 bytes. On processors
     // where pdep/pext is fast, we might be able to use a small lookup table.
     const __m128i sh =
         _mm_loadu_si128((const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]);
@@ -19865,7 +22805,7 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     _mm_storeu_si128((__m128i *)utf16_output, composed);
     utf16_output += 6; // We wrote 12 bytes, 6 code points. There is a potential overflow of 4 bytes.
   } else if (idx < 145) {
-    // FOUR (4) input code-words
+    // FOUR (4) input code-code units
     const __m128i sh =
         _mm_loadu_si128((const __m128i *)simdutf::tables::utf8_to_utf16::shufutf8[idx]);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -19884,7 +22824,7 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     _mm_storeu_si128((__m128i *)utf16_output, composed_repacked);
     utf16_output += 4; // Here we overflow by 8 bytes.
   } else if (idx < 209) {
-    // TWO (2) input code-words
+    // TWO (2) input code-code units
     //////////////
     // There might be garbage inputs where a leading byte mascarades as a four-byte
     // leading byte (by being followed by 3 continuation byte), but is not greater than
@@ -19954,7 +22894,6 @@ size_t convert_masked_utf8_to_utf16(const char *input,
   return consumed;
 }
 /* end file src/haswell/avx2_convert_utf8_to_utf16.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf8_to_utf32.cpp
 /* begin file src/haswell/avx2_convert_utf8_to_utf32.cpp */
 // depends on "tables/utf8_to_utf16_tables.h"
 
@@ -19987,7 +22926,7 @@ size_t convert_masked_utf8_to_utf32(const char *input,
     return 16; // We consumed 16 bytes.
   }
   if(((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) {
-    // We want to take 8 2-byte UTF-8 words and turn them into 8 4-byte UTF-32 words.
+    // We want to take 8 2-byte UTF-8 code units and turn them into 8 4-byte UTF-32 code units.
     // There is probably a more efficient sequence, but the following might do.
     const __m128i sh = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -19999,7 +22938,7 @@ size_t convert_masked_utf8_to_utf32(const char *input,
     return 16;
   }
   if(input_utf8_end_of_code_point_mask == 0x924) {
-    // We want to take 4 3-byte UTF-8 words and turn them into 4 4-byte UTF-32 words.
+    // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte UTF-32 code units.
     // There is probably a more efficient sequence, but the following might do.
     const __m128i sh = _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -20024,10 +22963,10 @@ size_t convert_masked_utf8_to_utf32(const char *input,
   const uint8_t consumed =
       tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
   if (idx < 64) {
-    // SIX (6) input code-words
+    // SIX (6) input code-code units
     // this is a relatively easy scenario
-    // we process SIX (6) input code-words. The max length in bytes of six code
-    // words spanning between 1 and 2 bytes each is 12 bytes. On processors
+    // we process SIX (6) input code-code units. The max length in bytes of six code
+    // code units spanning between 1 and 2 bytes each is 12 bytes. On processors
     // where pdep/pext is fast, we might be able to use a small lookup table.
     const __m128i sh =
         _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
@@ -20039,7 +22978,7 @@ size_t convert_masked_utf8_to_utf32(const char *input,
     utf32_output += 6; // We wrote 24 bytes, 6 code points. There is a potential
     // overflow of 32 - 24 = 8 bytes.
   } else if (idx < 145) {
-    // FOUR (4) input code-words
+    // FOUR (4) input code-code units
     const __m128i sh =
         _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -20056,7 +22995,7 @@ size_t convert_masked_utf8_to_utf32(const char *input,
     _mm_storeu_si128((__m128i *)utf32_output, composed);
     utf32_output += 4;
   } else if (idx < 209) {
-    // TWO (2) input code-words
+    // TWO (2) input code-code units
     const __m128i sh =
         _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -20083,11 +23022,97 @@ size_t convert_masked_utf8_to_utf32(const char *input,
 }
 /* end file src/haswell/avx2_convert_utf8_to_utf32.cpp */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf16_to_utf8.cpp
+/* begin file src/haswell/avx2_convert_utf16_to_latin1.cpp */
+template 
+std::pair
+avx2_convert_utf16_to_latin1(const char16_t *buf, size_t len,
+                             char *latin1_output) {
+  const char16_t *end = buf + len;
+  while (buf + 16 <= end) {
+    // Load 16 UTF-16 characters into 256-bit AVX2 register
+    __m256i in = _mm256_loadu_si256(reinterpret_cast(buf));
+
+    if (!match_system(big_endian)) {
+      const __m256i swap = _mm256_setr_epi8(
+          1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18,
+          21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30);
+      in = _mm256_shuffle_epi8(in, swap);
+    }
+
+    __m256i high_byte_mask = _mm256_set1_epi16((int16_t)0xFF00);
+    if (_mm256_testz_si256(in, high_byte_mask)) {
+      // Pack 16-bit characters into 8-bit and store in latin1_output
+      __m128i lo = _mm256_extractf128_si256(in, 0);
+      __m128i hi = _mm256_extractf128_si256(in, 1);
+      __m128i latin1_packed_lo = _mm_packus_epi16(lo, lo);
+      __m128i latin1_packed_hi = _mm_packus_epi16(hi, hi);
+      _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output),
+                       latin1_packed_lo);
+      _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output + 8),
+                       latin1_packed_hi);
+      // Adjust pointers for next iteration
+      buf += 16;
+      latin1_output += 16;
+    } else {
+      return std::make_pair(nullptr, reinterpret_cast(latin1_output));
+    }
+  } // while
+  return std::make_pair(buf, latin1_output);
+}
+
+template 
+std::pair
+avx2_convert_utf16_to_latin1_with_errors(const char16_t *buf, size_t len,
+                                         char *latin1_output) {
+  const char16_t *start = buf;
+  const char16_t *end = buf + len;
+  while (buf + 16 <= end) {
+    __m256i in = _mm256_loadu_si256(reinterpret_cast(buf));
+
+    if (!big_endian) {
+      const __m256i swap = _mm256_setr_epi8(
+          1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18,
+          21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30);
+      in = _mm256_shuffle_epi8(in, swap);
+    }
+
+    __m256i high_byte_mask = _mm256_set1_epi16((int16_t)0xFF00);
+    if (_mm256_testz_si256(in, high_byte_mask)) {
+      __m128i lo = _mm256_extractf128_si256(in, 0);
+      __m128i hi = _mm256_extractf128_si256(in, 1);
+      __m128i latin1_packed_lo = _mm_packus_epi16(lo, lo);
+      __m128i latin1_packed_hi = _mm_packus_epi16(hi, hi);
+      _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output),
+                       latin1_packed_lo);
+      _mm_storel_epi64(reinterpret_cast<__m128i *>(latin1_output + 8),
+                       latin1_packed_hi);
+      buf += 16;
+      latin1_output += 16;
+    } else {
+      // Fallback to scalar code for handling errors
+      for (int k = 0; k < 16; k++) {
+        uint16_t word = !match_system(big_endian)
+                            ? scalar::utf16::swap_bytes(buf[k])
+                            : buf[k];
+        if (word <= 0xff) {
+          *latin1_output++ = char(word);
+        } else {
+          return std::make_pair(
+              result{error_code::TOO_LARGE, (size_t)(buf - start + k)},
+              latin1_output);
+        }
+      }
+      buf += 16;
+    }
+  } // while
+  return std::make_pair(result{error_code::SUCCESS, (size_t)(buf - start)},
+                        latin1_output);
+}
+/* end file src/haswell/avx2_convert_utf16_to_latin1.cpp */
 /* begin file src/haswell/avx2_convert_utf16_to_utf8.cpp */
 /*
     The vectorized algorithm works on single SSE register i.e., it
-    loads eight 16-bit words.
+    loads eight 16-bit code units.
 
     We consider three cases:
     1. an input register contains no surrogates and each value
@@ -20099,7 +23124,7 @@ size_t convert_masked_utf8_to_utf32(const char *input,
 
     Ad 1.
 
-    When values are less than 0x0800, it means that a 16-bit words
+    When values are less than 0x0800, it means that a 16-bit code unit
     can be converted into: 1) single UTF8 byte (when it's an ASCII
     char) or 2) two UTF8 bytes.
 
@@ -20113,7 +23138,7 @@ size_t convert_masked_utf8_to_utf32(const char *input,
 
     Ad 2.
 
-    When values fit in 16-bit words, but are above 0x07ff, then
+    When values fit in 16-bit code units, but are above 0x07ff, then
     a single word may produce one, two or three UTF8 bytes.
 
     We prepare data for all these three cases in two registers.
@@ -20230,7 +23255,7 @@ std::pair avx2_convert_utf16_to_utf8(const char16_t* buf
     // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
     // it is likely an uncommon occurrence.
     if (surrogates_bitmask == 0x00000000) {
-      // case: words from register produce either 1, 2 or 3 UTF-8 bytes
+      // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
         const __m256i dup_even = _mm256_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
                                                 0x0808, 0x0a0a, 0x0c0c, 0x0e0e,
                                                 0x0000, 0x0202, 0x0404, 0x0606,
@@ -20241,7 +23266,7 @@ std::pair avx2_convert_utf16_to_utf8(const char16_t* buf
            2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
            3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-          We expand the input word (16-bit) into two words (32-bit), thus
+          We expand the input word (16-bit) into two code units (32-bit), thus
           we have room for four bytes. However, we need five distinct bit
           layouts. Note that the last byte in cases #2 and #3 is the same.
 
@@ -20252,7 +23277,7 @@ std::pair avx2_convert_utf16_to_utf8(const char16_t* buf
           either byte 1 for case #2 or byte 2 for case #3. Note that they
           differ by exactly one bit.
 
-          Finally from these two words we build proper UTF-8 sequence, taking
+          Finally from these two code units we build proper UTF-8 sequence, taking
           into account the case (i.e, the number of bytes to write).
         */
         /**
@@ -20280,16 +23305,16 @@ std::pair avx2_convert_utf16_to_utf8(const char16_t* buf
         const __m256i s4 = _mm256_xor_si256(s3, m0);
 #undef simdutf_vec
 
-        // 4. expand words 16-bit => 32-bit
+        // 4. expand code units 16-bit => 32-bit
         const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
         const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
 
-        // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
+        // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
         const uint32_t mask = (one_byte_bitmask & 0x55555555) |
                               (one_or_two_bytes_bitmask & 0xaaaaaaaa);
         // Due to the wider registers, the following path is less likely to be useful.
         /*if(mask == 0) {
-          // We only have three-byte words. Use fast path.
+          // We only have three-byte code units. Use fast path.
           const __m256i shuffle = _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
           const __m256i utf8_0 = _mm256_shuffle_epi8(out0, shuffle);
           const __m256i utf8_1 = _mm256_shuffle_epi8(out1, shuffle);
@@ -20473,7 +23498,7 @@ std::pair avx2_convert_utf16_to_utf8_with_errors(const char16_t*
     // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
     // it is likely an uncommon occurrence.
     if (surrogates_bitmask == 0x00000000) {
-      // case: words from register produce either 1, 2 or 3 UTF-8 bytes
+      // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
         const __m256i dup_even = _mm256_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
                                                 0x0808, 0x0a0a, 0x0c0c, 0x0e0e,
                                                 0x0000, 0x0202, 0x0404, 0x0606,
@@ -20484,7 +23509,7 @@ std::pair avx2_convert_utf16_to_utf8_with_errors(const char16_t*
            2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
            3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-          We expand the input word (16-bit) into two words (32-bit), thus
+          We expand the input word (16-bit) into two code units (32-bit), thus
           we have room for four bytes. However, we need five distinct bit
           layouts. Note that the last byte in cases #2 and #3 is the same.
 
@@ -20495,7 +23520,7 @@ std::pair avx2_convert_utf16_to_utf8_with_errors(const char16_t*
           either byte 1 for case #2 or byte 2 for case #3. Note that they
           differ by exactly one bit.
 
-          Finally from these two words we build proper UTF-8 sequence, taking
+          Finally from these two code units we build proper UTF-8 sequence, taking
           into account the case (i.e, the number of bytes to write).
         */
         /**
@@ -20523,16 +23548,16 @@ std::pair avx2_convert_utf16_to_utf8_with_errors(const char16_t*
         const __m256i s4 = _mm256_xor_si256(s3, m0);
 #undef simdutf_vec
 
-        // 4. expand words 16-bit => 32-bit
+        // 4. expand code units 16-bit => 32-bit
         const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
         const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
 
-        // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
+        // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
         const uint32_t mask = (one_byte_bitmask & 0x55555555) |
                               (one_or_two_bytes_bitmask & 0xaaaaaaaa);
         // Due to the wider registers, the following path is less likely to be useful.
         /*if(mask == 0) {
-          // We only have three-byte words. Use fast path.
+          // We only have three-byte code units. Use fast path.
           const __m256i shuffle = _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
           const __m256i utf8_0 = _mm256_shuffle_epi8(out0, shuffle);
           const __m256i utf8_1 = _mm256_shuffle_epi8(out1, shuffle);
@@ -20616,11 +23641,10 @@ std::pair avx2_convert_utf16_to_utf8_with_errors(const char16_t*
   return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output);
 }
 /* end file src/haswell/avx2_convert_utf16_to_utf8.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf16_to_utf32.cpp
 /* begin file src/haswell/avx2_convert_utf16_to_utf32.cpp */
 /*
     The vectorized algorithm works on single SSE register i.e., it
-    loads eight 16-bit words.
+    loads eight 16-bit code units.
 
     We consider three cases:
     1. an input register contains no surrogates and each value
@@ -20632,7 +23656,7 @@ std::pair avx2_convert_utf16_to_utf8_with_errors(const char16_t*
 
     Ad 1.
 
-    When values are less than 0x0800, it means that a 16-bit words
+    When values are less than 0x0800, it means that a 16-bit code unit
     can be converted into: 1) single UTF8 byte (when it's an ASCII
     char) or 2) two UTF8 bytes.
 
@@ -20646,7 +23670,7 @@ std::pair avx2_convert_utf16_to_utf8_with_errors(const char16_t*
 
     Ad 2.
 
-    When values fit in 16-bit words, but are above 0x07ff, then
+    When values fit in 16-bit code units, but are above 0x07ff, then
     a single word may produce one, two or three UTF8 bytes.
 
     We prepare data for all these three cases in two registers.
@@ -20697,7 +23721,7 @@ std::pair avx2_convert_utf16_to_utf32(const char16_t
     // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
     // it is likely an uncommon occurrence.
     if (surrogates_bitmask == 0x00000000) {
-      // case: we extend all sixteen 16-bit words to sixteen 32-bit words
+      // case: we extend all sixteen 16-bit code units to sixteen 32-bit code units
         _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), _mm256_cvtepu16_epi32(_mm256_castsi256_si128(in)));
         _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output + 8), _mm256_cvtepu16_epi32(_mm256_extractf128_si256(in,1)));
         utf32_output += 16;
@@ -20765,7 +23789,7 @@ std::pair avx2_convert_utf16_to_utf32_with_errors(const char1
     // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
     // it is likely an uncommon occurrence.
     if (surrogates_bitmask == 0x00000000) {
-      // case: we extend all sixteen 16-bit words to sixteen 32-bit words
+      // case: we extend all sixteen 16-bit code units to sixteen 32-bit code units
         _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output), _mm256_cvtepu16_epi32(_mm256_castsi256_si128(in)));
         _mm256_storeu_si256(reinterpret_cast<__m256i *>(utf32_output + 8), _mm256_cvtepu16_epi32(_mm256_extractf128_si256(in,1)));
         utf32_output += 16;
@@ -20801,7 +23825,100 @@ std::pair avx2_convert_utf16_to_utf32_with_errors(const char1
 }
 /* end file src/haswell/avx2_convert_utf16_to_utf32.cpp */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf32_to_utf8.cpp
+/* begin file src/haswell/avx2_convert_utf32_to_latin1.cpp */
+std::pair
+avx2_convert_utf32_to_latin1(const char32_t *buf, size_t len,
+                             char *latin1_output) {
+  const size_t rounded_len =
+  len & ~0x1F; // Round down to nearest multiple of 32
+
+  __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00);
+
+  __m256i shufmask = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+                                    -1, 12, 8, 4, 0, -1, -1, -1, -1, -1, -1,
+                                    -1, -1, -1, -1, -1, -1, 12, 8, 4, 0);
+
+  for (size_t i = 0; i < rounded_len; i += 16) {
+    __m256i in1 = _mm256_loadu_si256((__m256i *)buf);
+    __m256i in2 = _mm256_loadu_si256((__m256i *)(buf + 8));
+
+    __m256i check_combined = _mm256_or_si256(in1, in2);
+
+    if (!_mm256_testz_si256(check_combined, high_bytes_mask)) {
+      return std::make_pair(nullptr, latin1_output);
+    }
+
+    //Turn UTF32 bytes into latin 1 bytes
+    __m256i shuffled1 = _mm256_shuffle_epi8(in1, shufmask);
+    __m256i shuffled2 = _mm256_shuffle_epi8(in2, shufmask);
+
+    //move Latin1 bytes to their correct spot
+    __m256i idx1 = _mm256_set_epi32(-1, -1,-1,-1,-1,-1,4,0);
+    __m256i idx2 = _mm256_set_epi32(-1, -1,-1,-1,4,0,-1,-1);
+    __m256i reshuffled1 = _mm256_permutevar8x32_epi32(shuffled1, idx1);
+    __m256i reshuffled2 = _mm256_permutevar8x32_epi32(shuffled2, idx2);
+
+    __m256i result = _mm256_or_si256(reshuffled1, reshuffled2);
+    _mm_storeu_si128((__m128i *)latin1_output,
+                     _mm256_castsi256_si128(result));
+
+    latin1_output += 16;
+    buf += 16;
+  }
+
+  return std::make_pair(buf, latin1_output);
+}
+std::pair
+avx2_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len,
+                                         char *latin1_output) {
+    const size_t rounded_len =
+        len & ~0x1F; // Round down to nearest multiple of 32
+
+    __m256i high_bytes_mask = _mm256_set1_epi32(0xFFFFFF00);
+    __m256i shufmask = _mm256_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+                                       -1, 12, 8, 4, 0, -1, -1, -1, -1, -1, -1,
+                                       -1, -1, -1, -1, -1, -1, 12, 8, 4, 0);
+
+    const char32_t *start = buf;
+
+    for (size_t i = 0; i < rounded_len; i += 16) {
+        __m256i in1 = _mm256_loadu_si256((__m256i *)buf);
+        __m256i in2 = _mm256_loadu_si256((__m256i *)(buf + 8));
+
+        __m256i check_combined = _mm256_or_si256(in1, in2);
+
+        if (!_mm256_testz_si256(check_combined, high_bytes_mask)) {
+            // Fallback to scalar code for handling errors
+            for (int k = 0; k < 8; k++) {
+                char32_t codepoint = buf[k];
+                if (codepoint <= 0xFF) {
+                    *latin1_output++ = static_cast(codepoint);
+                } else {
+                    return std::make_pair(result(error_code::TOO_LARGE, buf - start + k),
+                                          latin1_output);
+                }
+            }
+            buf += 8;
+        } else {
+            __m256i shuffled1 = _mm256_shuffle_epi8(in1, shufmask);
+            __m256i shuffled2 = _mm256_shuffle_epi8(in2, shufmask);
+
+            __m256i idx1 = _mm256_set_epi32(-1, -1, -1, -1, -1, -1, 4, 0);
+            __m256i idx2 = _mm256_set_epi32(-1, -1, -1, -1, 4, 0, -1, -1);
+            __m256i reshuffled1 = _mm256_permutevar8x32_epi32(shuffled1, idx1);
+            __m256i reshuffled2 = _mm256_permutevar8x32_epi32(shuffled2, idx2);
+
+            __m256i result = _mm256_or_si256(reshuffled1, reshuffled2);
+            _mm_storeu_si128((__m128i *)latin1_output, _mm256_castsi256_si128(result));
+
+            latin1_output += 16;
+            buf += 16;
+        }
+    }
+
+    return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output);
+}
+/* end file src/haswell/avx2_convert_utf32_to_latin1.cpp */
 /* begin file src/haswell/avx2_convert_utf32_to_utf8.cpp */
 std::pair avx2_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) {
   const char32_t* end = buf + len;
@@ -20821,7 +23938,7 @@ std::pair avx2_convert_utf32_to_utf8(const char32_t* buf
     __m256i nextin = _mm256_loadu_si256((__m256i*)buf+1);
     running_max = _mm256_max_epu32(_mm256_max_epu32(in, running_max), nextin);
 
-    // Pack 32-bit UTF-32 words to 16-bit UTF-16 words with unsigned saturation
+    // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned saturation
     __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), _mm256_and_si256(nextin, v_7fffffff));
     in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000);
 
@@ -20892,7 +24009,7 @@ std::pair avx2_convert_utf32_to_utf8(const char32_t* buf
     const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000);
     const uint32_t saturation_bitmask = static_cast(_mm256_movemask_epi8(saturation_bytemask));
     if (saturation_bitmask == 0xffffffff) {
-      // case: words from register produce either 1, 2 or 3 UTF-8 bytes
+      // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
       const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
       forbidden_bytemask = _mm256_or_si256(forbidden_bytemask, _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800));
 
@@ -20906,7 +24023,7 @@ std::pair avx2_convert_utf32_to_utf8(const char32_t* buf
         2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
         3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-        We expand the input word (16-bit) into two words (32-bit), thus
+        We expand the input word (16-bit) into two code units (32-bit), thus
         we have room for four bytes. However, we need five distinct bit
         layouts. Note that the last byte in cases #2 and #3 is the same.
 
@@ -20917,7 +24034,7 @@ std::pair avx2_convert_utf32_to_utf8(const char32_t* buf
         either byte 1 for case #2 or byte 2 for case #3. Note that they
         differ by exactly one bit.
 
-        Finally from these two words we build proper UTF-8 sequence, taking
+        Finally from these two code units we build proper UTF-8 sequence, taking
         into account the case (i.e, the number of bytes to write).
       */
       /**
@@ -20945,16 +24062,16 @@ std::pair avx2_convert_utf32_to_utf8(const char32_t* buf
       const __m256i s4 = _mm256_xor_si256(s3, m0);
 #undef simdutf_vec
 
-      // 4. expand words 16-bit => 32-bit
+      // 4. expand code units 16-bit => 32-bit
       const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
       const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
 
-      // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
+      // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
       const uint32_t mask = (one_byte_bitmask & 0x55555555) |
                             (one_or_two_bytes_bitmask & 0xaaaaaaaa);
       // Due to the wider registers, the following path is less likely to be useful.
       /*if(mask == 0) {
-        // We only have three-byte words. Use fast path.
+        // We only have three-byte code units. Use fast path.
         const __m256i shuffle = _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
         const __m256i utf8_0 = _mm256_shuffle_epi8(out0, shuffle);
         const __m256i utf8_1 = _mm256_shuffle_epi8(out1, shuffle);
@@ -21066,7 +24183,7 @@ std::pair avx2_convert_utf32_to_utf8_with_errors(const char32_t*
       return std::make_pair(result(error_code::TOO_LARGE, buf - start), utf8_output);
     }
 
-    // Pack 32-bit UTF-32 words to 16-bit UTF-16 words with unsigned saturation
+    // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned saturation
     __m256i in_16 = _mm256_packus_epi32(_mm256_and_si256(in, v_7fffffff), _mm256_and_si256(nextin, v_7fffffff));
     in_16 = _mm256_permute4x64_epi64(in_16, 0b11011000);
 
@@ -21137,9 +24254,9 @@ std::pair avx2_convert_utf32_to_utf8_with_errors(const char32_t*
     const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(_mm256_or_si256(in, nextin), v_ffff0000), v_0000);
     const uint32_t saturation_bitmask = static_cast(_mm256_movemask_epi8(saturation_bytemask));
     if (saturation_bitmask == 0xffffffff) {
-      // case: words from register produce either 1, 2 or 3 UTF-8 bytes
+      // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
 
-      // Check for illegal surrogate words
+      // Check for illegal surrogate code units
       const __m256i v_d800 = _mm256_set1_epi16((uint16_t)0xd800);
       const __m256i forbidden_bytemask = _mm256_cmpeq_epi16(_mm256_and_si256(in_16, v_f800), v_d800);
       if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0x0) {
@@ -21156,7 +24273,7 @@ std::pair avx2_convert_utf32_to_utf8_with_errors(const char32_t*
         2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
         3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-        We expand the input word (16-bit) into two words (32-bit), thus
+        We expand the input word (16-bit) into two code units (32-bit), thus
         we have room for four bytes. However, we need five distinct bit
         layouts. Note that the last byte in cases #2 and #3 is the same.
 
@@ -21167,7 +24284,7 @@ std::pair avx2_convert_utf32_to_utf8_with_errors(const char32_t*
         either byte 1 for case #2 or byte 2 for case #3. Note that they
         differ by exactly one bit.
 
-        Finally from these two words we build proper UTF-8 sequence, taking
+        Finally from these two code units we build proper UTF-8 sequence, taking
         into account the case (i.e, the number of bytes to write).
       */
       /**
@@ -21195,16 +24312,16 @@ std::pair avx2_convert_utf32_to_utf8_with_errors(const char32_t*
       const __m256i s4 = _mm256_xor_si256(s3, m0);
 #undef simdutf_vec
 
-      // 4. expand words 16-bit => 32-bit
+      // 4. expand code units 16-bit => 32-bit
       const __m256i out0 = _mm256_unpacklo_epi16(t2, s4);
       const __m256i out1 = _mm256_unpackhi_epi16(t2, s4);
 
-      // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
+      // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
       const uint32_t mask = (one_byte_bitmask & 0x55555555) |
                             (one_or_two_bytes_bitmask & 0xaaaaaaaa);
       // Due to the wider registers, the following path is less likely to be useful.
       /*if(mask == 0) {
-        // We only have three-byte words. Use fast path.
+        // We only have three-byte code units. Use fast path.
         const __m256i shuffle = _mm256_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1, 2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
         const __m256i utf8_0 = _mm256_shuffle_epi8(out0, shuffle);
         const __m256i utf8_1 = _mm256_shuffle_epi8(out1, shuffle);
@@ -21279,20 +24396,84 @@ std::pair avx2_convert_utf32_to_utf8_with_errors(const char32_t*
       }
       buf += k;
     }
-  } // while
+  } // while
+
+  return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output);
+}
+/* end file src/haswell/avx2_convert_utf32_to_utf8.cpp */
+/* begin file src/haswell/avx2_convert_utf32_to_utf16.cpp */
+template 
+std::pair avx2_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_output) {
+  const char32_t* end = buf + len;
+
+  const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
+  __m256i forbidden_bytemask = _mm256_setzero_si256();
+
+
+  while (buf + 8 + safety_margin <= end) {
+    __m256i in = _mm256_loadu_si256((__m256i*)buf);
+
+    const __m256i v_00000000 = _mm256_setzero_si256();
+    const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000);
+
+    // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs
+    const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000);
+    const uint32_t saturation_bitmask = static_cast(_mm256_movemask_epi8(saturation_bytemask));
+
+    if (saturation_bitmask == 0xffffffff) {
+      const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800);
+      const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800);
+      forbidden_bytemask = _mm256_or_si256(forbidden_bytemask, _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800));
+
+      __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in),_mm256_extractf128_si256(in,1));
+      if (big_endian) {
+        const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+        utf16_packed = _mm_shuffle_epi8(utf16_packed, swap);
+      }
+      _mm_storeu_si128((__m128i*)utf16_output, utf16_packed);
+      utf16_output += 8;
+      buf += 8;
+    } else {
+      size_t forward = 7;
+      size_t k = 0;
+      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
+      for(; k < forward; k++) {
+        uint32_t word = buf[k];
+        if((word & 0xFFFF0000)==0) {
+          // will not generate a surrogate pair
+          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(nullptr, utf16_output); }
+          *utf16_output++ = big_endian ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) : char16_t(word);
+        } else {
+          // will generate a surrogate pair
+          if (word > 0x10FFFF) { return std::make_pair(nullptr, utf16_output); }
+          word -= 0x10000;
+          uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
+          uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
+          if (big_endian) {
+            high_surrogate = uint16_t((high_surrogate >> 8) | (high_surrogate << 8));
+            low_surrogate = uint16_t((low_surrogate >> 8) | (low_surrogate << 8));
+          }
+          *utf16_output++ = char16_t(high_surrogate);
+          *utf16_output++ = char16_t(low_surrogate);
+        }
+      }
+      buf += k;
+    }
+  }
 
-  return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output);
+  // check for invalid input
+  if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { return std::make_pair(nullptr, utf16_output); }
+
+  return std::make_pair(buf, utf16_output);
 }
-/* end file src/haswell/avx2_convert_utf32_to_utf8.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=haswell/avx2_convert_utf32_to_utf16.cpp
-/* begin file src/haswell/avx2_convert_utf32_to_utf16.cpp */
+
+
 template 
-std::pair avx2_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_output) {
+std::pair avx2_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) {
+  const char32_t* start = buf;
   const char32_t* end = buf + len;
 
   const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
-  __m256i forbidden_bytemask = _mm256_setzero_si256();
-
 
   while (buf + 8 + safety_margin <= end) {
     __m256i in = _mm256_loadu_si256((__m256i*)buf);
@@ -21307,7 +24488,10 @@ std::pair avx2_convert_utf32_to_utf16(const char32_t
     if (saturation_bitmask == 0xffffffff) {
       const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800);
       const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800);
-      forbidden_bytemask = _mm256_or_si256(forbidden_bytemask, _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800));
+      const __m256i forbidden_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800);
+      if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0x0) {
+        return std::make_pair(result(error_code::SURROGATE, buf - start), utf16_output);
+      }
 
       __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in),_mm256_extractf128_si256(in,1));
       if (big_endian) {
@@ -21325,11 +24509,11 @@ std::pair avx2_convert_utf32_to_utf16(const char32_t
         uint32_t word = buf[k];
         if((word & 0xFFFF0000)==0) {
           // will not generate a surrogate pair
-          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(nullptr, utf16_output); }
+          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), utf16_output); }
           *utf16_output++ = big_endian ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) : char16_t(word);
         } else {
           // will generate a surrogate pair
-          if (word > 0x10FFFF) { return std::make_pair(nullptr, utf16_output); }
+          if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), utf16_output); }
           word -= 0x10000;
           uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
           uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
@@ -21345,185 +24529,580 @@ std::pair avx2_convert_utf32_to_utf16(const char32_t
     }
   }
 
-  // check for invalid input
-  if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0) { return std::make_pair(nullptr, utf16_output); }
+  return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output);
+}
+/* end file src/haswell/avx2_convert_utf32_to_utf16.cpp */
+
+/* begin file src/haswell/avx2_convert_utf8_to_latin1.cpp */
+// depends on "tables/utf8_to_utf16_tables.h"
+
+// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the
+// end of the code points. Only the least significant 12 bits of the mask
+// are accessed.
+// It returns how many bytes were consumed (up to 12).
+size_t convert_masked_utf8_to_latin1(const char *input,
+                           uint64_t utf8_end_of_code_point_mask,
+                           char *&latin1_output) {
+  // we use an approach where we try to process up to 12 input bytes.
+  // Why 12 input bytes and not 16? Because we are concerned with the size of
+  // the lookup tables. Also 12 is nicely divisible by two and three.
+  //
+  //
+  // Optimization note: our main path below is load-latency dependent. Thus it is maybe
+  // beneficial to have fast paths that depend on branch prediction but have less latency.
+  // This results in more instructions but, potentially, also higher speeds.
+  //
+  const __m128i in = _mm_loadu_si128((__m128i *)input);
+  const __m128i in_second_half = _mm_loadu_si128((__m128i *)(input + 16));
+
+  const uint16_t input_utf8_end_of_code_point_mask =
+      utf8_end_of_code_point_mask & 0xfff; //we're only processing 12 bytes in case it`s not all ASCII
+
+  if((input_utf8_end_of_code_point_mask & 0xffffffff) == 0xffffffff) {
+    // Load the next 128 bits.
+
+    // Combine the two 128-bit registers into a single 256-bit register.
+    __m256i in_combined = _mm256_set_m128i(in_second_half, in);
+
+    // We process the data in chunks of 32 bytes.
+    _mm256_storeu_si256(reinterpret_cast<__m256i *>(latin1_output), in_combined);
+
+    latin1_output += 32; // We wrote 32 characters.
+    return 32; // We consumed 32 bytes.
+  }
+
+
+  if(((utf8_end_of_code_point_mask & 0xffff) == 0xffff)) {
+    // We process the data in chunks of 16 bytes.
+    _mm_storeu_si128(reinterpret_cast<__m128i *>(latin1_output), in);
+    latin1_output += 16; // We wrote 16 characters.
+    return 16; // We consumed 16 bytes.
+  }
+  /// We do not have a fast path available, so we fallback.
+  const uint8_t idx =
+      tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0];
+  const uint8_t consumed =
+      tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
+  // this indicates an invalid input:
+  if(idx >= 64) { return consumed; }
+  // Here we should have (idx < 64), if not, there is a bug in the validation or elsewhere.
+  // SIX (6) input code-code units
+  // this is a relatively easy scenario
+  // we process SIX (6) input code-code units. The max length in bytes of six code
+  // code units spanning between 1 and 2 bytes each is 12 bytes. On processors
+  // where pdep/pext is fast, we might be able to use a small lookup table.
+  const __m128i sh =
+        _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
+  const __m128i perm = _mm_shuffle_epi8(in, sh);
+  const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f));
+  const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00));
+  __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2));
+  const __m128i latin1_packed = _mm_packus_epi16(composed,composed);
+  // writing 8 bytes even though we only care about the first 6 bytes.
+  // performance note: it would be faster to use _mm_storeu_si128, we should investigate.
+  _mm_storel_epi64((__m128i *)latin1_output, latin1_packed);
+  latin1_output += 6; // We wrote 6 bytes.
+  return consumed;
+}
+/* end file src/haswell/avx2_convert_utf8_to_latin1.cpp */
+
+} // unnamed namespace
+} // namespace haswell
+} // namespace simdutf
+
+/* begin file src/generic/buf_block_reader.h */
+namespace simdutf {
+namespace haswell {
+namespace {
+
+// Walks through a buffer in block-sized increments, loading the last part with spaces
+template
+struct buf_block_reader {
+public:
+  simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
+  simdutf_really_inline size_t block_index();
+  simdutf_really_inline bool has_full_block() const;
+  simdutf_really_inline const uint8_t *full_block() const;
+  /**
+   * Get the last block, padded with spaces.
+   *
+   * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this
+   * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there
+   * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding.
+   *
+   * @return the number of effective characters in the last block.
+   */
+  simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
+  simdutf_really_inline void advance();
+private:
+  const uint8_t *buf;
+  const size_t len;
+  const size_t lenminusstep;
+  size_t idx;
+};
+
+// Routines to print masks and text for debugging bitmask operations
+simdutf_unused static char * format_input_text_64(const uint8_t *text) {
+  static char *buf = reinterpret_cast(malloc(sizeof(simd8x64) + 1));
+  for (size_t i=0; i); i++) {
+    buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
+  }
+  buf[sizeof(simd8x64)] = '\0';
+  return buf;
+}
+
+// Routines to print masks and text for debugging bitmask operations
+simdutf_unused static char * format_input_text(const simd8x64& in) {
+  static char *buf = reinterpret_cast(malloc(sizeof(simd8x64) + 1));
+  in.store(reinterpret_cast(buf));
+  for (size_t i=0; i); i++) {
+    if (buf[i] < ' ') { buf[i] = '_'; }
+  }
+  buf[sizeof(simd8x64)] = '\0';
+  return buf;
+}
+
+simdutf_unused static char * format_mask(uint64_t mask) {
+  static char *buf = reinterpret_cast(malloc(64 + 1));
+  for (size_t i=0; i<64; i++) {
+    buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
+  }
+  buf[64] = '\0';
+  return buf;
+}
+
+template
+simdutf_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
+
+template
+simdutf_really_inline size_t buf_block_reader::block_index() { return idx; }
+
+template
+simdutf_really_inline bool buf_block_reader::has_full_block() const {
+  return idx < lenminusstep;
+}
+
+template
+simdutf_really_inline const uint8_t *buf_block_reader::full_block() const {
+  return &buf[idx];
+}
+
+template
+simdutf_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const {
+  if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers
+  std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
+  std::memcpy(dst, buf + idx, len - idx);
+  return len - idx;
+}
+
+template
+simdutf_really_inline void buf_block_reader::advance() {
+  idx += STEP_SIZE;
+}
+
+} // unnamed namespace
+} // namespace haswell
+} // namespace simdutf
+/* end file src/generic/buf_block_reader.h */
+/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
+namespace simdutf {
+namespace haswell {
+namespace {
+namespace utf8_validation {
+
+using namespace simd;
+
+  simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
+// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
+// Bit 1 = Too Long (ASCII followed by continuation)
+// Bit 2 = Overlong 3-byte
+// Bit 4 = Surrogate
+// Bit 5 = Overlong 2-byte
+// Bit 7 = Two Continuations
+    constexpr const uint8_t TOO_SHORT   = 1<<0; // 11______ 0_______
+                                                // 11______ 11______
+    constexpr const uint8_t TOO_LONG    = 1<<1; // 0_______ 10______
+    constexpr const uint8_t OVERLONG_3  = 1<<2; // 11100000 100_____
+    constexpr const uint8_t SURROGATE   = 1<<4; // 11101101 101_____
+    constexpr const uint8_t OVERLONG_2  = 1<<5; // 1100000_ 10______
+    constexpr const uint8_t TWO_CONTS   = 1<<7; // 10______ 10______
+    constexpr const uint8_t TOO_LARGE   = 1<<3; // 11110100 1001____
+                                                // 11110100 101_____
+                                                // 11110101 1001____
+                                                // 11110101 101_____
+                                                // 1111011_ 1001____
+                                                // 1111011_ 101_____
+                                                // 11111___ 1001____
+                                                // 11111___ 101_____
+    constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
+                                                // 11110101 1000____
+                                                // 1111011_ 1000____
+                                                // 11111___ 1000____
+    constexpr const uint8_t OVERLONG_4  = 1<<6; // 11110000 1000____
+
+    const simd8 byte_1_high = prev1.shr<4>().lookup_16(
+      // 0_______ ________ 
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      // 10______ ________ 
+      TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
+      // 1100____ ________ 
+      TOO_SHORT | OVERLONG_2,
+      // 1101____ ________ 
+      TOO_SHORT,
+      // 1110____ ________ 
+      TOO_SHORT | OVERLONG_3 | SURROGATE,
+      // 1111____ ________ 
+      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4
+    );
+    constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
+    const simd8 byte_1_low = (prev1 & 0x0F).lookup_16(
+      // ____0000 ________
+      CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
+      // ____0001 ________
+      CARRY | OVERLONG_2,
+      // ____001_ ________
+      CARRY,
+      CARRY,
+
+      // ____0100 ________
+      CARRY | TOO_LARGE,
+      // ____0101 ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      // ____011_ ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+
+      // ____1___ ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      // ____1101 ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000
+    );
+    const simd8 byte_2_high = input.shr<4>().lookup_16(
+      // ________ 0_______ 
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
 
-  return std::make_pair(buf, utf16_output);
-}
+      // ________ 1000____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
+      // ________ 1001____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
+      // ________ 101_____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
 
+      // ________ 11______
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
+    );
+    return (byte_1_high & byte_1_low & byte_2_high);
+  }
+  simdutf_really_inline simd8 check_multibyte_lengths(const simd8 input,
+      const simd8 prev_input, const simd8 sc) {
+    simd8 prev2 = input.prev<2>(prev_input);
+    simd8 prev3 = input.prev<3>(prev_input);
+    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
+    simd8 must23_80 = must23 & uint8_t(0x80);
+    return must23_80 ^ sc;
+  }
 
-template 
-std::pair avx2_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) {
-  const char32_t* start = buf;
-  const char32_t* end = buf + len;
+  //
+  // Return nonzero if there are incomplete multibyte characters at the end of the block:
+  // e.g. if there is a 4-byte character, but it's 3 bytes from the end.
+  //
+  simdutf_really_inline simd8 is_incomplete(const simd8 input) {
+    // If the previous input's last 3 bytes match this, they're too short (they ended at EOF):
+    // ... 1111____ 111_____ 11______
+    static const uint8_t max_array[32] = {
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1
+    };
+    const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]);
+    return input.gt_bits(max_value);
+  }
 
-  const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
+  struct utf8_checker {
+    // If this is nonzero, there has been a UTF-8 error.
+    simd8 error;
+    // The last input we received
+    simd8 prev_input_block;
+    // Whether the last input we received was incomplete (used for ASCII fast path)
+    simd8 prev_incomplete;
 
-  while (buf + 8 + safety_margin <= end) {
-    __m256i in = _mm256_loadu_si256((__m256i*)buf);
+    //
+    // Check whether the current bytes are valid UTF-8.
+    //
+    simdutf_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) {
+      // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
+      // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
+      simd8 prev1 = input.prev<1>(prev_input);
+      simd8 sc = check_special_cases(input, prev1);
+      this->error |= check_multibyte_lengths(input, prev_input, sc);
+    }
 
-    const __m256i v_00000000 = _mm256_setzero_si256();
-    const __m256i v_ffff0000 = _mm256_set1_epi32((int32_t)0xffff0000);
+    // The only problem that can happen at EOF is that a multibyte character is too short
+    // or a byte value too large in the last bytes: check_special_cases only checks for bytes
+    // too large in the first of two bytes.
+    simdutf_really_inline void check_eof() {
+      // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
+      // possibly finish them.
+      this->error |= this->prev_incomplete;
+    }
 
-    // no bits set above 16th bit <=> can pack to UTF16 without surrogate pairs
-    const __m256i saturation_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_ffff0000), v_00000000);
-    const uint32_t saturation_bitmask = static_cast(_mm256_movemask_epi8(saturation_bytemask));
+    simdutf_really_inline void check_next_input(const simd8x64& input) {
+      if(simdutf_likely(is_ascii(input))) {
+        this->error |= this->prev_incomplete;
+      } else {
+        // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+        static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+            "We support either two or four chunks per 64-byte block.");
+        if(simd8x64::NUM_CHUNKS == 2) {
+          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
+          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+        } else if(simd8x64::NUM_CHUNKS == 4) {
+          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
+          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+          this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+        }
+        this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]);
+        this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1];
 
-    if (saturation_bitmask == 0xffffffff) {
-      const __m256i v_f800 = _mm256_set1_epi32((uint32_t)0xf800);
-      const __m256i v_d800 = _mm256_set1_epi32((uint32_t)0xd800);
-      const __m256i forbidden_bytemask = _mm256_cmpeq_epi32(_mm256_and_si256(in, v_f800), v_d800);
-      if (static_cast(_mm256_movemask_epi8(forbidden_bytemask)) != 0x0) {
-        return std::make_pair(result(error_code::SURROGATE, buf - start), utf16_output);
       }
+    }
 
-      __m128i utf16_packed = _mm_packus_epi32(_mm256_castsi256_si128(in),_mm256_extractf128_si256(in,1));
-      if (big_endian) {
-        const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-        utf16_packed = _mm_shuffle_epi8(utf16_packed, swap);
-      }
-      _mm_storeu_si128((__m128i*)utf16_output, utf16_packed);
-      utf16_output += 8;
-      buf += 8;
-    } else {
-      size_t forward = 7;
-      size_t k = 0;
-      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
-      for(; k < forward; k++) {
-        uint32_t word = buf[k];
-        if((word & 0xFFFF0000)==0) {
-          // will not generate a surrogate pair
-          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), utf16_output); }
-          *utf16_output++ = big_endian ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) : char16_t(word);
-        } else {
-          // will generate a surrogate pair
-          if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), utf16_output); }
-          word -= 0x10000;
-          uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
-          uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
-          if (big_endian) {
-            high_surrogate = uint16_t((high_surrogate >> 8) | (high_surrogate << 8));
-            low_surrogate = uint16_t((low_surrogate >> 8) | (low_surrogate << 8));
-          }
-          *utf16_output++ = char16_t(high_surrogate);
-          *utf16_output++ = char16_t(low_surrogate);
-        }
-      }
-      buf += k;
+    // do not forget to call check_eof!
+    simdutf_really_inline bool errors() const {
+      return this->error.any_bits_set_anywhere();
     }
-  }
 
-  return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output);
-}
-/* end file src/haswell/avx2_convert_utf32_to_utf16.cpp */
+  }; // struct utf8_checker
+} // namespace utf8_validation
+
+using utf8_validation::utf8_checker;
+
 } // unnamed namespace
 } // namespace haswell
 } // namespace simdutf
-
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/buf_block_reader.h
-/* begin file src/generic/buf_block_reader.h */
+/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
+/* begin file src/generic/utf8_validation/utf8_validator.h */
 namespace simdutf {
 namespace haswell {
 namespace {
+namespace utf8_validation {
 
-// Walks through a buffer in block-sized increments, loading the last part with spaces
-template
-struct buf_block_reader {
-public:
-  simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
-  simdutf_really_inline size_t block_index();
-  simdutf_really_inline bool has_full_block() const;
-  simdutf_really_inline const uint8_t *full_block() const;
-  /**
-   * Get the last block, padded with spaces.
-   *
-   * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this
-   * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there
-   * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding.
-   *
-   * @return the number of effective characters in the last block.
-   */
-  simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
-  simdutf_really_inline void advance();
-private:
-  const uint8_t *buf;
-  const size_t len;
-  const size_t lenminusstep;
-  size_t idx;
-};
+/**
+ * Validates that the string is actual UTF-8.
+ */
+template
+bool generic_validate_utf8(const uint8_t * input, size_t length) {
+    checker c{};
+    buf_block_reader<64> reader(input, length);
+    while (reader.has_full_block()) {
+      simd::simd8x64 in(reader.full_block());
+      c.check_next_input(in);
+      reader.advance();
+    }
+    uint8_t block[64]{};
+    reader.get_remainder(block);
+    simd::simd8x64 in(block);
+    c.check_next_input(in);
+    reader.advance();
+    c.check_eof();
+    return !c.errors();
+}
 
-// Routines to print masks and text for debugging bitmask operations
-simdutf_unused static char * format_input_text_64(const uint8_t *text) {
-  static char *buf = reinterpret_cast(malloc(sizeof(simd8x64) + 1));
-  for (size_t i=0; i); i++) {
-    buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
-  }
-  buf[sizeof(simd8x64)] = '\0';
-  return buf;
+bool generic_validate_utf8(const char * input, size_t length) {
+  return generic_validate_utf8(reinterpret_cast(input),length);
+}
+
+/**
+ * Validates that the string is actual UTF-8 and stops on errors.
+ */
+template
+result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
+    checker c{};
+    buf_block_reader<64> reader(input, length);
+    size_t count{0};
+    while (reader.has_full_block()) {
+      simd::simd8x64 in(reader.full_block());
+      c.check_next_input(in);
+      if(c.errors()) {
+        if (count != 0) { count--; } // Sometimes the error is only detected in the next chunk
+        result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast(input), reinterpret_cast(input + count), length - count);
+        res.count += count;
+        return res;
+      }
+      reader.advance();
+      count += 64;
+    }
+    uint8_t block[64]{};
+    reader.get_remainder(block);
+    simd::simd8x64 in(block);
+    c.check_next_input(in);
+    reader.advance();
+    c.check_eof();
+    if (c.errors()) {
+      if (count != 0) { count--; } // Sometimes the error is only detected in the next chunk
+      result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast(input), reinterpret_cast(input) + count, length - count);
+      res.count += count;
+      return res;
+    } else {
+      return result(error_code::SUCCESS, length);
+    }
+}
+
+result generic_validate_utf8_with_errors(const char * input, size_t length) {
+  return generic_validate_utf8_with_errors(reinterpret_cast(input),length);
+}
+
+template
+bool generic_validate_ascii(const uint8_t * input, size_t length) {
+    buf_block_reader<64> reader(input, length);
+    uint8_t blocks[64]{};
+    simd::simd8x64 running_or(blocks);
+    while (reader.has_full_block()) {
+      simd::simd8x64 in(reader.full_block());
+      running_or |= in;
+      reader.advance();
+    }
+    uint8_t block[64]{};
+    reader.get_remainder(block);
+    simd::simd8x64 in(block);
+    running_or |= in;
+    return running_or.is_ascii();
 }
 
-// Routines to print masks and text for debugging bitmask operations
-simdutf_unused static char * format_input_text(const simd8x64& in) {
-  static char *buf = reinterpret_cast(malloc(sizeof(simd8x64) + 1));
-  in.store(reinterpret_cast(buf));
-  for (size_t i=0; i); i++) {
-    if (buf[i] < ' ') { buf[i] = '_'; }
-  }
-  buf[sizeof(simd8x64)] = '\0';
-  return buf;
+bool generic_validate_ascii(const char * input, size_t length) {
+  return generic_validate_ascii(reinterpret_cast(input),length);
 }
 
-simdutf_unused static char * format_mask(uint64_t mask) {
-  static char *buf = reinterpret_cast(malloc(64 + 1));
-  for (size_t i=0; i<64; i++) {
-    buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
+template
+result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
+  buf_block_reader<64> reader(input, length);
+  size_t count{0};
+  while (reader.has_full_block()) {
+    simd::simd8x64 in(reader.full_block());
+    if (!in.is_ascii()) {
+      result res = scalar::ascii::validate_with_errors(reinterpret_cast(input + count), length - count);
+      return result(res.error, count + res.count);
+    }
+    reader.advance();
+
+    count += 64;
+  }
+  uint8_t block[64]{};
+  reader.get_remainder(block);
+  simd::simd8x64 in(block);
+  if (!in.is_ascii()) {
+    result res = scalar::ascii::validate_with_errors(reinterpret_cast(input + count), length - count);
+    return result(res.error, count + res.count);
+  } else {
+    return result(error_code::SUCCESS, length);
   }
-  buf[64] = '\0';
-  return buf;
 }
 
-template
-simdutf_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
+result generic_validate_ascii_with_errors(const char * input, size_t length) {
+  return generic_validate_ascii_with_errors(reinterpret_cast(input),length);
+}
 
-template
-simdutf_really_inline size_t buf_block_reader::block_index() { return idx; }
+} // namespace utf8_validation
+} // unnamed namespace
+} // namespace haswell
+} // namespace simdutf
+/* end file src/generic/utf8_validation/utf8_validator.h */
+// transcoding from UTF-8 to UTF-16
+/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
 
-template
-simdutf_really_inline bool buf_block_reader::has_full_block() const {
-  return idx < lenminusstep;
-}
 
-template
-simdutf_really_inline const uint8_t *buf_block_reader::full_block() const {
-  return &buf[idx];
-}
+namespace simdutf {
+namespace haswell {
+namespace {
+namespace utf8_to_utf16 {
 
-template
-simdutf_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const {
-  if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers
-  std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
-  std::memcpy(dst, buf + idx, len - idx);
-  return len - idx;
-}
+using namespace simd;
 
-template
-simdutf_really_inline void buf_block_reader::advance() {
-  idx += STEP_SIZE;
+template 
+simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
+    char16_t* utf16_output) noexcept {
+  // The implementation is not specific to haswell and should be moved to the generic directory.
+  size_t pos = 0;
+  char16_t* start{utf16_output};
+  const size_t safety_margin = 16; // to avoid overruns!
+  while(pos + 64 + safety_margin <= size) {
+    // this loop could be unrolled further. For example, we could process the mask
+    // far more than 64 bytes.
+    simd8x64 in(reinterpret_cast(input + pos));
+    if(in.is_ascii()) {
+      in.store_ascii_as_utf16(utf16_output);
+      utf16_output += 64;
+      pos += 64;
+    } else {
+      // Slow path. We hope that the compiler will recognize that this is a slow path.
+      // Anything that is not a continuation mask is a 'leading byte', that is, the
+      // start of a new code point.
+      uint64_t utf8_continuation_mask = in.lt(-65 + 1);
+      // -65 is 0b10111111 in two-complement's, so largest possible continuation byte
+      uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+      // The *start* of code points is not so useful, rather, we want the *end* of code points.
+      uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+      // We process in blocks of up to 12 bytes except possibly
+      // for fast paths which may process up to 16 bytes. For the
+      // slow path to work, we should have at least 12 input bytes left.
+      size_t max_starting_point = (pos + 64) - 12;
+      // Next loop is going to run at least five times when using solely
+      // the slow/regular path, and at least four times if there are fast paths.
+      while(pos < max_starting_point) {
+        // Performance note: our ability to compute 'consumed' and
+        // then shift and recompute is critical. If there is a
+        // latency of, say, 4 cycles on getting 'consumed', then
+        // the inner loop might have a total latency of about 6 cycles.
+        // Yet we process between 6 to 12 inputs bytes, thus we get
+        // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+        // for this section of the code. Hence, there is a limit
+        // to how much we can further increase this latency before
+        // it seriously harms performance.
+        //
+        // Thus we may allow convert_masked_utf8_to_utf16 to process
+        // more bytes at a time under a fast-path mode where 16 bytes
+        // are consumed at once (e.g., when encountering ASCII).
+        size_t consumed = convert_masked_utf8_to_utf16(input + pos,
+                            utf8_end_of_code_point_mask, utf16_output);
+        pos += consumed;
+        utf8_end_of_code_point_mask >>= consumed;
+      }
+      // At this point there may remain between 0 and 12 bytes in the
+      // 64-byte block. These bytes will be processed again. So we have an
+      // 80% efficiency (in the worst case). In practice we expect an
+      // 85% to 90% efficiency.
+    }
+  }
+  utf16_output += scalar::utf8_to_utf16::convert_valid(input + pos, size - pos, utf16_output);
+  return utf16_output - start;
 }
 
+} // namespace utf8_to_utf16
 } // unnamed namespace
 } // namespace haswell
 } // namespace simdutf
-/* end file src/generic/buf_block_reader.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_lookup4_algorithm.h
-/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
+/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
+/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */
+
+
 namespace simdutf {
 namespace haswell {
 namespace {
-namespace utf8_validation {
-
+namespace utf8_to_utf16 {
 using namespace simd;
 
+
   simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
 // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
 // Bit 1 = Too Long (ASCII followed by continuation)
@@ -21598,323 +25177,281 @@ using namespace simd;
     );
     const simd8 byte_2_high = input.shr<4>().lookup_16(
       // ________ 0_______ 
-      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
-      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
-
-      // ________ 1000____
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
-      // ________ 1001____
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
-      // ________ 101_____
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
-      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
-
-      // ________ 11______
-      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
-    );
-    return (byte_1_high & byte_1_low & byte_2_high);
-  }
-  simdutf_really_inline simd8 check_multibyte_lengths(const simd8 input,
-      const simd8 prev_input, const simd8 sc) {
-    simd8 prev2 = input.prev<2>(prev_input);
-    simd8 prev3 = input.prev<3>(prev_input);
-    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
-    simd8 must23_80 = must23 & uint8_t(0x80);
-    return must23_80 ^ sc;
-  }
-
-  //
-  // Return nonzero if there are incomplete multibyte characters at the end of the block:
-  // e.g. if there is a 4-byte character, but it's 3 bytes from the end.
-  //
-  simdutf_really_inline simd8 is_incomplete(const simd8 input) {
-    // If the previous input's last 3 bytes match this, they're too short (they ended at EOF):
-    // ... 1111____ 111_____ 11______
-    static const uint8_t max_array[32] = {
-      255, 255, 255, 255, 255, 255, 255, 255,
-      255, 255, 255, 255, 255, 255, 255, 255,
-      255, 255, 255, 255, 255, 255, 255, 255,
-      255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1
-    };
-    const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]);
-    return input.gt_bits(max_value);
-  }
-
-  struct utf8_checker {
-    // If this is nonzero, there has been a UTF-8 error.
-    simd8 error;
-    // The last input we received
-    simd8 prev_input_block;
-    // Whether the last input we received was incomplete (used for ASCII fast path)
-    simd8 prev_incomplete;
-
-    //
-    // Check whether the current bytes are valid UTF-8.
-    //
-    simdutf_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) {
-      // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
-      // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
-      simd8 prev1 = input.prev<1>(prev_input);
-      simd8 sc = check_special_cases(input, prev1);
-      this->error |= check_multibyte_lengths(input, prev_input, sc);
-    }
-
-    // The only problem that can happen at EOF is that a multibyte character is too short
-    // or a byte value too large in the last bytes: check_special_cases only checks for bytes
-    // too large in the first of two bytes.
-    simdutf_really_inline void check_eof() {
-      // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
-      // possibly finish them.
-      this->error |= this->prev_incomplete;
-    }
-
-    simdutf_really_inline void check_next_input(const simd8x64& input) {
-      if(simdutf_likely(is_ascii(input))) {
-        this->error |= this->prev_incomplete;
-      } else {
-        // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
-        static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
-            "We support either two or four chunks per 64-byte block.");
-        if(simd8x64::NUM_CHUNKS == 2) {
-          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
-          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-        } else if(simd8x64::NUM_CHUNKS == 4) {
-          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
-          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-          this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
-          this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
-        }
-        this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]);
-        this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1];
-
-      }
-    }
-
-    // do not forget to call check_eof!
-    simdutf_really_inline bool errors() const {
-      return this->error.any_bits_set_anywhere();
-    }
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
 
-  }; // struct utf8_checker
-} // namespace utf8_validation
+      // ________ 1000____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
+      // ________ 1001____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
+      // ________ 101_____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
 
-using utf8_validation::utf8_checker;
+      // ________ 11______
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
+    );
+    return (byte_1_high & byte_1_low & byte_2_high);
+  }
+  simdutf_really_inline simd8 check_multibyte_lengths(const simd8 input,
+      const simd8 prev_input, const simd8 sc) {
+    simd8 prev2 = input.prev<2>(prev_input);
+    simd8 prev3 = input.prev<3>(prev_input);
+    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
+    simd8 must23_80 = must23 & uint8_t(0x80);
+    return must23_80 ^ sc;
+  }
 
-} // unnamed namespace
-} // namespace haswell
-} // namespace simdutf
-/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_validator.h
-/* begin file src/generic/utf8_validation/utf8_validator.h */
-namespace simdutf {
-namespace haswell {
-namespace {
-namespace utf8_validation {
 
-/**
- * Validates that the string is actual UTF-8.
- */
-template
-bool generic_validate_utf8(const uint8_t * input, size_t length) {
-    checker c{};
-    buf_block_reader<64> reader(input, length);
-    while (reader.has_full_block()) {
-      simd::simd8x64 in(reader.full_block());
-      c.check_next_input(in);
-      reader.advance();
+  struct validating_transcoder {
+    // If this is nonzero, there has been a UTF-8 error.
+    simd8 error;
+
+    validating_transcoder() : error(uint8_t(0)) {}
+    //
+    // Check whether the current bytes are valid UTF-8.
+    //
+    simdutf_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) {
+      // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
+      // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
+      simd8 prev1 = input.prev<1>(prev_input);
+      simd8 sc = check_special_cases(input, prev1);
+      this->error |= check_multibyte_lengths(input, prev_input, sc);
     }
-    uint8_t block[64]{};
-    reader.get_remainder(block);
-    simd::simd8x64 in(block);
-    c.check_next_input(in);
-    reader.advance();
-    c.check_eof();
-    return !c.errors();
-}
 
-bool generic_validate_utf8(const char * input, size_t length) {
-  return generic_validate_utf8(reinterpret_cast(input),length);
-}
 
-/**
- * Validates that the string is actual UTF-8 and stops on errors.
- */
-template
-result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
-    checker c{};
-    buf_block_reader<64> reader(input, length);
-    size_t count{0};
-    while (reader.has_full_block()) {
-      simd::simd8x64 in(reader.full_block());
-      c.check_next_input(in);
-      if(c.errors()) {
-        if (count != 0) { count--; } // Sometimes the error is only detected in the next chunk
-        result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast(input), reinterpret_cast(input + count), length - count);
-        res.count += count;
-        return res;
+    template 
+    simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
+      size_t pos = 0;
+      char16_t* start{utf16_output};
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
       }
-      reader.advance();
-      count += 64;
-    }
-    uint8_t block[64]{};
-    reader.get_remainder(block);
-    simd::simd8x64 in(block);
-    c.check_next_input(in);
-    reader.advance();
-    c.check_eof();
-    if (c.errors()) {
-      if (count != 0) { count--; } // Sometimes the error is only detected in the next chunk
-      result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast(input), reinterpret_cast(input) + count, length - count);
-      res.count += count;
-      return res;
-    } else {
-      return result(error_code::SUCCESS, length);
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
+      while(pos + 64 + safety_margin <= size) {
+        simd8x64 input(reinterpret_cast(in + pos));
+        if(input.is_ascii()) {
+          input.store_ascii_as_utf16(utf16_output);
+          utf16_output += 64;
+          pos += 64;
+        } else {
+          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+              "We support either two or four chunks per 64-byte block.");
+          auto zero = simd8{uint8_t(0)};
+          if(simd8x64::NUM_CHUNKS == 2) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          } else if(simd8x64::NUM_CHUNKS == 4) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+          }
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+          // We process in blocks of up to 12 bytes except possibly
+          // for fast paths which may process up to 16 bytes. For the
+          // slow path to work, we should have at least 12 input bytes left.
+          size_t max_starting_point = (pos + 64) - 12;
+          // Next loop is going to run at least five times.
+          while(pos < max_starting_point) {
+            // Performance note: our ability to compute 'consumed' and
+            // then shift and recompute is critical. If there is a
+            // latency of, say, 4 cycles on getting 'consumed', then
+            // the inner loop might have a total latency of about 6 cycles.
+            // Yet we process between 6 to 12 inputs bytes, thus we get
+            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+            // for this section of the code. Hence, there is a limit
+            // to how much we can further increase this latency before
+            // it seriously harms performance.
+            size_t consumed = convert_masked_utf8_to_utf16(in + pos,
+                            utf8_end_of_code_point_mask, utf16_output);
+            pos += consumed;
+            utf8_end_of_code_point_mask >>= consumed;
+          }
+          // At this point there may remain between 0 and 12 bytes in the
+          // 64-byte block. These bytes will be processed again. So we have an
+          // 80% efficiency (in the worst case). In practice we expect an
+          // 85% to 90% efficiency.
+        }
+      }
+      if(errors()) { return 0; }
+      if(pos < size) {
+        size_t howmany  = scalar::utf8_to_utf16::convert(in + pos, size - pos, utf16_output);
+        if(howmany == 0) { return 0; }
+        utf16_output += howmany;
+      }
+      return utf16_output - start;
     }
-}
-
-result generic_validate_utf8_with_errors(const char * input, size_t length) {
-  return generic_validate_utf8_with_errors(reinterpret_cast(input),length);
-}
 
-template
-bool generic_validate_ascii(const uint8_t * input, size_t length) {
-    buf_block_reader<64> reader(input, length);
-    uint8_t blocks[64]{};
-    simd::simd8x64 running_or(blocks);
-    while (reader.has_full_block()) {
-      simd::simd8x64 in(reader.full_block());
-      running_or |= in;
-      reader.advance();
+    template 
+    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
+      size_t pos = 0;
+      char16_t* start{utf16_output};
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
+      while(pos + 64 + safety_margin <= size) {
+        simd8x64 input(reinterpret_cast(in + pos));
+        if(input.is_ascii()) {
+          input.store_ascii_as_utf16(utf16_output);
+          utf16_output += 64;
+          pos += 64;
+        } else {
+          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+              "We support either two or four chunks per 64-byte block.");
+          auto zero = simd8{uint8_t(0)};
+          if(simd8x64::NUM_CHUNKS == 2) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          } else if(simd8x64::NUM_CHUNKS == 4) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+          }
+          if (errors()) {
+            // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+            // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+            result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+            res.count += pos;
+            return res;
+          }
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+          // We process in blocks of up to 12 bytes except possibly
+          // for fast paths which may process up to 16 bytes. For the
+          // slow path to work, we should have at least 12 input bytes left.
+          size_t max_starting_point = (pos + 64) - 12;
+          // Next loop is going to run at least five times.
+          while(pos < max_starting_point) {
+            // Performance note: our ability to compute 'consumed' and
+            // then shift and recompute is critical. If there is a
+            // latency of, say, 4 cycles on getting 'consumed', then
+            // the inner loop might have a total latency of about 6 cycles.
+            // Yet we process between 6 to 12 inputs bytes, thus we get
+            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+            // for this section of the code. Hence, there is a limit
+            // to how much we can further increase this latency before
+            // it seriously harms performance.
+            size_t consumed = convert_masked_utf8_to_utf16(in + pos,
+                            utf8_end_of_code_point_mask, utf16_output);
+            pos += consumed;
+            utf8_end_of_code_point_mask >>= consumed;
+          }
+          // At this point there may remain between 0 and 12 bytes in the
+          // 64-byte block. These bytes will be processed again. So we have an
+          // 80% efficiency (in the worst case). In practice we expect an
+          // 85% to 90% efficiency.
+        }
+      }
+      if(errors()) {
+        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+        result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+        res.count += pos;
+        return res;
+      }
+      if(pos < size) {
+        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+        result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+        if (res.error) {    // In case of error, we want the error position
+          res.count += pos;
+          return res;
+        } else {    // In case of success, we want the number of word written
+          utf16_output += res.count;
+        }
+      }
+      return result(error_code::SUCCESS, utf16_output - start);
     }
-    uint8_t block[64]{};
-    reader.get_remainder(block);
-    simd::simd8x64 in(block);
-    running_or |= in;
-    return running_or.is_ascii();
-}
-
-bool generic_validate_ascii(const char * input, size_t length) {
-  return generic_validate_ascii(reinterpret_cast(input),length);
-}
 
-template
-result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
-  buf_block_reader<64> reader(input, length);
-  size_t count{0};
-  while (reader.has_full_block()) {
-    simd::simd8x64 in(reader.full_block());
-    if (!in.is_ascii()) {
-      result res = scalar::ascii::validate_with_errors(reinterpret_cast(input + count), length - count);
-      return result(res.error, count + res.count);
+    simdutf_really_inline bool errors() const {
+      return this->error.any_bits_set_anywhere();
     }
-    reader.advance();
-
-    count += 64;
-  }
-  uint8_t block[64]{};
-  reader.get_remainder(block);
-  simd::simd8x64 in(block);
-  if (!in.is_ascii()) {
-    result res = scalar::ascii::validate_with_errors(reinterpret_cast(input + count), length - count);
-    return result(res.error, count + res.count);
-  } else {
-    return result(error_code::SUCCESS, length);
-  }
-}
-
-result generic_validate_ascii_with_errors(const char * input, size_t length) {
-  return generic_validate_ascii_with_errors(reinterpret_cast(input),length);
-}
 
-} // namespace utf8_validation
+  }; // struct utf8_checker
+} // utf8_to_utf16 namespace
 } // unnamed namespace
 } // namespace haswell
 } // namespace simdutf
-/* end file src/generic/utf8_validation/utf8_validator.h */
-// transcoding from UTF-8 to UTF-16
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/valid_utf8_to_utf16.h
-/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
-
+/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */
+// transcoding from UTF-8 to UTF-32
+/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
 
 namespace simdutf {
 namespace haswell {
 namespace {
-namespace utf8_to_utf16 {
+namespace utf8_to_utf32 {
 
 using namespace simd;
 
-template 
+
 simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
-    char16_t* utf16_output) noexcept {
-  // The implementation is not specific to haswell and should be moved to the generic directory.
+    char32_t* utf32_output) noexcept {
   size_t pos = 0;
-  char16_t* start{utf16_output};
+  char32_t* start{utf32_output};
   const size_t safety_margin = 16; // to avoid overruns!
   while(pos + 64 + safety_margin <= size) {
-    // this loop could be unrolled further. For example, we could process the mask
-    // far more than 64 bytes.
     simd8x64 in(reinterpret_cast(input + pos));
     if(in.is_ascii()) {
-      in.store_ascii_as_utf16(utf16_output);
-      utf16_output += 64;
+      in.store_ascii_as_utf32(utf32_output);
+      utf32_output += 64;
       pos += 64;
     } else {
-      // Slow path. We hope that the compiler will recognize that this is a slow path.
-      // Anything that is not a continuation mask is a 'leading byte', that is, the
-      // start of a new code point.
-      uint64_t utf8_continuation_mask = in.lt(-65 + 1);
-      // -65 is 0b10111111 in two-complement's, so largest possible continuation byte
-      uint64_t utf8_leading_mask = ~utf8_continuation_mask;
-      // The *start* of code points is not so useful, rather, we want the *end* of code points.
-      uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
-      // We process in blocks of up to 12 bytes except possibly
-      // for fast paths which may process up to 16 bytes. For the
-      // slow path to work, we should have at least 12 input bytes left.
-      size_t max_starting_point = (pos + 64) - 12;
-      // Next loop is going to run at least five times when using solely
-      // the slow/regular path, and at least four times if there are fast paths.
-      while(pos < max_starting_point) {
-        // Performance note: our ability to compute 'consumed' and
-        // then shift and recompute is critical. If there is a
-        // latency of, say, 4 cycles on getting 'consumed', then
-        // the inner loop might have a total latency of about 6 cycles.
-        // Yet we process between 6 to 12 inputs bytes, thus we get
-        // a speed limit between 1 cycle/byte and 0.5 cycle/byte
-        // for this section of the code. Hence, there is a limit
-        // to how much we can further increase this latency before
-        // it seriously harms performance.
-        //
-        // Thus we may allow convert_masked_utf8_to_utf16 to process
-        // more bytes at a time under a fast-path mode where 16 bytes
-        // are consumed at once (e.g., when encountering ASCII).
-        size_t consumed = convert_masked_utf8_to_utf16(input + pos,
-                            utf8_end_of_code_point_mask, utf16_output);
-        pos += consumed;
-        utf8_end_of_code_point_mask >>= consumed;
+    // -65 is 0b10111111 in two-complement's, so largest possible continuation byte
+    uint64_t utf8_continuation_mask = in.lt(-65 + 1);
+    uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+    uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+    size_t max_starting_point = (pos + 64) - 12;
+    while(pos < max_starting_point) {
+      size_t consumed = convert_masked_utf8_to_utf32(input + pos,
+                          utf8_end_of_code_point_mask, utf32_output);
+      pos += consumed;
+      utf8_end_of_code_point_mask >>= consumed;
       }
-      // At this point there may remain between 0 and 12 bytes in the
-      // 64-byte block. These bytes will be processed again. So we have an
-      // 80% efficiency (in the worst case). In practice we expect an
-      // 85% to 90% efficiency.
     }
   }
-  utf16_output += scalar::utf8_to_utf16::convert_valid(input + pos, size - pos, utf16_output);
-  return utf16_output - start;
+  utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
+  return utf32_output - start;
 }
 
-} // namespace utf8_to_utf16
+
+} // namespace utf8_to_utf32
 } // unnamed namespace
 } // namespace haswell
 } // namespace simdutf
-/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/utf8_to_utf16.h
-/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */
+/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
+/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */
 
 
 namespace simdutf {
 namespace haswell {
 namespace {
-namespace utf8_to_utf16 {
+namespace utf8_to_utf32 {
 using namespace simd;
 
 
@@ -22035,28 +25572,28 @@ using namespace simd;
     }
 
 
-    template 
-    simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
+
+    simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
       size_t pos = 0;
-      char16_t* start{utf16_output};
+      char32_t* start{utf32_output};
       // In the worst case, we have the haswell kernel which can cause an overflow of
-      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
       // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
       // much more than 8 bytes. However, you cannot generally assume that you have valid
-      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
       // to give us a good margin.
       size_t leading_byte = 0;
       size_t margin = size;
-      for(; margin > 0 && leading_byte < 8; margin--) {
+      for(; margin > 0 && leading_byte < 4; margin--) {
         leading_byte += (int8_t(in[margin-1]) > -65);
       }
-      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      // If the input is long enough, then we have that margin-1 is the fourth last leading byte.
       const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64 input(reinterpret_cast(in + pos));
         if(input.is_ascii()) {
-          input.store_ascii_as_utf16(utf16_output);
-          utf16_output += 64;
+          input.store_ascii_as_utf32(utf32_output);
+          utf32_output += 64;
           pos += 64;
         } else {
           // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
@@ -22090,8 +25627,8 @@ using namespace simd;
             // for this section of the code. Hence, there is a limit
             // to how much we can further increase this latency before
             // it seriously harms performance.
-            size_t consumed = convert_masked_utf8_to_utf16(in + pos,
-                            utf8_end_of_code_point_mask, utf16_output);
+            size_t consumed = convert_masked_utf8_to_utf32(in + pos,
+                            utf8_end_of_code_point_mask, utf32_output);
             pos += consumed;
             utf8_end_of_code_point_mask >>= consumed;
           }
@@ -22103,35 +25640,34 @@ using namespace simd;
       }
       if(errors()) { return 0; }
       if(pos < size) {
-        size_t howmany  = scalar::utf8_to_utf16::convert(in + pos, size - pos, utf16_output);
+        size_t howmany  = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
         if(howmany == 0) { return 0; }
-        utf16_output += howmany;
+        utf32_output += howmany;
       }
-      return utf16_output - start;
+      return utf32_output - start;
     }
 
-    template 
-    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
+    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
       size_t pos = 0;
-      char16_t* start{utf16_output};
+      char32_t* start{utf32_output};
       // In the worst case, we have the haswell kernel which can cause an overflow of
-      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
       // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
       // much more than 8 bytes. However, you cannot generally assume that you have valid
-      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
       // to give us a good margin.
       size_t leading_byte = 0;
       size_t margin = size;
-      for(; margin > 0 && leading_byte < 8; margin--) {
+      for(; margin > 0 && leading_byte < 4; margin--) {
         leading_byte += (int8_t(in[margin-1]) > -65);
       }
-      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      // If the input is long enough, then we have that margin-1 is the fourth last leading byte.
       const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64 input(reinterpret_cast(in + pos));
         if(input.is_ascii()) {
-          input.store_ascii_as_utf16(utf16_output);
-          utf16_output += 64;
+          input.store_ascii_as_utf32(utf32_output);
+          utf32_output += 64;
           pos += 64;
         } else {
           // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
@@ -22148,9 +25684,7 @@ using namespace simd;
             this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
           }
           if (errors()) {
-            // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
-            // with the ability to go back up to pos bytes, and read size-pos bytes forward.
-            result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+            result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
             res.count += pos;
             return res;
           }
@@ -22172,8 +25706,8 @@ using namespace simd;
             // for this section of the code. Hence, there is a limit
             // to how much we can further increase this latency before
             // it seriously harms performance.
-            size_t consumed = convert_masked_utf8_to_utf16(in + pos,
-                            utf8_end_of_code_point_mask, utf16_output);
+            size_t consumed = convert_masked_utf8_to_utf32(in + pos,
+                            utf8_end_of_code_point_mask, utf32_output);
             pos += consumed;
             utf8_end_of_code_point_mask >>= consumed;
           }
@@ -22184,24 +25718,20 @@ using namespace simd;
         }
       }
       if(errors()) {
-        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
-        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
-        result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+        result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
         res.count += pos;
         return res;
       }
       if(pos < size) {
-        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
-        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
-        result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+        result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
         if (res.error) {    // In case of error, we want the error position
           res.count += pos;
           return res;
         } else {    // In case of success, we want the number of word written
-          utf16_output += res.count;
+          utf32_output += res.count;
         }
       }
-      return result(error_code::SUCCESS, utf16_output - start);
+      return result(error_code::SUCCESS, utf32_output - start);
     }
 
     simdutf_really_inline bool errors() const {
@@ -22209,70 +25739,138 @@ using namespace simd;
     }
 
   }; // struct utf8_checker
-} // utf8_to_utf16 namespace
+} // utf8_to_utf32 namespace
+} // unnamed namespace
+} // namespace haswell
+} // namespace simdutf
+/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */
+// other functions
+/* begin file src/generic/utf8.h */
+
+namespace simdutf {
+namespace haswell {
+namespace {
+namespace utf8 {
+
+using namespace simd;
+
+simdutf_really_inline size_t count_code_points(const char* in, size_t size) {
+    size_t pos = 0;
+    size_t count = 0;
+    for(;pos + 64 <= size; pos += 64) {
+      simd8x64 input(reinterpret_cast(in + pos));
+      uint64_t utf8_continuation_mask = input.gt(-65);
+      count += count_ones(utf8_continuation_mask);
+    }
+    return count + scalar::utf8::count_code_points(in + pos, size - pos);
+}
+
+simdutf_really_inline size_t utf16_length_from_utf8(const char* in, size_t size) {
+    size_t pos = 0;
+    size_t count = 0;
+    // This algorithm could no doubt be improved!
+    for(;pos + 64 <= size; pos += 64) {
+      simd8x64 input(reinterpret_cast(in + pos));
+      uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+      // We count one word for anything that is not a continuation (so
+      // leading bytes).
+      count += 64 - count_ones(utf8_continuation_mask);
+      int64_t utf8_4byte = input.gteq_unsigned(240);
+      count += count_ones(utf8_4byte);
+    }
+    return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
+}
+
+
+simdutf_really_inline size_t utf32_length_from_utf8(const char* in, size_t size) {
+    return count_code_points(in, size);
+}
+} // utf8 namespace
 } // unnamed namespace
 } // namespace haswell
 } // namespace simdutf
-/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */
-// transcoding from UTF-8 to UTF-32
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/valid_utf8_to_utf32.h
-/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
-
+/* end file src/generic/utf8.h */
+/* begin file src/generic/utf16.h */
 namespace simdutf {
 namespace haswell {
 namespace {
-namespace utf8_to_utf32 {
+namespace utf16 {
 
-using namespace simd;
+template 
+simdutf_really_inline size_t count_code_points(const char16_t* in, size_t size) {
+    size_t pos = 0;
+    size_t count = 0;
+    for(;pos < size/32*32; pos += 32) {
+      simd16x32 input(reinterpret_cast(in + pos));
+      if (!match_system(big_endian)) { input.swap_bytes(); }
+      uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF);
+      count += count_ones(not_pair) / 2;
+    }
+    return count + scalar::utf16::count_code_points(in + pos, size - pos);
+}
 
+template 
+simdutf_really_inline size_t utf8_length_from_utf16(const char16_t* in, size_t size) {
+    size_t pos = 0;
+    size_t count = 0;
+    // This algorithm could no doubt be improved!
+    for(;pos < size/32*32; pos += 32) {
+      simd16x32 input(reinterpret_cast(in + pos));
+      if (!match_system(big_endian)) { input.swap_bytes(); }
+      uint64_t ascii_mask = input.lteq(0x7F);
+      uint64_t twobyte_mask = input.lteq(0x7FF);
+      uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF);
 
-simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
-    char32_t* utf32_output) noexcept {
-  size_t pos = 0;
-  char32_t* start{utf32_output};
-  const size_t safety_margin = 16; // to avoid overruns!
-  while(pos + 64 + safety_margin <= size) {
-    simd8x64 in(reinterpret_cast(input + pos));
-    if(in.is_ascii()) {
-      in.store_ascii_as_utf32(utf32_output);
-      utf32_output += 64;
-      pos += 64;
-    } else {
-    // -65 is 0b10111111 in two-complement's, so largest possible continuation byte
-    uint64_t utf8_continuation_mask = in.lt(-65 + 1);
-    uint64_t utf8_leading_mask = ~utf8_continuation_mask;
-    uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
-    size_t max_starting_point = (pos + 64) - 12;
-    while(pos < max_starting_point) {
-      size_t consumed = convert_masked_utf8_to_utf32(input + pos,
-                          utf8_end_of_code_point_mask, utf32_output);
-      pos += consumed;
-      utf8_end_of_code_point_mask >>= consumed;
-      }
+      size_t ascii_count = count_ones(ascii_mask) / 2;
+      size_t twobyte_count = count_ones(twobyte_mask & ~ ascii_mask) / 2;
+      size_t threebyte_count = count_ones(not_pair_mask & ~ twobyte_mask) / 2;
+      size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2;
+      count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + ascii_count;
     }
-  }
-  utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
-  return utf32_output - start;
+    return count + scalar::utf16::utf8_length_from_utf16(in + pos, size - pos);
+}
+
+template 
+simdutf_really_inline size_t utf32_length_from_utf16(const char16_t* in, size_t size) {
+    return count_code_points(in, size);
 }
 
+simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* output) {
+  size_t pos = 0;
 
-} // namespace utf8_to_utf32
+  while (pos < size/32*32) {
+    simd16x32 input(reinterpret_cast(in + pos));
+    input.swap_bytes();
+    input.store(reinterpret_cast(output));
+    pos += 32;
+    output += 32;
+  }
+
+  scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
+}
+
+} // utf16
 } // unnamed namespace
 } // namespace haswell
 } // namespace simdutf
-/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/utf8_to_utf32.h
-/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */
+/* end file src/generic/utf16.h */
+
+
+// transcoding from UTF-8 to Latin 1
+/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */
 
 
 namespace simdutf {
 namespace haswell {
 namespace {
-namespace utf8_to_utf32 {
+namespace utf8_to_latin1 {
 using namespace simd;
 
 
   simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
+// For UTF-8 to Latin 1, we can allow any ASCII character, and any continuation byte,
+// but the non-ASCII leading bytes must be 0b11000011 or 0b11000010 and nothing else.
+//
 // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
 // Bit 1 = Too Long (ASCII followed by continuation)
 // Bit 2 = Overlong 3-byte
@@ -22299,6 +25897,7 @@ using namespace simd;
                                                 // 1111011_ 1000____
                                                 // 11111___ 1000____
     constexpr const uint8_t OVERLONG_4  = 1<<6; // 11110000 1000____
+    constexpr const uint8_t FORBIDDEN  = 0xff;
 
     const simd8 byte_1_high = prev1.shr<4>().lookup_16(
       // 0_______ ________ 
@@ -22309,11 +25908,11 @@ using namespace simd;
       // 1100____ ________ 
       TOO_SHORT | OVERLONG_2,
       // 1101____ ________ 
-      TOO_SHORT,
+      FORBIDDEN,
       // 1110____ ________ 
-      TOO_SHORT | OVERLONG_3 | SURROGATE,
+      FORBIDDEN,
       // 1111____ ________ 
-      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4
+      FORBIDDEN
     );
     constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
     const simd8 byte_1_low = (prev1 & 0x0F).lookup_16(
@@ -22326,23 +25925,23 @@ using namespace simd;
       CARRY,
 
       // ____0100 ________
-      CARRY | TOO_LARGE,
+      FORBIDDEN,
       // ____0101 ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      FORBIDDEN,
       // ____011_ ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      FORBIDDEN,
+      FORBIDDEN,
 
       // ____1___ ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      FORBIDDEN,
+      FORBIDDEN,
+      FORBIDDEN,
+      FORBIDDEN,
+      FORBIDDEN,
       // ____1101 ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000
+      FORBIDDEN,
+      FORBIDDEN,
+      FORBIDDEN
     );
     const simd8 byte_2_high = input.shr<4>().lookup_16(
       // ________ 0_______ 
@@ -22362,15 +25961,6 @@ using namespace simd;
     );
     return (byte_1_high & byte_1_low & byte_2_high);
   }
-  simdutf_really_inline simd8 check_multibyte_lengths(const simd8 input,
-      const simd8 prev_input, const simd8 sc) {
-    simd8 prev2 = input.prev<2>(prev_input);
-    simd8 prev3 = input.prev<3>(prev_input);
-    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
-    simd8 must23_80 = must23 & uint8_t(0x80);
-    return must23_80 ^ sc;
-  }
-
 
   struct validating_transcoder {
     // If this is nonzero, there has been a UTF-8 error.
@@ -22384,33 +25974,31 @@ using namespace simd;
       // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
       // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
       simd8 prev1 = input.prev<1>(prev_input);
-      simd8 sc = check_special_cases(input, prev1);
-      this->error |= check_multibyte_lengths(input, prev_input, sc);
+      this->error |= check_special_cases(input, prev1);
     }
 
 
-
-    simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
+    simdutf_really_inline size_t convert(const char* in, size_t size, char* latin1_output) {
       size_t pos = 0;
-      char32_t* start{utf32_output};
+      char* start{latin1_output};
       // In the worst case, we have the haswell kernel which can cause an overflow of
-      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last 16 bytes,
       // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
       // much more than 8 bytes. However, you cannot generally assume that you have valid
-      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
       // to give us a good margin.
       size_t leading_byte = 0;
       size_t margin = size;
-      for(; margin > 0 && leading_byte < 4; margin--) {
-        leading_byte += (int8_t(in[margin-1]) > -65);
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65); //twos complement of -65 is 1011 1111 ...
       }
-      // If the input is long enough, then we have that margin-1 is the fourth last leading byte.
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
       const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64 input(reinterpret_cast(in + pos));
         if(input.is_ascii()) {
-          input.store_ascii_as_utf32(utf32_output);
-          utf32_output += 64;
+          input.store((int8_t*)latin1_output);
+          latin1_output += 64;
           pos += 64;
         } else {
           // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
@@ -22426,7 +26014,7 @@ using namespace simd;
             this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
             this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
           }
-          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in this case, we also have ASCII to account for.
           uint64_t utf8_leading_mask = ~utf8_continuation_mask;
           uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
           // We process in blocks of up to 12 bytes except possibly
@@ -22444,8 +26032,8 @@ using namespace simd;
             // for this section of the code. Hence, there is a limit
             // to how much we can further increase this latency before
             // it seriously harms performance.
-            size_t consumed = convert_masked_utf8_to_utf32(in + pos,
-                            utf8_end_of_code_point_mask, utf32_output);
+            size_t consumed = convert_masked_utf8_to_latin1(in + pos,
+                            utf8_end_of_code_point_mask, latin1_output);
             pos += consumed;
             utf8_end_of_code_point_mask >>= consumed;
           }
@@ -22457,34 +26045,34 @@ using namespace simd;
       }
       if(errors()) { return 0; }
       if(pos < size) {
-        size_t howmany  = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
+        size_t howmany  = scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output);
         if(howmany == 0) { return 0; }
-        utf32_output += howmany;
+        latin1_output += howmany;
       }
-      return utf32_output - start;
+      return latin1_output - start;
     }
 
-    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
+    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char* latin1_output) {
       size_t pos = 0;
-      char32_t* start{utf32_output};
+      char* start{latin1_output};
       // In the worst case, we have the haswell kernel which can cause an overflow of
-      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last 16 bytes,
       // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
       // much more than 8 bytes. However, you cannot generally assume that you have valid
-      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
       // to give us a good margin.
       size_t leading_byte = 0;
       size_t margin = size;
-      for(; margin > 0 && leading_byte < 4; margin--) {
+      for(; margin > 0 && leading_byte < 8; margin--) {
         leading_byte += (int8_t(in[margin-1]) > -65);
       }
-      // If the input is long enough, then we have that margin-1 is the fourth last leading byte.
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
       const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64 input(reinterpret_cast(in + pos));
         if(input.is_ascii()) {
-          input.store_ascii_as_utf32(utf32_output);
-          utf32_output += 64;
+          input.store((int8_t*)latin1_output);
+          latin1_output += 64;
           pos += 64;
         } else {
           // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
@@ -22501,7 +26089,9 @@ using namespace simd;
             this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
           }
           if (errors()) {
-            result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
+            // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+            // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+            result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
             res.count += pos;
             return res;
           }
@@ -22523,8 +26113,8 @@ using namespace simd;
             // for this section of the code. Hence, there is a limit
             // to how much we can further increase this latency before
             // it seriously harms performance.
-            size_t consumed = convert_masked_utf8_to_utf32(in + pos,
-                            utf8_end_of_code_point_mask, utf32_output);
+            size_t consumed = convert_masked_utf8_to_latin1(in + pos,
+                            utf8_end_of_code_point_mask, latin1_output);
             pos += consumed;
             utf8_end_of_code_point_mask >>= consumed;
           }
@@ -22535,20 +26125,24 @@ using namespace simd;
         }
       }
       if(errors()) {
-        result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
+        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+        result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
         res.count += pos;
         return res;
       }
       if(pos < size) {
-        result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
+        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+        result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
         if (res.error) {    // In case of error, we want the error position
           res.count += pos;
           return res;
         } else {    // In case of success, we want the number of word written
-          utf32_output += res.count;
+          latin1_output += res.count;
         }
       }
-      return result(error_code::SUCCESS, utf32_output - start);
+      return result(error_code::SUCCESS, latin1_output - start);
     }
 
     simdutf_really_inline bool errors() const {
@@ -22556,124 +26150,88 @@ using namespace simd;
     }
 
   }; // struct utf8_checker
-} // utf8_to_utf32 namespace
+} // utf8_to_latin1 namespace
 } // unnamed namespace
 } // namespace haswell
 } // namespace simdutf
-/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */
-// other functions
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8.h
-/* begin file src/generic/utf8.h */
-
-namespace simdutf {
-namespace haswell {
-namespace {
-namespace utf8 {
-
-using namespace simd;
-
-simdutf_really_inline size_t count_code_points(const char* in, size_t size) {
-    size_t pos = 0;
-    size_t count = 0;
-    for(;pos + 64 <= size; pos += 64) {
-      simd8x64 input(reinterpret_cast(in + pos));
-      uint64_t utf8_continuation_mask = input.lt(-65 + 1);
-      count += 64 - count_ones(utf8_continuation_mask);
-    }
-    return count + scalar::utf8::count_code_points(in + pos, size - pos);
-}
-
-
-simdutf_really_inline size_t utf16_length_from_utf8(const char* in, size_t size) {
-    size_t pos = 0;
-    size_t count = 0;
-    // This algorithm could no doubt be improved!
-    for(;pos + 64 <= size; pos += 64) {
-      simd8x64 input(reinterpret_cast(in + pos));
-      uint64_t utf8_continuation_mask = input.lt(-65 + 1);
-      // We count one word for anything that is not a continuation (so
-      // leading bytes).
-      count += 64 - count_ones(utf8_continuation_mask);
-      int64_t utf8_4byte = input.gteq_unsigned(240);
-      count += count_ones(utf8_4byte);
-    }
-    return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
-}
+/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */
+/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */
 
 
-simdutf_really_inline size_t utf32_length_from_utf8(const char* in, size_t size) {
-    return count_code_points(in, size);
-}
-} // utf8 namespace
-} // unnamed namespace
-} // namespace haswell
-} // namespace simdutf
-/* end file src/generic/utf8.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf16.h
-/* begin file src/generic/utf16.h */
 namespace simdutf {
 namespace haswell {
 namespace {
-namespace utf16 {
-
-template 
-simdutf_really_inline size_t count_code_points(const char16_t* in, size_t size) {
-    size_t pos = 0;
-    size_t count = 0;
-    for(;pos + 32 <= size; pos += 32) {
-      simd16x32 input(reinterpret_cast(in + pos));
-      if (!match_system(big_endian)) input.swap_bytes();
-      uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF);
-      count += count_ones(not_pair) / 2;
-    }
-    return count + scalar::utf16::count_code_points(in + pos, size - pos);
-}
+namespace utf8_to_latin1 {
+using namespace simd;
 
-template 
-simdutf_really_inline size_t utf8_length_from_utf16(const char16_t* in, size_t size) {
-    size_t pos = 0;
-    size_t count = 0;
-    // This algorithm could no doubt be improved!
-    for(;pos + 32 <= size; pos += 32) {
-      simd16x32 input(reinterpret_cast(in + pos));
-      if (!match_system(big_endian)) input.swap_bytes();
-      uint64_t ascii_mask = input.lteq(0x7F);
-      uint64_t twobyte_mask = input.lteq(0x7FF);
-      uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF);
 
-      size_t ascii_count = count_ones(ascii_mask) / 2;
-      size_t twobyte_count = count_ones(twobyte_mask & ~ ascii_mask) / 2;
-      size_t threebyte_count = count_ones(not_pair_mask & ~ twobyte_mask) / 2;
-      size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2;
-      count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + ascii_count;
+    simdutf_really_inline size_t convert_valid(const char* in, size_t size, char* latin1_output) {
+      size_t pos = 0;
+      char* start{latin1_output};
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65); //twos complement of -65 is 1011 1111 ...
+      }
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
+      while(pos + 64 + safety_margin <= size) {
+        simd8x64 input(reinterpret_cast(in + pos));
+        if(input.is_ascii()) {
+          input.store((int8_t*)latin1_output);
+          latin1_output += 64;
+          pos += 64;
+        } else {
+          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in this case, we also have ASCII to account for.
+          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+          // We process in blocks of up to 12 bytes except possibly
+          // for fast paths which may process up to 16 bytes. For the
+          // slow path to work, we should have at least 12 input bytes left.
+          size_t max_starting_point = (pos + 64) - 12;
+          // Next loop is going to run at least five times.
+          while(pos < max_starting_point) {
+            // Performance note: our ability to compute 'consumed' and
+            // then shift and recompute is critical. If there is a
+            // latency of, say, 4 cycles on getting 'consumed', then
+            // the inner loop might have a total latency of about 6 cycles.
+            // Yet we process between 6 to 12 inputs bytes, thus we get
+            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+            // for this section of the code. Hence, there is a limit
+            // to how much we can further increase this latency before
+            // it seriously harms performance.
+            size_t consumed = convert_masked_utf8_to_latin1(in + pos,
+                            utf8_end_of_code_point_mask, latin1_output);
+            pos += consumed;
+            utf8_end_of_code_point_mask >>= consumed;
+          }
+          // At this point there may remain between 0 and 12 bytes in the
+          // 64-byte block. These bytes will be processed again. So we have an
+          // 80% efficiency (in the worst case). In practice we expect an
+          // 85% to 90% efficiency.
+        }
+      }
+      if(pos < size) {
+        size_t howmany  = scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output);
+        if(howmany == 0) { return 0; }
+        latin1_output += howmany;
+      }
+      return latin1_output - start;
     }
-    return count + scalar::utf16::utf8_length_from_utf16(in + pos, size - pos);
-}
-
-template 
-simdutf_really_inline size_t utf32_length_from_utf16(const char16_t* in, size_t size) {
-    return count_code_points(in, size);
-}
-
-simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* output) {
-  size_t pos = 0;
-
-  while (pos + 32 <= size) {
-    simd16x32 input(reinterpret_cast(in + pos));
-    input.swap_bytes();
-    input.store(reinterpret_cast(output));
-    pos += 32;
-    output += 32;
-  }
 
-  scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
-}
-
-} // utf16
-} // unnamed namespace
-} // namespace haswell
-} // namespace simdutf
-/* end file src/generic/utf16.h */
+  }; 
+}   // utf8_to_latin1 namespace
+}   // unnamed namespace
+}   // namespace haswell
+ // namespace simdutf
+/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */
 
 namespace simdutf {
 namespace haswell {
@@ -22766,6 +26324,73 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors(const char
   }
 }
 
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
+  std::pair ret = avx2_convert_latin1_to_utf8(buf, len, utf8_output);
+  size_t converted_chars = ret.second - utf8_output;
+
+  if (ret.first != buf + len) {
+    const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert(
+      ret.first, len - (ret.first - buf), ret.second);
+    converted_chars += scalar_converted_chars;
+  }
+
+  return converted_chars;
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+    std::pair ret = avx2_convert_latin1_to_utf16(buf, len, utf16_output);
+    if (ret.first == nullptr) { return 0; }
+    size_t converted_chars = ret.second - utf16_output;
+    if (ret.first != buf + len) {
+        const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert(
+                                              ret.first, len - (ret.first - buf), ret.second);
+        if (scalar_converted_chars == 0) { return 0; }
+        converted_chars += scalar_converted_chars;
+    }
+    return converted_chars;
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+    std::pair ret = avx2_convert_latin1_to_utf16(buf, len, utf16_output);
+    if (ret.first == nullptr) { return 0; }
+    size_t converted_chars = ret.second - utf16_output;
+    if (ret.first != buf + len) {
+        const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert(
+                                              ret.first, len - (ret.first - buf), ret.second);
+        if (scalar_converted_chars == 0) { return 0; }
+        converted_chars += scalar_converted_chars;
+    }
+    return converted_chars;
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
+    std::pair ret = avx2_convert_latin1_to_utf32(buf, len, utf32_output);
+    if (ret.first == nullptr) { return 0; }
+    size_t converted_chars = ret.second - utf32_output;
+    if (ret.first != buf + len) {
+        const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert(
+                                              ret.first, len - (ret.first - buf), ret.second);
+        if (scalar_converted_chars == 0) { return 0; }
+        converted_chars += scalar_converted_chars;
+    }
+    return converted_chars;
+}
+
+simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
+  utf8_to_latin1::validating_transcoder converter;
+  return converter.convert(buf, len, latin1_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
+  utf8_to_latin1::validating_transcoder converter;
+  return converter.convert_with_errors(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* input, size_t size,
+    char* latin1_output) const noexcept {
+   return utf8_to_latin1::convert_valid(input, size,  latin1_output);
+}
+
 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
   utf8_to_utf16::validating_transcoder converter;
   return converter.convert(buf, len, utf16_output);
@@ -22811,6 +26436,77 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const cha
   return utf8_to_utf32::convert_valid(input, size,  utf32_output);
 }
 
+
+simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = haswell::avx2_convert_utf16_to_latin1(buf, len, latin1_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - latin1_output;
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = haswell::avx2_convert_utf16_to_latin1(buf, len, latin1_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - latin1_output;
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = avx2_convert_utf16_to_latin1_with_errors(buf, len, latin1_output);
+  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
+  if (ret.first.count != len) { // All good so far, but not finished
+    result scalar_res = scalar::utf16_to_latin1::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - latin1_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = avx2_convert_utf16_to_latin1_with_errors(buf, len, latin1_output);
+  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
+  if (ret.first.count != len) { // All good so far, but not finished
+    result scalar_res = scalar::utf16_to_latin1::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - latin1_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  // optimization opportunity: implement a custom function
+  return convert_utf16be_to_latin1(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  // optimization opportunity: implement a custom function
+  return convert_utf16le_to_latin1(buf, len, latin1_output);
+}
+
 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
   std::pair ret = haswell::avx2_convert_utf16_to_utf8(buf, len, utf8_output);
   if (ret.first == nullptr) { return 0; }
@@ -22838,7 +26534,7 @@ simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_
 }
 
 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = haswell::avx2_convert_utf16_to_utf8_with_errors(buf, len, utf8_output);
   if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
   if (ret.first.count != len) { // All good so far, but not finished
@@ -22851,12 +26547,12 @@ simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(c
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = haswell::avx2_convert_utf16_to_utf8_with_errors(buf, len, utf8_output);
   if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
   if (ret.first.count != len) { // All good so far, but not finished
@@ -22869,7 +26565,7 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(c
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
@@ -22894,8 +26590,43 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t*
   return saved_bytes;
 }
 
+simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = avx2_convert_utf32_to_latin1(buf, len, latin1_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - latin1_output;
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  return scalar::utf32_to_latin1::convert_with_errors(buf,len,latin1_output);
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
+  std::pair ret = avx2_convert_utf32_to_latin1_with_errors(buf, len, latin1_output);
+  if (ret.first.count != len) {
+    result scalar_res = scalar::utf32_to_latin1::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - latin1_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  return convert_utf32_to_latin1(buf,len,latin1_output);
+}
+
 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = haswell::avx2_convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
   if (ret.first.count != len) {
     result scalar_res = scalar::utf32_to_utf8::convert_with_errors(
@@ -22907,7 +26638,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(con
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
@@ -22938,7 +26669,7 @@ simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16
 }
 
 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = haswell::avx2_convert_utf16_to_utf32_with_errors(buf, len, utf32_output);
   if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
   if (ret.first.count != len) { // All good so far, but not finished
@@ -22951,12 +26682,12 @@ simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf32_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf32_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = haswell::avx2_convert_utf16_to_utf32_with_errors(buf, len, utf32_output);
   if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
   if (ret.first.count != len) { // All good so far, but not finished
@@ -22969,7 +26700,7 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf32_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf32_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
@@ -23004,7 +26735,7 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32
 }
 
 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = haswell::avx2_convert_utf32_to_utf16_with_errors(buf, len, utf16_output);
   if (ret.first.count != len) {
     result scalar_res = scalar::utf32_to_utf16::convert_with_errors(
@@ -23016,12 +26747,12 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = haswell::avx2_convert_utf32_to_utf16_with_errors(buf, len, utf16_output);
   if (ret.first.count != len) {
     result scalar_res = scalar::utf32_to_utf16::convert_with_errors(
@@ -23033,7 +26764,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
@@ -23069,6 +26800,18 @@ simdutf_warn_unused size_t implementation::count_utf8(const char * input, size_t
   return utf8::count_code_points(input, length);
 }
 
+simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
+  return count_utf8(buf,len);
+}
+
+simdutf_warn_unused size_t implementation::latin1_length_from_utf16(size_t length) const noexcept {
+  return scalar::utf16::latin1_length_from_utf16(length);
+}
+
+simdutf_warn_unused size_t implementation::latin1_length_from_utf32(size_t length) const noexcept {
+  return scalar::utf32::latin1_length_from_utf32(length);
+}
+
 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
   return utf16::utf8_length_from_utf16(input, length);
 }
@@ -23085,10 +26828,61 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf16be(const char1
   return utf16::utf32_length_from_utf16(input, length);
 }
 
+
+simdutf_warn_unused size_t implementation::utf16_length_from_latin1(size_t length) const noexcept {
+  return scalar::latin1::utf16_length_from_latin1(length);
+}
+
 simdutf_warn_unused size_t implementation::utf16_length_from_utf8(const char * input, size_t length) const noexcept {
   return utf8::utf16_length_from_utf8(input, length);
 }
 
+
+simdutf_warn_unused size_t implementation::utf32_length_from_latin1(size_t length) const noexcept {
+  return scalar::latin1::utf32_length_from_latin1(length);
+}
+
+simdutf_warn_unused size_t implementation::utf8_length_from_latin1(const char *input, size_t len) const noexcept {
+  const uint8_t *data = reinterpret_cast(input);
+  size_t answer = len / sizeof(__m256i) * sizeof(__m256i);
+  size_t i = 0;
+  __m256i four_64bits = _mm256_setzero_si256();
+  while (i + sizeof(__m256i) <= len) {
+    __m256i runner = _mm256_setzero_si256();
+    // We can do up to 255 loops without overflow.
+    size_t iterations = (len - i) / sizeof(__m256i);
+    if (iterations > 255) {
+      iterations = 255;
+    }
+    size_t max_i = i + iterations * sizeof(__m256i) - sizeof(__m256i);
+    for (; i + 4*sizeof(__m256i) <= max_i; i += 4*sizeof(__m256i)) {
+      __m256i input1 = _mm256_loadu_si256((const __m256i *)(data + i));
+      __m256i input2 = _mm256_loadu_si256((const __m256i *)(data + i + sizeof(__m256i)));
+      __m256i input3 = _mm256_loadu_si256((const __m256i *)(data + i + 2*sizeof(__m256i)));
+      __m256i input4 = _mm256_loadu_si256((const __m256i *)(data + i + 3*sizeof(__m256i)));
+      __m256i input12 = _mm256_add_epi8(_mm256_cmpgt_epi8(_mm256_setzero_si256(), input1),
+              _mm256_cmpgt_epi8(_mm256_setzero_si256(), input2));
+      __m256i input23 = _mm256_add_epi8(_mm256_cmpgt_epi8(_mm256_setzero_si256(), input3),
+              _mm256_cmpgt_epi8(_mm256_setzero_si256(), input4));
+      __m256i input1234 = _mm256_add_epi8(input12, input23);
+      runner = _mm256_sub_epi8(
+          runner, input1234);
+    }
+    for (; i <= max_i; i += sizeof(__m256i)) {
+      __m256i input_256_chunk = _mm256_loadu_si256((const __m256i *)(data + i));
+      runner = _mm256_sub_epi8(
+          runner, _mm256_cmpgt_epi8(_mm256_setzero_si256(), input_256_chunk));
+    }
+    four_64bits = _mm256_add_epi64(
+        four_64bits, _mm256_sad_epu8(runner, _mm256_setzero_si256()));
+  }
+  answer += _mm256_extract_epi64(four_64bits, 0) +
+            _mm256_extract_epi64(four_64bits, 1) +
+            _mm256_extract_epi64(four_64bits, 2) +
+            _mm256_extract_epi64(four_64bits, 3);
+  return answer + scalar::latin1::utf8_length_from_latin1(reinterpret_cast(data + i), len - i);
+}
+
 simdutf_warn_unused size_t implementation::utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept {
   const __m256i v_00000000 = _mm256_setzero_si256();
   const __m256i v_ffffff80 = _mm256_set1_epi32((uint32_t)0xffffff80);
@@ -23131,13 +26925,12 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_
 }
 
 simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept {
-  return scalar::utf8::count_code_points(input, length);
+  return utf8::count_code_points(input, length);
 }
 
 } // namespace haswell
 } // namespace simdutf
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/haswell/end.h
 /* begin file src/simdutf/haswell/end.h */
 #if SIMDUTF_CAN_ALWAYS_RUN_HASWELL
 // nothing needed.
@@ -23153,14 +26946,12 @@ SIMDUTF_POP_DISABLE_WARNINGS
 /* end file src/haswell/implementation.cpp */
 #endif
 #if SIMDUTF_IMPLEMENTATION_PPC64
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=ppc64/implementation.cpp
 /* begin file src/ppc64/implementation.cpp */
 
 
 
 
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/begin.h
 /* begin file src/simdutf/ppc64/begin.h */
 // redefining SIMDUTF_IMPLEMENTATION to "ppc64"
 // #define SIMDUTF_IMPLEMENTATION ppc64
@@ -23198,7 +26989,6 @@ simdutf_really_inline simd8 must_be_2_3_continuation(const simd8
 } // namespace ppc64
 } // namespace simdutf
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/buf_block_reader.h
 /* begin file src/generic/buf_block_reader.h */
 namespace simdutf {
 namespace ppc64 {
@@ -23293,7 +27083,6 @@ simdutf_really_inline void buf_block_reader::advance() {
 } // namespace ppc64
 } // namespace simdutf
 /* end file src/generic/buf_block_reader.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_lookup4_algorithm.h
 /* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
 namespace simdutf {
 namespace ppc64 {
@@ -23482,7 +27271,6 @@ using utf8_validation::utf8_checker;
 } // namespace ppc64
 } // namespace simdutf
 /* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_validator.h
 /* begin file src/generic/utf8_validation/utf8_validator.h */
 namespace simdutf {
 namespace ppc64 {
@@ -23610,7 +27398,6 @@ result generic_validate_ascii_with_errors(const char * input, size_t length) {
 } // namespace simdutf
 /* end file src/generic/utf8_validation/utf8_validator.h */
 // transcoding from UTF-8 to UTF-16
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/valid_utf8_to_utf16.h
 /* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
 
 
@@ -23685,7 +27472,6 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
 } // namespace ppc64
 } // namespace simdutf
 /* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/utf8_to_utf16.h
 /* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */
 
 
@@ -23993,7 +27779,6 @@ using namespace simd;
 } // namespace simdutf
 /* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */
 // transcoding from UTF-8 to UTF-32
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/valid_utf8_to_utf32.h
 /* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
 
 namespace simdutf {
@@ -24039,7 +27824,6 @@ simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
 } // namespace ppc64
 } // namespace simdutf
 /* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/utf8_to_utf32.h
 /* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */
 
 
@@ -24340,7 +28124,6 @@ using namespace simd;
 } // namespace simdutf
 /* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */
 // other functions
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8.h
 /* begin file src/generic/utf8.h */
 
 namespace simdutf {
@@ -24355,13 +28138,12 @@ simdutf_really_inline size_t count_code_points(const char* in, size_t size) {
     size_t count = 0;
     for(;pos + 64 <= size; pos += 64) {
       simd8x64 input(reinterpret_cast(in + pos));
-      uint64_t utf8_continuation_mask = input.lt(-65 + 1);
-      count += 64 - count_ones(utf8_continuation_mask);
+      uint64_t utf8_continuation_mask = input.gt(-65);
+      count += count_ones(utf8_continuation_mask);
     }
     return count + scalar::utf8::count_code_points(in + pos, size - pos);
 }
 
-
 simdutf_really_inline size_t utf16_length_from_utf8(const char* in, size_t size) {
     size_t pos = 0;
     size_t count = 0;
@@ -24387,7 +28169,6 @@ simdutf_really_inline size_t utf32_length_from_utf8(const char* in, size_t size)
 } // namespace ppc64
 } // namespace simdutf
 /* end file src/generic/utf8.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf16.h
 /* begin file src/generic/utf16.h */
 namespace simdutf {
 namespace ppc64 {
@@ -24398,9 +28179,9 @@ template 
 simdutf_really_inline size_t count_code_points(const char16_t* in, size_t size) {
     size_t pos = 0;
     size_t count = 0;
-    for(;pos + 32 <= size; pos += 32) {
+    for(;pos < size/32*32; pos += 32) {
       simd16x32 input(reinterpret_cast(in + pos));
-      if (!match_system(big_endian)) input.swap_bytes();
+      if (!match_system(big_endian)) { input.swap_bytes(); }
       uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF);
       count += count_ones(not_pair) / 2;
     }
@@ -24412,9 +28193,9 @@ simdutf_really_inline size_t utf8_length_from_utf16(const char16_t* in, size_t s
     size_t pos = 0;
     size_t count = 0;
     // This algorithm could no doubt be improved!
-    for(;pos + 32 <= size; pos += 32) {
+    for(;pos < size/32*32; pos += 32) {
       simd16x32 input(reinterpret_cast(in + pos));
-      if (!match_system(big_endian)) input.swap_bytes();
+      if (!match_system(big_endian)) { input.swap_bytes(); }
       uint64_t ascii_mask = input.lteq(0x7F);
       uint64_t twobyte_mask = input.lteq(0x7FF);
       uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF);
@@ -24436,7 +28217,7 @@ simdutf_really_inline size_t utf32_length_from_utf16(const char16_t* in, size_t
 simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* output) {
   size_t pos = 0;
 
-  while (pos + 32 <= size) {
+  while (pos < size/32*32) {
     simd16x32 input(reinterpret_cast(in + pos));
     input.swap_bytes();
     input.store(reinterpret_cast(output));
@@ -24686,15 +28467,12 @@ simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * i
 } // namespace ppc64
 } // namespace simdutf
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/ppc64/end.h
 /* begin file src/simdutf/ppc64/end.h */
 /* end file src/simdutf/ppc64/end.h */
 /* end file src/ppc64/implementation.cpp */
 #endif
 #if SIMDUTF_IMPLEMENTATION_WESTMERE
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/implementation.cpp
 /* begin file src/westmere/implementation.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/begin.h
 /* begin file src/simdutf/westmere/begin.h */
 // redefining SIMDUTF_IMPLEMENTATION to "westmere"
 // #define SIMDUTF_IMPLEMENTATION westmere
@@ -24732,7 +28510,84 @@ simdutf_really_inline simd8 must_be_2_3_continuation(const simd8
   return simd8(is_third_byte | is_fourth_byte) > int8_t(0);
 }
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_detect_encodings.cpp
+/* begin file src/westmere/internal/loader.cpp */
+namespace internal {
+namespace westmere {
+
+/* begin file src/westmere/internal/write_v_u16_11bits_to_utf8.cpp */
+/*
+* reads a vector of uint16 values
+* bits after 11th are ignored
+* first 11 bits are encoded into utf8
+* !important! utf8_output must have at least 16 writable bytes
+*/
+
+inline void write_v_u16_11bits_to_utf8(
+  const __m128i v_u16,
+  char*& utf8_output,
+  const __m128i one_byte_bytemask,
+  const uint16_t one_byte_bitmask
+) {
+  // 0b1100_0000_1000_0000
+  const __m128i v_c080 = _mm_set1_epi16((int16_t)0xc080);
+  // 0b0001_1111_0000_0000
+  const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00);
+  // 0b0000_0000_0011_1111
+  const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f);
+
+  // 1. prepare 2-byte values
+          // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
+          // expected output   : [110a|aaaa|10bb|bbbb] x 8
+
+  // t0 = [000a|aaaa|bbbb|bb00]
+  const __m128i t0 = _mm_slli_epi16(v_u16, 2);
+  // t1 = [000a|aaaa|0000|0000]
+  const __m128i t1 = _mm_and_si128(t0, v_1f00);
+  // t2 = [0000|0000|00bb|bbbb]
+  const __m128i t2 = _mm_and_si128(v_u16, v_003f);
+  // t3 = [000a|aaaa|00bb|bbbb]
+  const __m128i t3 = _mm_or_si128(t1, t2);
+  // t4 = [110a|aaaa|10bb|bbbb]
+  const __m128i t4 = _mm_or_si128(t3, v_c080);
+
+  // 2. merge ASCII and 2-byte codewords
+  const __m128i utf8_unpacked = _mm_blendv_epi8(t4, v_u16, one_byte_bytemask);
+
+  // 3. prepare bitmask for 8-bit lookup
+  //    one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - MSB, a - LSB)
+  const uint16_t m0 = one_byte_bitmask & 0x5555;  // m0 = 0h0g0f0e0d0c0b0a
+  const uint16_t m1 = static_cast(m0 >> 7);                    // m1 = 00000000h0g0f0e0
+  const uint8_t  m2 = static_cast((m0 | m1) & 0xff);           // m2 =         hdgcfbea
+  // 4. pack the bytes
+  const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
+  const __m128i shuffle = _mm_loadu_si128((__m128i*)(row + 1));
+  const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle);
+
+  // 5. store bytes
+  _mm_storeu_si128((__m128i*)utf8_output, utf8_packed);
+
+  // 6. adjust pointers
+  utf8_output += row[0];
+};
+
+inline void write_v_u16_11bits_to_utf8(
+  const __m128i v_u16,
+  char*& utf8_output,
+  const __m128i v_0000,
+  const __m128i v_ff80
+) {
+  // no bits set above 7th bit
+  const __m128i one_byte_bytemask = _mm_cmpeq_epi16(_mm_and_si128(v_u16, v_ff80), v_0000);
+  const uint16_t one_byte_bitmask = static_cast(_mm_movemask_epi8(one_byte_bytemask));
+
+  write_v_u16_11bits_to_utf8(
+    v_u16, utf8_output, one_byte_bytemask, one_byte_bitmask);
+};
+/* end file src/westmere/internal/write_v_u16_11bits_to_utf8.cpp */
+
+} // namespace westmere
+} // namespace internal
+/* end file src/westmere/internal/loader.cpp */
 /* begin file src/westmere/sse_detect_encodings.cpp */
 template
 // len is known to be a multiple of 2 when this is called
@@ -24785,7 +28640,7 @@ int sse_detect_encodings(const char * buf, size_t len) {
             // To be valid UTF-32, a surrogate cannot be in the two most significant bytes of any 32-bit word.
             // On the other hand, to be valid UTF-16LE, at least one surrogate must be in the two most significant
             // bytes of a 32-bit word since they always come in pairs in UTF-16LE.
-            // Note that we always proceed in multiple of 4 before this point so there is no offset in 32-bit words.
+            // Note that we always proceed in multiple of 4 before this point so there is no offset in 32-bit code units.
 
             if (((surrogates_bitmask0 | surrogates_bitmask1) & 0xaaaa) != 0) {
                 is_utf32 = false;
@@ -24942,10 +28797,9 @@ int sse_detect_encodings(const char * buf, size_t len) {
 }
 /* end file src/westmere/sse_detect_encodings.cpp */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_validate_utf16.cpp
 /* begin file src/westmere/sse_validate_utf16.cpp */
 /*
-    In UTF-16 words in range 0xD800 to 0xDFFF have special meaning.
+    In UTF-16 code units in range 0xD800 to 0xDFFF have special meaning.
 
     In a vectorized algorithm we want to examine the most significant
     nibble in order to select a fast path. If none of highest nibbles
@@ -24981,7 +28835,7 @@ int sse_detect_encodings(const char * buf, size_t len) {
       0   0   1   0   1   0   0   0   b = a << 1
       1   1   1   1   1   1   1   0   c = V | a | b
                                   ^
-                                  the last bit can be zero, we just consume 7 words
+                                  the last bit can be zero, we just consume 7 code units
                                   and recheck this word in the next iteration
 */
 
@@ -25026,7 +28880,7 @@ const char16_t* sse_validate_utf16(const char16_t* input, size_t size) {
             //
             //    Fact: high surrogate has 11th bit set (3rd bit in the higher word)
 
-            // V - non-surrogate words
+            // V - non-surrogate code units
             //     V = not surrogates_wordmask
             const uint16_t V = static_cast(~surrogates_bitmask);
 
@@ -25047,10 +28901,10 @@ const char16_t* sse_validate_utf16(const char16_t* input, size_t size) {
 
             if (c == 0xffff) {
                 // The whole input register contains valid UTF-16, i.e.,
-                // either single words or proper surrogate pairs.
+                // either single code units or proper surrogate pairs.
                 input += 16;
             } else if (c == 0x7fff) {
-                // The 15 lower words of the input register contains valid UTF-16.
+                // The 15 lower code units of the input register contains valid UTF-16.
                 // The 15th word may be either a low or high surrogate. It the next
                 // iteration we 1) check if the low surrogate is followed by a high
                 // one, 2) reject sole high surrogate.
@@ -25104,7 +28958,7 @@ const result sse_validate_utf16_with_errors(const char16_t* input, size_t size)
             //
             //    Fact: high surrogate has 11th bit set (3rd bit in the higher word)
 
-            // V - non-surrogate words
+            // V - non-surrogate code units
             //     V = not surrogates_wordmask
             const uint16_t V = static_cast(~surrogates_bitmask);
 
@@ -25125,10 +28979,10 @@ const result sse_validate_utf16_with_errors(const char16_t* input, size_t size)
 
             if (c == 0xffff) {
                 // The whole input register contains valid UTF-16, i.e.,
-                // either single words or proper surrogate pairs.
+                // either single code units or proper surrogate pairs.
                 input += 16;
             } else if (c == 0x7fff) {
-                // The 15 lower words of the input register contains valid UTF-16.
+                // The 15 lower code units of the input register contains valid UTF-16.
                 // The 15th word may be either a low or high surrogate. It the next
                 // iteration we 1) check if the low surrogate is followed by a high
                 // one, 2) reject sole high surrogate.
@@ -25142,7 +28996,6 @@ const result sse_validate_utf16_with_errors(const char16_t* input, size_t size)
     return result(error_code::SUCCESS, input - start);
 }
 /* end file src/westmere/sse_validate_utf16.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_validate_utf32le.cpp
 /* begin file src/westmere/sse_validate_utf32le.cpp */
 /* Returns:
    - pointer to the last unprocessed character (a scalar fallback should check the rest);
@@ -25208,7 +29061,144 @@ const result sse_validate_utf32le_with_errors(const char32_t* input, size_t size
 }
 /* end file src/westmere/sse_validate_utf32le.cpp */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf8_to_utf16.cpp
+/* begin file src/westmere/sse_convert_latin1_to_utf8.cpp */
+std::pair sse_convert_latin1_to_utf8(
+  const char* latin_input,
+  const size_t latin_input_length,
+  char* utf8_output) {
+  const char* end = latin_input + latin_input_length;
+
+  const __m128i v_0000 = _mm_setzero_si128();
+  // 0b1000_0000
+  const __m128i v_80 = _mm_set1_epi8((uint8_t)0x80);
+  // 0b1111_1111_1000_0000
+  const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80);
+
+  const __m128i latin_1_half_into_u16_byte_mask = _mm_setr_epi8(
+      0, '\x80',
+      1, '\x80',
+      2, '\x80',
+      3, '\x80',
+      4, '\x80',
+      5, '\x80',
+      6, '\x80',
+      7, '\x80'
+    );
+
+  const __m128i latin_2_half_into_u16_byte_mask = _mm_setr_epi8(
+      8, '\x80',
+      9, '\x80',
+      10, '\x80',
+      11, '\x80',
+      12, '\x80',
+      13, '\x80',
+      14, '\x80',
+      15, '\x80'
+    );
+
+  // each latin1 takes 1-2 utf8 bytes
+  // slow path writes useful 8-15 bytes twice (eagerly writes 16 bytes and then adjust the pointer)
+  // so the last write can exceed the utf8_output size by 8-1 bytes 
+  // by reserving 8 extra input bytes, we expect the output to have 8-16 bytes free
+  while (latin_input + 16 + 8 <= end) {
+    // Load 16 Latin1 characters (16 bytes) into a 128-bit register
+    __m128i v_latin = _mm_loadu_si128((__m128i*)latin_input);
+
+
+    if (_mm_testz_si128(v_latin, v_80)) {// ASCII fast path!!!!
+      _mm_storeu_si128((__m128i*)utf8_output, v_latin);
+      latin_input += 16;
+      utf8_output += 16;
+      continue;
+    }
+    
+
+    // assuming a/b are bytes and A/B are uint16 of the same value
+    // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA
+    __m128i v_u16_latin_1_half = _mm_shuffle_epi8(v_latin, latin_1_half_into_u16_byte_mask);
+    // aaaa_aaaa_bbbb_bbbb -> BBBB_BBBB
+    __m128i v_u16_latin_2_half = _mm_shuffle_epi8(v_latin, latin_2_half_into_u16_byte_mask);
+
+
+    internal::westmere::write_v_u16_11bits_to_utf8(v_u16_latin_1_half, utf8_output, v_0000, v_ff80);
+    internal::westmere::write_v_u16_11bits_to_utf8(v_u16_latin_2_half, utf8_output, v_0000, v_ff80);
+    latin_input += 16;
+  }
+
+  if (latin_input + 16 <= end) {
+    // Load 16 Latin1 characters (16 bytes) into a 128-bit register
+    __m128i v_latin = _mm_loadu_si128((__m128i*)latin_input);
+
+    if (_mm_testz_si128(v_latin, v_80)) {// ASCII fast path!!!!
+      _mm_storeu_si128((__m128i*)utf8_output, v_latin);
+      latin_input += 16;
+      utf8_output += 16;
+    } else {
+      // assuming a/b are bytes and A/B are uint16 of the same value
+      // aaaa_aaaa_bbbb_bbbb -> AAAA_AAAA
+      __m128i v_u16_latin_1_half = _mm_shuffle_epi8(v_latin, latin_1_half_into_u16_byte_mask);
+      internal::westmere::write_v_u16_11bits_to_utf8(v_u16_latin_1_half, utf8_output, v_0000, v_ff80);
+      latin_input += 8;
+    }
+  }
+
+  return std::make_pair(latin_input, utf8_output);
+};
+/* end file src/westmere/sse_convert_latin1_to_utf8.cpp */
+/* begin file src/westmere/sse_convert_latin1_to_utf16.cpp */
+template 
+std::pair sse_convert_latin1_to_utf16(const char *latin1_input, size_t len,
+                                                              char16_t *utf16_output) {
+    size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16
+    for (size_t i = 0; i < rounded_len; i += 16) {
+        // Load 16 Latin1 characters into a 128-bit register
+        __m128i in = _mm_loadu_si128(reinterpret_cast(&latin1_input[i]));
+        __m128i out1 = big_endian ? _mm_unpacklo_epi8(_mm_setzero_si128(), in)
+                         : _mm_unpacklo_epi8(in, _mm_setzero_si128());
+        __m128i out2 = big_endian ? _mm_unpackhi_epi8(_mm_setzero_si128(), in)
+                         : _mm_unpackhi_epi8(in, _mm_setzero_si128());
+        // Zero extend each Latin1 character to 16-bit integers and store the results back to memory
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(&utf16_output[i]), out1);
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(&utf16_output[i + 8]), out2);
+    }
+    // return pointers pointing to where we left off
+    return std::make_pair(latin1_input + rounded_len, utf16_output + rounded_len);
+}
+/* end file src/westmere/sse_convert_latin1_to_utf16.cpp */
+/* begin file src/westmere/sse_convert_latin1_to_utf32.cpp */
+std::pair sse_convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
+    const char* end = buf + len;
+
+    while (buf + 16 <= end) {
+        // Load 16 Latin1 characters (16 bytes) into a 128-bit register
+        __m128i in = _mm_loadu_si128((__m128i*)buf);
+
+        // Shift input to process next 4 bytes
+        __m128i in_shifted1 = _mm_srli_si128(in, 4);
+        __m128i in_shifted2 = _mm_srli_si128(in, 8);
+        __m128i in_shifted3 = _mm_srli_si128(in, 12);
+
+        // expand 8-bit to 32-bit unit      
+        __m128i out1 = _mm_cvtepu8_epi32(in);
+        __m128i out2 = _mm_cvtepu8_epi32(in_shifted1);
+        __m128i out3 = _mm_cvtepu8_epi32(in_shifted2);
+        __m128i out4 = _mm_cvtepu8_epi32(in_shifted3);
+
+        _mm_storeu_si128((__m128i*)utf32_output, out1);
+        _mm_storeu_si128((__m128i*)(utf32_output + 4), out2);
+        _mm_storeu_si128((__m128i*)(utf32_output + 8), out3);
+        _mm_storeu_si128((__m128i*)(utf32_output + 12), out4);
+
+        utf32_output += 16;
+        buf += 16;
+    }
+
+    return std::make_pair(buf, utf32_output);
+}
+
+/* end file src/westmere/sse_convert_latin1_to_utf32.cpp */
+
+
 /* begin file src/westmere/sse_convert_utf8_to_utf16.cpp */
 // depends on "tables/utf8_to_utf16_tables.h"
 
@@ -25249,7 +29239,7 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     return 16; // We consumed 16 bytes.
   }
   if(((utf8_end_of_code_point_mask & 0xFFFF) == 0xaaaa)) {
-    // We want to take 8 2-byte UTF-8 words and turn them into 8 2-byte UTF-16 words.
+    // We want to take 8 2-byte UTF-8 code units and turn them into 8 2-byte UTF-16 code units.
     // There is probably a more efficient sequence, but the following might do.
     const __m128i sh = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -25262,7 +29252,7 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     return 16;
   }
   if(input_utf8_end_of_code_point_mask == 0x924) {
-    // We want to take 4 3-byte UTF-8 words and turn them into 4 2-byte UTF-16 words.
+    // We want to take 4 3-byte UTF-8 code units and turn them into 4 2-byte UTF-16 code units.
     // There is probably a more efficient sequence, but the following might do.
     const __m128i sh = _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -25289,10 +29279,10 @@ size_t convert_masked_utf8_to_utf16(const char *input,
   const uint8_t consumed =
       tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
   if (idx < 64) {
-    // SIX (6) input code-words
+    // SIX (6) input code-code units
     // this is a relatively easy scenario
-    // we process SIX (6) input code-words. The max length in bytes of six code
-    // words spanning between 1 and 2 bytes each is 12 bytes. On processors
+    // we process SIX (6) input code-code units. The max length in bytes of six code
+    // code units spanning between 1 and 2 bytes each is 12 bytes. On processors
     // where pdep/pext is fast, we might be able to use a small lookup table.
     const __m128i sh =
         _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
@@ -25304,7 +29294,7 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     _mm_storeu_si128((__m128i *)utf16_output, composed);
     utf16_output += 6; // We wrote 12 bytes, 6 code points.
   } else if (idx < 145) {
-    // FOUR (4) input code-words
+    // FOUR (4) input code-code units
     const __m128i sh =
         _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -25323,7 +29313,7 @@ size_t convert_masked_utf8_to_utf16(const char *input,
     _mm_storeu_si128((__m128i *)utf16_output, composed_repacked);
     utf16_output += 4;
   } else if (idx < 209) {
-    // TWO (2) input code-words
+    // TWO (2) input code-code units
     //////////////
     // There might be garbage inputs where a leading byte mascarades as a four-byte
     // leading byte (by being followed by 3 continuation byte), but is not greater than
@@ -25393,7 +29383,6 @@ size_t convert_masked_utf8_to_utf16(const char *input,
   return consumed;
 }
 /* end file src/westmere/sse_convert_utf8_to_utf16.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf8_to_utf32.cpp
 /* begin file src/westmere/sse_convert_utf8_to_utf32.cpp */
 // depends on "tables/utf8_to_utf16_tables.h"
 
@@ -25428,7 +29417,7 @@ size_t convert_masked_utf8_to_utf32(const char *input,
     return 16; // We consumed 16 bytes.
   }
   if(((utf8_end_of_code_point_mask & 0xffff) == 0xaaaa)) {
-    // We want to take 8 2-byte UTF-8 words and turn them into 8 4-byte UTF-32 words.
+    // We want to take 8 2-byte UTF-8 code units and turn them into 8 4-byte UTF-32 code units.
     // There is probably a more efficient sequence, but the following might do.
     const __m128i sh = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -25441,7 +29430,7 @@ size_t convert_masked_utf8_to_utf32(const char *input,
     return 16;
   }
   if(input_utf8_end_of_code_point_mask == 0x924) {
-    // We want to take 4 3-byte UTF-8 words and turn them into 4 4-byte UTF-32 words.
+    // We want to take 4 3-byte UTF-8 code units and turn them into 4 4-byte UTF-32 code units.
     // There is probably a more efficient sequence, but the following might do.
     const __m128i sh = _mm_setr_epi8(2, 1, 0, -1, 5, 4, 3, -1, 8, 7, 6, -1, 11, 10, 9, -1);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -25466,10 +29455,10 @@ size_t convert_masked_utf8_to_utf32(const char *input,
   const uint8_t consumed =
       tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
   if (idx < 64) {
-    // SIX (6) input code-words
+    // SIX (6) input code-code units
     // this is a relatively easy scenario
-    // we process SIX (6) input code-words. The max length in bytes of six code
-    // words spanning between 1 and 2 bytes each is 12 bytes. On processors
+    // we process SIX (6) input code-code units. The max length in bytes of six code
+    // code units spanning between 1 and 2 bytes each is 12 bytes. On processors
     // where pdep/pext is fast, we might be able to use a small lookup table.
     const __m128i sh =
         _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
@@ -25481,7 +29470,7 @@ size_t convert_masked_utf8_to_utf32(const char *input,
     _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output+4), _mm_cvtepu16_epi32(_mm_srli_si128(composed,8)));
     utf32_output += 6; // We wrote 12 bytes, 6 code points.
   } else if (idx < 145) {
-    // FOUR (4) input code-words
+    // FOUR (4) input code-code units
     const __m128i sh =
         _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -25498,7 +29487,7 @@ size_t convert_masked_utf8_to_utf32(const char *input,
     _mm_storeu_si128((__m128i *)utf32_output, composed);
     utf32_output += 4;
   } else if (idx < 209) {
-    // TWO (2) input code-words
+    // TWO (2) input code-code units
     const __m128i sh =
         _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
     const __m128i perm = _mm_shuffle_epi8(in, sh);
@@ -25523,13 +29512,130 @@ size_t convert_masked_utf8_to_utf32(const char *input,
   }
   return consumed;
 }
-/* end file src/westmere/sse_convert_utf8_to_utf32.cpp */
+/* end file src/westmere/sse_convert_utf8_to_utf32.cpp */
+/* begin file src/westmere/sse_convert_utf8_to_latin1.cpp */
+// depends on "tables/utf8_to_utf16_tables.h"
+
+
+// Convert up to 12 bytes from utf8 to latin1 using a mask indicating the
+// end of the code points. Only the least significant 12 bits of the mask
+// are accessed.
+// It returns how many bytes were consumed (up to 12).
+size_t convert_masked_utf8_to_latin1(const char *input,
+                           uint64_t utf8_end_of_code_point_mask,
+                           char *&latin1_output) {
+  // we use an approach where we try to process up to 12 input bytes.
+  // Why 12 input bytes and not 16? Because we are concerned with the size of
+  // the lookup tables. Also 12 is nicely divisible by two and three.
+  //
+  //
+  // Optimization note: our main path below is load-latency dependent. Thus it is maybe
+  // beneficial to have fast paths that depend on branch prediction but have less latency.
+  // This results in more instructions but, potentially, also higher speeds.
+  //
+  const __m128i in = _mm_loadu_si128((__m128i *)input);
+  const uint16_t input_utf8_end_of_code_point_mask =
+      utf8_end_of_code_point_mask & 0xfff; //we're only processing 12 bytes in case it`s not all ASCII
+  if(((utf8_end_of_code_point_mask & 0xffff) == 0xffff)) {
+    // We process the data in chunks of 16 bytes.
+    _mm_storeu_si128(reinterpret_cast<__m128i *>(latin1_output), in);
+    latin1_output += 16; // We wrote 16 characters.
+    return 16; // We consumed 16 bytes.
+  }
+  /// We do not have a fast path available, so we fallback.
+  const uint8_t idx =
+      tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][0];
+  const uint8_t consumed =
+      tables::utf8_to_utf16::utf8bigindex[input_utf8_end_of_code_point_mask][1];
+  // this indicates an invalid input:
+  if(idx >= 64) { return consumed; }
+  // Here we should have (idx < 64), if not, there is a bug in the validation or elsewhere.
+  // SIX (6) input code-code units
+  // this is a relatively easy scenario
+  // we process SIX (6) input code-code units. The max length in bytes of six code
+  // code units spanning between 1 and 2 bytes each is 12 bytes. On processors
+  // where pdep/pext is fast, we might be able to use a small lookup table.
+  const __m128i sh =
+        _mm_loadu_si128((const __m128i *)tables::utf8_to_utf16::shufutf8[idx]);
+  const __m128i perm = _mm_shuffle_epi8(in, sh);
+  const __m128i ascii = _mm_and_si128(perm, _mm_set1_epi16(0x7f));
+  const __m128i highbyte = _mm_and_si128(perm, _mm_set1_epi16(0x1f00));
+  __m128i composed = _mm_or_si128(ascii, _mm_srli_epi16(highbyte, 2));
+  const __m128i latin1_packed = _mm_packus_epi16(composed,composed);
+  // writing 8 bytes even though we only care about the first 6 bytes.
+  // performance note: it would be faster to use _mm_storeu_si128, we should investigate.
+  _mm_storel_epi64((__m128i *)latin1_output, latin1_packed);
+  latin1_output += 6; // We wrote 6 bytes.
+  return consumed;
+}
+/* end file src/westmere/sse_convert_utf8_to_latin1.cpp */
+
+/* begin file src/westmere/sse_convert_utf16_to_latin1.cpp */
+template 
+std::pair sse_convert_utf16_to_latin1(const char16_t* buf, size_t len, char* latin1_output) {
+  const char16_t* end = buf + len;
+  while (buf + 8 <= end) {
+    // Load 8 UTF-16 characters into 128-bit SSE register
+    __m128i in = _mm_loadu_si128(reinterpret_cast(buf));
+
+    if (!match_system(big_endian)) {
+      const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+      in = _mm_shuffle_epi8(in, swap);
+    }
+
+    __m128i high_byte_mask = _mm_set1_epi16((int16_t)0xFF00);
+    if (_mm_testz_si128(in, high_byte_mask)) {
+      // Pack 16-bit characters into 8-bit and store in latin1_output
+      __m128i latin1_packed = _mm_packus_epi16(in, in);
+      _mm_storel_epi64(reinterpret_cast<__m128i*>(latin1_output), latin1_packed);
+      // Adjust pointers for next iteration
+      buf += 8;
+      latin1_output += 8;
+    } else {
+      return std::make_pair(nullptr, reinterpret_cast(latin1_output));
+    }
+  } // while
+  return std::make_pair(buf, latin1_output);
+}
+
+template 
+std::pair sse_convert_utf16_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) {
+  const char16_t* start = buf;
+  const char16_t* end = buf + len;
+  while (buf + 8 <= end) {
+    __m128i in = _mm_loadu_si128(reinterpret_cast(buf));
+
+    if (!big_endian) {
+      const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+      in = _mm_shuffle_epi8(in, swap);
+    }
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf16_to_utf8.cpp
+    __m128i high_byte_mask = _mm_set1_epi16((int16_t)0xFF00);
+    if (_mm_testz_si128(in, high_byte_mask)) {
+      __m128i latin1_packed = _mm_packus_epi16(in, in);
+      _mm_storel_epi64(reinterpret_cast<__m128i*>(latin1_output), latin1_packed);
+      buf += 8;
+      latin1_output += 8;
+    } else {
+      // Fallback to scalar code for handling errors
+      for(int k = 0; k < 8; k++) {
+        uint16_t word = !match_system(big_endian) ? scalar::utf16::swap_bytes(buf[k]) : buf[k];
+        if(word <= 0xff) {
+          *latin1_output++ = char(word);
+        } else {
+          return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), latin1_output);
+        }
+      }
+      buf += 8;
+    }
+  } // while
+  return std::make_pair(result(error_code::SUCCESS, buf - start), latin1_output);
+}
+/* end file src/westmere/sse_convert_utf16_to_latin1.cpp */
 /* begin file src/westmere/sse_convert_utf16_to_utf8.cpp */
 /*
     The vectorized algorithm works on single SSE register i.e., it
-    loads eight 16-bit words.
+    loads eight 16-bit code units.
 
     We consider three cases:
     1. an input register contains no surrogates and each value
@@ -25541,7 +29647,7 @@ size_t convert_masked_utf8_to_utf32(const char *input,
 
     Ad 1.
 
-    When values are less than 0x0800, it means that a 16-bit words
+    When values are less than 0x0800, it means that a 16-bit code unit
     can be converted into: 1) single UTF8 byte (when it's an ASCII
     char) or 2) two UTF8 bytes.
 
@@ -25555,7 +29661,7 @@ size_t convert_masked_utf8_to_utf32(const char *input,
 
     Ad 2.
 
-    When values fit in 16-bit words, but are above 0x07ff, then
+    When values fit in 16-bit code units, but are above 0x07ff, then
     a single word may produce one, two or three UTF8 bytes.
 
     We prepare data for all these three cases in two registers.
@@ -25588,7 +29694,6 @@ std::pair sse_convert_utf16_to_utf8(const char16_t* buf,
   const __m128i v_0000 = _mm_setzero_si128();
   const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800);
   const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800);
-  const __m128i v_c080 = _mm_set1_epi16((int16_t)0xc080);
   const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
 
   while (buf + 16 + safety_margin <= end) {
@@ -25637,44 +29742,9 @@ std::pair sse_convert_utf16_to_utf8(const char16_t* buf,
     const uint16_t one_or_two_bytes_bitmask = static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask));
 
     if (one_or_two_bytes_bitmask == 0xffff) {
-          // 1. prepare 2-byte values
-          // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
-          // expected output   : [110a|aaaa|10bb|bbbb] x 8
-          const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00);
-          const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f);
-
-          // t0 = [000a|aaaa|bbbb|bb00]
-          const __m128i t0 = _mm_slli_epi16(in, 2);
-          // t1 = [000a|aaaa|0000|0000]
-          const __m128i t1 = _mm_and_si128(t0, v_1f00);
-          // t2 = [0000|0000|00bb|bbbb]
-          const __m128i t2 = _mm_and_si128(in, v_003f);
-          // t3 = [000a|aaaa|00bb|bbbb]
-          const __m128i t3 = _mm_or_si128(t1, t2);
-          // t4 = [110a|aaaa|10bb|bbbb]
-          const __m128i t4 = _mm_or_si128(t3, v_c080);
-
-          // 2. merge ASCII and 2-byte codewords
-          const __m128i utf8_unpacked = _mm_blendv_epi8(t4, in, one_byte_bytemask);
-
-          // 3. prepare bitmask for 8-bit lookup
-          //    one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - MSB, a - LSB)
-          const uint16_t m0 = one_byte_bitmask & 0x5555;  // m0 = 0h0g0f0e0d0c0b0a
-          const uint16_t m1 = static_cast(m0 >> 7);                    // m1 = 00000000h0g0f0e0
-          const uint8_t  m2 = static_cast((m0 | m1) & 0xff);           // m2 =         hdgcfbea
-          // 4. pack the bytes
-          const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
-          const __m128i shuffle = _mm_loadu_si128((__m128i*)(row + 1));
-          const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle);
-
-          // 5. store bytes
-          _mm_storeu_si128((__m128i*)utf8_output, utf8_packed);
-
-          // 6. adjust pointers
-          buf += 8;
-          utf8_output += row[0];
-          continue;
-
+      internal::westmere::write_v_u16_11bits_to_utf8(in, utf8_output, one_byte_bytemask, one_byte_bitmask);
+      buf += 8;
+      continue;
     }
 
     // 1. Check if there are any surrogate word in the input chunk.
@@ -25688,7 +29758,7 @@ std::pair sse_convert_utf16_to_utf8(const char16_t* buf,
     // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
     // it is likely an uncommon occurrence.
     if (surrogates_bitmask == 0x0000) {
-      // case: words from register produce either 1, 2 or 3 UTF-8 bytes
+      // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
         const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
                                                 0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
 
@@ -25697,7 +29767,7 @@ std::pair sse_convert_utf16_to_utf8(const char16_t* buf,
            2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
            3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-          We expand the input word (16-bit) into two words (32-bit), thus
+          We expand the input word (16-bit) into two code units (32-bit), thus
           we have room for four bytes. However, we need five distinct bit
           layouts. Note that the last byte in cases #2 and #3 is the same.
 
@@ -25708,7 +29778,7 @@ std::pair sse_convert_utf16_to_utf8(const char16_t* buf,
           either byte 1 for case #2 or byte 2 for case #3. Note that they
           differ by exactly one bit.
 
-          Finally from these two words we build proper UTF-8 sequence, taking
+          Finally from these two code units we build proper UTF-8 sequence, taking
           into account the case (i.e, the number of bytes to write).
         */
         /**
@@ -25736,15 +29806,15 @@ std::pair sse_convert_utf16_to_utf8(const char16_t* buf,
         const __m128i s4 = _mm_xor_si128(s3, m0);
 #undef simdutf_vec
 
-        // 4. expand words 16-bit => 32-bit
+        // 4. expand code units 16-bit => 32-bit
         const __m128i out0 = _mm_unpacklo_epi16(t2, s4);
         const __m128i out1 = _mm_unpackhi_epi16(t2, s4);
 
-        // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
+        // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
         const uint16_t mask = (one_byte_bitmask & 0x5555) |
                               (one_or_two_bytes_bitmask & 0xaaaa);
         if(mask == 0) {
-          // We only have three-byte words. Use fast path.
+          // We only have three-byte code units. Use fast path.
           const __m128i shuffle = _mm_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
           const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle);
           const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle);
@@ -25828,7 +29898,6 @@ std::pair sse_convert_utf16_to_utf8_with_errors(const char16_t* b
   const __m128i v_0000 = _mm_setzero_si128();
   const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800);
   const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800);
-  const __m128i v_c080 = _mm_set1_epi16((int16_t)0xc080);
   const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
 
   while (buf + 16 + safety_margin <= end) {
@@ -25877,44 +29946,9 @@ std::pair sse_convert_utf16_to_utf8_with_errors(const char16_t* b
     const uint16_t one_or_two_bytes_bitmask = static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask));
 
     if (one_or_two_bytes_bitmask == 0xffff) {
-          // 1. prepare 2-byte values
-          // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
-          // expected output   : [110a|aaaa|10bb|bbbb] x 8
-          const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00);
-          const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f);
-
-          // t0 = [000a|aaaa|bbbb|bb00]
-          const __m128i t0 = _mm_slli_epi16(in, 2);
-          // t1 = [000a|aaaa|0000|0000]
-          const __m128i t1 = _mm_and_si128(t0, v_1f00);
-          // t2 = [0000|0000|00bb|bbbb]
-          const __m128i t2 = _mm_and_si128(in, v_003f);
-          // t3 = [000a|aaaa|00bb|bbbb]
-          const __m128i t3 = _mm_or_si128(t1, t2);
-          // t4 = [110a|aaaa|10bb|bbbb]
-          const __m128i t4 = _mm_or_si128(t3, v_c080);
-
-          // 2. merge ASCII and 2-byte codewords
-          const __m128i utf8_unpacked = _mm_blendv_epi8(t4, in, one_byte_bytemask);
-
-          // 3. prepare bitmask for 8-bit lookup
-          //    one_byte_bitmask = hhggffeeddccbbaa -- the bits are doubled (h - MSB, a - LSB)
-          const uint16_t m0 = one_byte_bitmask & 0x5555;  // m0 = 0h0g0f0e0d0c0b0a
-          const uint16_t m1 = static_cast(m0 >> 7);                    // m1 = 00000000h0g0f0e0
-          const uint8_t  m2 = static_cast((m0 | m1) & 0xff);           // m2 =         hdgcfbea
-          // 4. pack the bytes
-          const uint8_t* row = &simdutf::tables::utf16_to_utf8::pack_1_2_utf8_bytes[m2][0];
-          const __m128i shuffle = _mm_loadu_si128((__m128i*)(row + 1));
-          const __m128i utf8_packed = _mm_shuffle_epi8(utf8_unpacked, shuffle);
-
-          // 5. store bytes
-          _mm_storeu_si128((__m128i*)utf8_output, utf8_packed);
-
-          // 6. adjust pointers
-          buf += 8;
-          utf8_output += row[0];
-          continue;
-
+      internal::westmere::write_v_u16_11bits_to_utf8(in, utf8_output, one_byte_bytemask, one_byte_bitmask);
+      buf += 8;
+      continue;
     }
 
     // 1. Check if there are any surrogate word in the input chunk.
@@ -25928,7 +29962,7 @@ std::pair sse_convert_utf16_to_utf8_with_errors(const char16_t* b
     // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
     // it is likely an uncommon occurrence.
     if (surrogates_bitmask == 0x0000) {
-      // case: words from register produce either 1, 2 or 3 UTF-8 bytes
+      // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
         const __m128i dup_even = _mm_setr_epi16(0x0000, 0x0202, 0x0404, 0x0606,
                                                 0x0808, 0x0a0a, 0x0c0c, 0x0e0e);
 
@@ -25937,7 +29971,7 @@ std::pair sse_convert_utf16_to_utf8_with_errors(const char16_t* b
            2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
            3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-          We expand the input word (16-bit) into two words (32-bit), thus
+          We expand the input word (16-bit) into two code units (32-bit), thus
           we have room for four bytes. However, we need five distinct bit
           layouts. Note that the last byte in cases #2 and #3 is the same.
 
@@ -25948,7 +29982,7 @@ std::pair sse_convert_utf16_to_utf8_with_errors(const char16_t* b
           either byte 1 for case #2 or byte 2 for case #3. Note that they
           differ by exactly one bit.
 
-          Finally from these two words we build proper UTF-8 sequence, taking
+          Finally from these two code units we build proper UTF-8 sequence, taking
           into account the case (i.e, the number of bytes to write).
         */
         /**
@@ -25976,15 +30010,15 @@ std::pair sse_convert_utf16_to_utf8_with_errors(const char16_t* b
         const __m128i s4 = _mm_xor_si128(s3, m0);
 #undef simdutf_vec
 
-        // 4. expand words 16-bit => 32-bit
+        // 4. expand code units 16-bit => 32-bit
         const __m128i out0 = _mm_unpacklo_epi16(t2, s4);
         const __m128i out1 = _mm_unpackhi_epi16(t2, s4);
 
-        // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
+        // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
         const uint16_t mask = (one_byte_bitmask & 0x5555) |
                               (one_or_two_bytes_bitmask & 0xaaaa);
         if(mask == 0) {
-          // We only have three-byte words. Use fast path.
+          // We only have three-byte code units. Use fast path.
           const __m128i shuffle = _mm_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
           const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle);
           const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle);
@@ -26053,11 +30087,10 @@ std::pair sse_convert_utf16_to_utf8_with_errors(const char16_t* b
   return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output);
 }
 /* end file src/westmere/sse_convert_utf16_to_utf8.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf16_to_utf32.cpp
 /* begin file src/westmere/sse_convert_utf16_to_utf32.cpp */
 /*
     The vectorized algorithm works on single SSE register i.e., it
-    loads eight 16-bit words.
+    loads eight 16-bit code units.
 
     We consider three cases:
     1. an input register contains no surrogates and each value
@@ -26069,7 +30102,7 @@ std::pair sse_convert_utf16_to_utf8_with_errors(const char16_t* b
 
     Ad 1.
 
-    When values are less than 0x0800, it means that a 16-bit words
+    When values are less than 0x0800, it means that a 16-bit code unit
     can be converted into: 1) single UTF8 byte (when it's an ASCII
     char) or 2) two UTF8 bytes.
 
@@ -26083,7 +30116,7 @@ std::pair sse_convert_utf16_to_utf8_with_errors(const char16_t* b
 
     Ad 2.
 
-    When values fit in 16-bit words, but are above 0x07ff, then
+    When values fit in 16-bit code units, but are above 0x07ff, then
     a single word may produce one, two or three UTF8 bytes.
 
     We prepare data for all these three cases in two registers.
@@ -26115,7 +30148,7 @@ std::pair sse_convert_utf16_to_utf32(const char16_t*
   const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800);
   const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800);
 
-  while (buf + 16 <= end) {
+  while (buf + 8 <= end) {
     __m128i in = _mm_loadu_si128((__m128i*)buf);
 
     if (big_endian) {
@@ -26134,7 +30167,7 @@ std::pair sse_convert_utf16_to_utf32(const char16_t*
     // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
     // it is likely an uncommon occurrence.
     if (surrogates_bitmask == 0x0000) {
-      // case: no surrogate pair, extend 16-bit words to 32-bit words
+      // case: no surrogate pair, extend 16-bit code units to 32-bit code units
         _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), _mm_cvtepu16_epi32(in));
         _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output+4), _mm_cvtepu16_epi32(_mm_srli_si128(in,8)));
         utf32_output += 8;
@@ -26183,7 +30216,7 @@ std::pair sse_convert_utf16_to_utf32_with_errors(const char16
   const __m128i v_f800 = _mm_set1_epi16((int16_t)0xf800);
   const __m128i v_d800 = _mm_set1_epi16((int16_t)0xd800);
 
-  while (buf + 16 <= end) {
+  while (buf + 8 <= end) {
     __m128i in = _mm_loadu_si128((__m128i*)buf);
 
     if (big_endian) {
@@ -26202,7 +30235,7 @@ std::pair sse_convert_utf16_to_utf32_with_errors(const char16
     // It might seem like checking for surrogates_bitmask == 0xc000 could help. However,
     // it is likely an uncommon occurrence.
     if (surrogates_bitmask == 0x0000) {
-      // case: no surrogate pair, extend 16-bit words to 32-bit words
+      // case: no surrogate pair, extend 16-bit code units to 32-bit code units
         _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output), _mm_cvtepu16_epi32(in));
         _mm_storeu_si128(reinterpret_cast<__m128i *>(utf32_output+4), _mm_cvtepu16_epi32(_mm_srli_si128(in,8)));
         utf32_output += 8;
@@ -26237,51 +30270,139 @@ std::pair sse_convert_utf16_to_utf32_with_errors(const char16
 }
 /* end file src/westmere/sse_convert_utf16_to_utf32.cpp */
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf32_to_utf8.cpp
+/* begin file src/westmere/sse_convert_utf32_to_latin1.cpp */
+std::pair
+sse_convert_utf32_to_latin1(const char32_t *buf, size_t len,
+                            char *latin1_output) {
+  const size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16
+
+  __m128i high_bytes_mask = _mm_set1_epi32(0xFFFFFF00);
+  __m128i shufmask =
+      _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0);
+
+  for (size_t i = 0; i < rounded_len; i += 16) {
+    __m128i in1 = _mm_loadu_si128((__m128i *)buf);
+    __m128i in2 = _mm_loadu_si128((__m128i *)(buf + 4));
+    __m128i in3 = _mm_loadu_si128((__m128i *)(buf + 8));
+    __m128i in4 = _mm_loadu_si128((__m128i *)(buf + 12));
+
+    __m128i check_combined = _mm_or_si128(in1, in2);
+    check_combined = _mm_or_si128(check_combined, in3);
+    check_combined = _mm_or_si128(check_combined, in4);
+
+    if (!_mm_testz_si128(check_combined, high_bytes_mask)) {
+      return std::make_pair(nullptr, latin1_output);
+    }
+    __m128i pack1 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in1, shufmask), _mm_shuffle_epi8(in2, shufmask));
+    __m128i pack2 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in3, shufmask), _mm_shuffle_epi8(in4, shufmask));
+    __m128i pack = _mm_unpacklo_epi64(pack1, pack2);
+    _mm_storeu_si128((__m128i *)latin1_output, pack);
+    latin1_output += 16;
+    buf += 16;
+  }
+
+  return std::make_pair(buf, latin1_output);
+}
+
+std::pair
+sse_convert_utf32_to_latin1_with_errors(const char32_t *buf, size_t len,
+                                        char *latin1_output) {
+  const char32_t *start = buf;
+  const size_t rounded_len = len & ~0xF; // Round down to nearest multiple of 16
+
+  __m128i high_bytes_mask = _mm_set1_epi32(0xFFFFFF00);
+  __m128i shufmask =
+      _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 8, 4, 0);
+
+  for (size_t i = 0; i < rounded_len; i += 16) {
+    __m128i in1 = _mm_loadu_si128((__m128i *)buf);
+    __m128i in2 = _mm_loadu_si128((__m128i *)(buf + 4));
+    __m128i in3 = _mm_loadu_si128((__m128i *)(buf + 8));
+    __m128i in4 = _mm_loadu_si128((__m128i *)(buf + 12));
+
+    __m128i check_combined = _mm_or_si128(in1, in2);
+    check_combined = _mm_or_si128(check_combined, in3);
+    check_combined = _mm_or_si128(check_combined, in4);
+
+    if (!_mm_testz_si128(check_combined, high_bytes_mask)) {
+      // Fallback to scalar code for handling errors
+      for (int k = 0; k < 16; k++) {
+        char32_t codepoint = buf[k];
+        if (codepoint <= 0xff) {
+          *latin1_output++ = char(codepoint);
+        } else {
+          return std::make_pair(result(error_code::TOO_LARGE, buf - start + k),
+                                latin1_output);
+        }
+      }
+      buf += 16;
+      continue;
+    }
+    __m128i pack1 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in1, shufmask), _mm_shuffle_epi8(in2, shufmask));
+    __m128i pack2 = _mm_unpacklo_epi32(_mm_shuffle_epi8(in3, shufmask), _mm_shuffle_epi8(in4, shufmask));
+    __m128i pack = _mm_unpacklo_epi64(pack1, pack2);
+    _mm_storeu_si128((__m128i *)latin1_output, pack);
+    latin1_output += 16;
+    buf += 16;
+  }
+
+  return std::make_pair(result(error_code::SUCCESS, buf - start),
+                        latin1_output);
+}
+/* end file src/westmere/sse_convert_utf32_to_latin1.cpp */
 /* begin file src/westmere/sse_convert_utf32_to_utf8.cpp */
 std::pair sse_convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) {
   const char32_t* end = buf + len;
 
-  const __m128i v_0000 = _mm_setzero_si128();
-  const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800);
-  const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080);
-  const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80);
-  const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000);
-  const __m128i v_7fffffff = _mm_set1_epi32((uint32_t)0x7fffffff);
+  const __m128i v_0000 = _mm_setzero_si128();//__m128 = 128 bits
+  const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800); //1111 1000 0000 0000
+  const __m128i v_c080 = _mm_set1_epi16((uint16_t)0xc080); //1100 0000 1000 0000
+  const __m128i v_ff80 = _mm_set1_epi16((uint16_t)0xff80); //1111 1111 1000 0000
+  const __m128i v_ffff0000 = _mm_set1_epi32((uint32_t)0xffff0000); //1111 1111 1111 1111 0000 0000 0000 0000
+  const __m128i v_7fffffff = _mm_set1_epi32((uint32_t)0x7fffffff); //0111 1111 1111 1111 1111 1111 1111 1111 
   __m128i running_max = _mm_setzero_si128();
   __m128i forbidden_bytemask = _mm_setzero_si128();
   const size_t safety_margin = 12; // to avoid overruns, see issue https://github.com/simdutf/simdutf/issues/92
 
-  while (buf + 16 + safety_margin <= end) {
+  while (buf + 16 + safety_margin <= end) { //buf is a char32_t pointer, each char32_t has 4 bytes or 32 bits, thus buf + 16 * char_32t = 512 bits = 64 bytes
     // We load two 16 bytes registers for a total of 32 bytes or 16 characters.
     __m128i in = _mm_loadu_si128((__m128i*)buf);
-    __m128i nextin = _mm_loadu_si128((__m128i*)buf+1);
-    running_max = _mm_max_epu32(_mm_max_epu32(in, running_max), nextin);
-
-    // Pack 32-bit UTF-32 words to 16-bit UTF-16 words with unsigned saturation
-    __m128i in_16 = _mm_packus_epi32(_mm_and_si128(in, v_7fffffff), _mm_and_si128(nextin, v_7fffffff));
+    __m128i nextin = _mm_loadu_si128((__m128i*)buf+1);//These two values can hold only 8 UTF32 chars
+    running_max = _mm_max_epu32(
+                                _mm_max_epu32(in, running_max), //take element-wise max char32_t from in and running_max vector
+                                 nextin); //and take element-wise max element from nextin and running_max vector
+
+    // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned saturation
+    __m128i in_16 = _mm_packus_epi32(
+                                      _mm_and_si128(in, v_7fffffff), 
+                                      _mm_and_si128(nextin, v_7fffffff)
+                                      );//in this context pack the two __m128 into a single 
+    //By ensuring the highest bit is set to 0(&v_7fffffff), we're making sure all values are interpreted as non-negative, or specifically, the values are within the range of valid Unicode code points.
+    //remember : having leading byte 0 means a positive number by the two complements system. Unicode is well beneath the range where you'll start getting issues so that's OK. 
 
     // Try to apply UTF-16 => UTF-8 from ./sse_convert_utf16_to_utf8.cpp
 
-    // Check for ASCII fast path
-    if(_mm_testz_si128(in_16, v_ff80)) { // ASCII fast path!!!!
+    // Check for ASCII fast path 
+
+    // ASCII fast path!!!!
       // We eagerly load another 32 bytes, hoping that they will be ASCII too.
       // The intuition is that we try to collect 16 ASCII characters which requires
       // a total of 64 bytes of input. If we fail, we just pass thirdin and fourthin
       // as our new inputs.
+    if(_mm_testz_si128(in_16, v_ff80)) {  //if the first two blocks are ASCII
       __m128i thirdin = _mm_loadu_si128((__m128i*)buf+2);
       __m128i fourthin = _mm_loadu_si128((__m128i*)buf+3);
-      running_max = _mm_max_epu32(_mm_max_epu32(thirdin, running_max), fourthin);
-      __m128i nextin_16 = _mm_packus_epi32(_mm_and_si128(thirdin, v_7fffffff), _mm_and_si128(fourthin, v_7fffffff));
-      if(!_mm_testz_si128(nextin_16, v_ff80)) {
+      running_max = _mm_max_epu32(_mm_max_epu32(thirdin, running_max), fourthin);//take the running max of all 4 vectors thus far
+      __m128i nextin_16 = _mm_packus_epi32(_mm_and_si128(thirdin, v_7fffffff), _mm_and_si128(fourthin, v_7fffffff));//pack into 1 vector, now you have two
+      if(!_mm_testz_si128(nextin_16, v_ff80)) {  //checks if the second packed vector is ASCII, if not:
         // 1. pack the bytes
         // obviously suboptimal.
-        const __m128i utf8_packed = _mm_packus_epi16(in_16,in_16);
+        const __m128i utf8_packed = _mm_packus_epi16(in_16,in_16); //creates two copy of in_16 in 1 vector
         // 2. store (16 bytes)
-        _mm_storeu_si128((__m128i*)utf8_output, utf8_packed);
+        _mm_storeu_si128((__m128i*)utf8_output, utf8_packed); //put them into the output
         // 3. adjust pointers
-        buf += 8;
-        utf8_output += 8;
+        buf += 8; //the char32_t buffer pointer goes up 8 char32_t chars* 32 bits =  256 bits
+        utf8_output += 8; //same with output, e.g. lift the first two blocks alone.
         // Proceed with next input
         in_16 = nextin_16;
         // We need to update in and nextin because they are used later.
@@ -26299,32 +30420,36 @@ std::pair sse_convert_utf32_to_utf8(const char32_t* buf,
       }
     }
 
-    // no bits set above 7th bit
-    const __m128i one_byte_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in_16, v_ff80), v_0000);
-    const uint16_t one_byte_bitmask = static_cast(_mm_movemask_epi8(one_byte_bytemask));
+    // no bits set above 7th bit -- find out all the ASCII characters
+    const __m128i one_byte_bytemask = _mm_cmpeq_epi16( // this takes four bytes at a time and compares: 
+                                                      _mm_and_si128(in_16, v_ff80), // the vector that get only the first 9 bits of each 16-bit/2-byte units
+                                                       v_0000 //
+                                                       ); // they should be all zero if they are ASCII. E.g. ASCII in UTF32 is of format 0000 0000 0000 0XXX XXXX
+    // _mm_cmpeq_epi16 should now return a 1111 1111 1111 1111 for equals, and 0000 0000 0000 0000 if not for each 16-bit/2-byte units
+    const uint16_t one_byte_bitmask = static_cast(_mm_movemask_epi8(one_byte_bytemask)); // collect the MSB from previous vector and put them into uint16_t mas
 
     // no bits set above 11th bit
     const __m128i one_or_two_bytes_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_0000);
     const uint16_t one_or_two_bytes_bitmask = static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask));
 
     if (one_or_two_bytes_bitmask == 0xffff) {
-      // case: all words either produce 1 or 2 UTF-8 bytes (at least one produces 2 bytes)
+      // case: all code units either produce 1 or 2 UTF-8 bytes (at least one produces 2 bytes)
       // 1. prepare 2-byte values
       // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
       // expected output   : [110a|aaaa|10bb|bbbb] x 8
-      const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00);
-      const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f);
+      const __m128i v_1f00 = _mm_set1_epi16((int16_t)0x1f00); // 0001 1111 0000 0000
+      const __m128i v_003f = _mm_set1_epi16((int16_t)0x003f); // 0000 0000 0011 1111
 
       // t0 = [000a|aaaa|bbbb|bb00]
-      const __m128i t0 = _mm_slli_epi16(in_16, 2);
+      const __m128i t0 = _mm_slli_epi16(in_16, 2); // shift packed vector by two
       // t1 = [000a|aaaa|0000|0000]
-      const __m128i t1 = _mm_and_si128(t0, v_1f00);
+      const __m128i t1 = _mm_and_si128(t0, v_1f00); // potentital first utf8 byte
       // t2 = [0000|0000|00bb|bbbb]
-      const __m128i t2 = _mm_and_si128(in_16, v_003f);
+      const __m128i t2 = _mm_and_si128(in_16, v_003f);// potential second utf8 byte 
       // t3 = [000a|aaaa|00bb|bbbb]
-      const __m128i t3 = _mm_or_si128(t1, t2);
+      const __m128i t3 = _mm_or_si128(t1, t2); // first and second potential utf8 byte together 
       // t4 = [110a|aaaa|10bb|bbbb]
-      const __m128i t4 = _mm_or_si128(t3, v_c080);
+      const __m128i t4 = _mm_or_si128(t3, v_c080); // t3 | 1100 0000 1000 0000 = full potential 2-byte utf8 unit 
 
       // 2. merge ASCII and 2-byte codewords
       const __m128i utf8_unpacked = _mm_blendv_epi8(t4, in_16, one_byte_bytemask);
@@ -26353,7 +30478,7 @@ std::pair sse_convert_utf32_to_utf8(const char32_t* buf,
     const __m128i saturation_bytemask = _mm_cmpeq_epi32(_mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000);
     const uint32_t saturation_bitmask = static_cast(_mm_movemask_epi8(saturation_bytemask));
     if (saturation_bitmask == 0xffff) {
-      // case: words from register produce either 1, 2 or 3 UTF-8 bytes
+      // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
       const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
       forbidden_bytemask = _mm_or_si128(forbidden_bytemask, _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_d800));
 
@@ -26365,7 +30490,7 @@ std::pair sse_convert_utf32_to_utf8(const char32_t* buf,
           2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
           3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-        We expand the input word (16-bit) into two words (32-bit), thus
+        We expand the input word (16-bit) into two code units (32-bit), thus
         we have room for four bytes. However, we need five distinct bit
         layouts. Note that the last byte in cases #2 and #3 is the same.
 
@@ -26376,7 +30501,7 @@ std::pair sse_convert_utf32_to_utf8(const char32_t* buf,
         either byte 1 for case #2 or byte 2 for case #3. Note that they
         differ by exactly one bit.
 
-        Finally from these two words we build proper UTF-8 sequence, taking
+        Finally from these two code units we build proper UTF-8 sequence, taking
         into account the case (i.e, the number of bytes to write).
       */
       /**
@@ -26404,15 +30529,15 @@ std::pair sse_convert_utf32_to_utf8(const char32_t* buf,
       const __m128i s4 = _mm_xor_si128(s3, m0);
 #undef simdutf_vec
 
-      // 4. expand words 16-bit => 32-bit
+      // 4. expand code units 16-bit => 32-bit
       const __m128i out0 = _mm_unpacklo_epi16(t2, s4);
       const __m128i out1 = _mm_unpackhi_epi16(t2, s4);
 
-      // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
+      // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
       const uint16_t mask = (one_byte_bitmask & 0x5555) |
                             (one_or_two_bytes_bitmask & 0xaaaa);
       if(mask == 0) {
-        // We only have three-byte words. Use fast path.
+        // We only have three-byte code units. Use fast path.
         const __m128i shuffle = _mm_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
         const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle);
         const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle);
@@ -26511,7 +30636,7 @@ std::pair sse_convert_utf32_to_utf8_with_errors(const char32_t* b
       return std::make_pair(result(error_code::TOO_LARGE, buf - start), utf8_output);
     }
 
-    // Pack 32-bit UTF-32 words to 16-bit UTF-16 words with unsigned saturation
+    // Pack 32-bit UTF-32 code units to 16-bit UTF-16 code units with unsigned saturation
     __m128i in_16 = _mm_packus_epi32(_mm_and_si128(in, v_7fffffff), _mm_and_si128(nextin, v_7fffffff));
 
     // Try to apply UTF-16 => UTF-8 from ./sse_convert_utf16_to_utf8.cpp
@@ -26564,7 +30689,7 @@ std::pair sse_convert_utf32_to_utf8_with_errors(const char32_t* b
     const uint16_t one_or_two_bytes_bitmask = static_cast(_mm_movemask_epi8(one_or_two_bytes_bytemask));
 
     if (one_or_two_bytes_bitmask == 0xffff) {
-      // case: all words either produce 1 or 2 UTF-8 bytes (at least one produces 2 bytes)
+      // case: all code units either produce 1 or 2 UTF-8 bytes (at least one produces 2 bytes)
       // 1. prepare 2-byte values
       // input 16-bit word : [0000|0aaa|aabb|bbbb] x 8
       // expected output   : [110a|aaaa|10bb|bbbb] x 8
@@ -26610,9 +30735,9 @@ std::pair sse_convert_utf32_to_utf8_with_errors(const char32_t* b
     const uint32_t saturation_bitmask = static_cast(_mm_movemask_epi8(saturation_bytemask));
 
     if (saturation_bitmask == 0xffff) {
-      // case: words from register produce either 1, 2 or 3 UTF-8 bytes
+      // case: code units from register produce either 1, 2 or 3 UTF-8 bytes
 
-      // Check for illegal surrogate words
+      // Check for illegal surrogate code units
       const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
       const __m128i forbidden_bytemask = _mm_cmpeq_epi16(_mm_and_si128(in_16, v_f800), v_d800);
       if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) {
@@ -26627,7 +30752,7 @@ std::pair sse_convert_utf32_to_utf8_with_errors(const char32_t* b
           2. [0000|0bbb|bbcc|cccc] => [110b|bbbb], [10cc|cccc]              - two UTF-8 bytes
           3. [aaaa|bbbb|bbcc|cccc] => [1110|aaaa], [10bb|bbbb], [10cc|cccc] - three UTF-8 bytes
 
-        We expand the input word (16-bit) into two words (32-bit), thus
+        We expand the input word (16-bit) into two code units (32-bit), thus
         we have room for four bytes. However, we need five distinct bit
         layouts. Note that the last byte in cases #2 and #3 is the same.
 
@@ -26638,7 +30763,7 @@ std::pair sse_convert_utf32_to_utf8_with_errors(const char32_t* b
         either byte 1 for case #2 or byte 2 for case #3. Note that they
         differ by exactly one bit.
 
-        Finally from these two words we build proper UTF-8 sequence, taking
+        Finally from these two code units we build proper UTF-8 sequence, taking
         into account the case (i.e, the number of bytes to write).
       */
       /**
@@ -26666,321 +30791,709 @@ std::pair sse_convert_utf32_to_utf8_with_errors(const char32_t* b
       const __m128i s4 = _mm_xor_si128(s3, m0);
 #undef simdutf_vec
 
-      // 4. expand words 16-bit => 32-bit
-      const __m128i out0 = _mm_unpacklo_epi16(t2, s4);
-      const __m128i out1 = _mm_unpackhi_epi16(t2, s4);
+      // 4. expand code units 16-bit => 32-bit
+      const __m128i out0 = _mm_unpacklo_epi16(t2, s4);
+      const __m128i out1 = _mm_unpackhi_epi16(t2, s4);
+
+      // 5. compress 32-bit code units into 1, 2 or 3 bytes -- 2 x shuffle
+      const uint16_t mask = (one_byte_bitmask & 0x5555) |
+                            (one_or_two_bytes_bitmask & 0xaaaa);
+      if(mask == 0) {
+        // We only have three-byte code units. Use fast path.
+        const __m128i shuffle = _mm_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
+        const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle);
+        const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle);
+        _mm_storeu_si128((__m128i*)utf8_output, utf8_0);
+        utf8_output += 12;
+        _mm_storeu_si128((__m128i*)utf8_output, utf8_1);
+        utf8_output += 12;
+        buf += 8;
+        continue;
+      }
+      const uint8_t mask0 = uint8_t(mask);
+
+      const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
+      const __m128i shuffle0 = _mm_loadu_si128((__m128i*)(row0 + 1));
+      const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0);
+
+      const uint8_t mask1 = static_cast(mask >> 8);
+
+      const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
+      const __m128i shuffle1 = _mm_loadu_si128((__m128i*)(row1 + 1));
+      const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1);
+
+      _mm_storeu_si128((__m128i*)utf8_output, utf8_0);
+      utf8_output += row0[0];
+      _mm_storeu_si128((__m128i*)utf8_output, utf8_1);
+      utf8_output += row1[0];
+
+      buf += 8;
+    } else {
+      // case: at least one 32-bit word produce a surrogate pair in UTF-16 <=> will produce four UTF-8 bytes
+      // Let us do a scalar fallback.
+      // It may seem wasteful to use scalar code, but being efficient with SIMD
+      // in the presence of surrogate pairs may require non-trivial tables.
+      size_t forward = 15;
+      size_t k = 0;
+      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
+      for(; k < forward; k++) {
+        uint32_t word = buf[k];
+        if((word & 0xFFFFFF80)==0) {
+          *utf8_output++ = char(word);
+        } else if((word & 0xFFFFF800)==0) {
+          *utf8_output++ = char((word>>6) | 0b11000000);
+          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+        } else if((word &0xFFFF0000 )==0) {
+          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), utf8_output); }
+          *utf8_output++ = char((word>>12) | 0b11100000);
+          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
+          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+        } else {
+          if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf- start + k), utf8_output); }
+          *utf8_output++ = char((word>>18) | 0b11110000);
+          *utf8_output++ = char(((word>>12) & 0b111111) | 0b10000000);
+          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
+          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+        }
+      }
+      buf += k;
+    }
+  } // while
+
+  return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output);
+}
+/* end file src/westmere/sse_convert_utf32_to_utf8.cpp */
+/* begin file src/westmere/sse_convert_utf32_to_utf16.cpp */
+template 
+std::pair sse_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_output) {
+
+  const char32_t* end = buf + len;
+
+  const __m128i v_0000 = _mm_setzero_si128();
+  const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000);
+  __m128i forbidden_bytemask = _mm_setzero_si128();
+
+  while (buf + 8 <= end) {
+    __m128i in = _mm_loadu_si128((__m128i*)buf);
+    __m128i nextin = _mm_loadu_si128((__m128i*)buf+1);
+    const __m128i saturation_bytemask = _mm_cmpeq_epi32(_mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000);
+    const uint32_t saturation_bitmask = static_cast(_mm_movemask_epi8(saturation_bytemask));
+
+    // Check if no bits set above 16th
+    if (saturation_bitmask == 0xffff) {
+      // Pack UTF-32 to UTF-16
+      __m128i utf16_packed = _mm_packus_epi32(in, nextin);
+
+      const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800);
+      const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
+      forbidden_bytemask = _mm_or_si128(forbidden_bytemask, _mm_cmpeq_epi16(_mm_and_si128(utf16_packed, v_f800), v_d800));
+
+      if (big_endian) {
+        const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+        utf16_packed = _mm_shuffle_epi8(utf16_packed, swap);
+      }
+
+      _mm_storeu_si128((__m128i*)utf16_output, utf16_packed);
+      utf16_output += 8;
+      buf += 8;
+    } else {
+      size_t forward = 7;
+      size_t k = 0;
+      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
+      for(; k < forward; k++) {
+        uint32_t word = buf[k];
+        if((word & 0xFFFF0000)==0) {
+          // will not generate a surrogate pair
+          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(nullptr, utf16_output); }
+          *utf16_output++ = big_endian ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) : char16_t(word);
+        } else {
+          // will generate a surrogate pair
+          if (word > 0x10FFFF) { return std::make_pair(nullptr, utf16_output); }
+          word -= 0x10000;
+          uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
+          uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
+          if (big_endian) {
+            high_surrogate = uint16_t((high_surrogate >> 8) | (high_surrogate << 8));
+            low_surrogate = uint16_t((low_surrogate >> 8) | (low_surrogate << 8));
+          }
+          *utf16_output++ = char16_t(high_surrogate);
+          *utf16_output++ = char16_t(low_surrogate);
+        }
+      }
+      buf += k;
+    }
+  }
+
+  // check for invalid input
+  if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { return std::make_pair(nullptr, utf16_output); }
+
+  return std::make_pair(buf, utf16_output);
+}
 
-      // 5. compress 32-bit words into 1, 2 or 3 bytes -- 2 x shuffle
-      const uint16_t mask = (one_byte_bitmask & 0x5555) |
-                            (one_or_two_bytes_bitmask & 0xaaaa);
-      if(mask == 0) {
-        // We only have three-byte words. Use fast path.
-        const __m128i shuffle = _mm_setr_epi8(2,3,1,6,7,5,10,11,9,14,15,13,-1,-1,-1,-1);
-        const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle);
-        const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle);
-        _mm_storeu_si128((__m128i*)utf8_output, utf8_0);
-        utf8_output += 12;
-        _mm_storeu_si128((__m128i*)utf8_output, utf8_1);
-        utf8_output += 12;
-        buf += 8;
-        continue;
-      }
-      const uint8_t mask0 = uint8_t(mask);
 
-      const uint8_t* row0 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask0][0];
-      const __m128i shuffle0 = _mm_loadu_si128((__m128i*)(row0 + 1));
-      const __m128i utf8_0 = _mm_shuffle_epi8(out0, shuffle0);
+template 
+std::pair sse_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) {
+  const char32_t* start = buf;
+  const char32_t* end = buf + len;
 
-      const uint8_t mask1 = static_cast(mask >> 8);
+  const __m128i v_0000 = _mm_setzero_si128();
+  const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000);
 
-      const uint8_t* row1 = &simdutf::tables::utf16_to_utf8::pack_1_2_3_utf8_bytes[mask1][0];
-      const __m128i shuffle1 = _mm_loadu_si128((__m128i*)(row1 + 1));
-      const __m128i utf8_1 = _mm_shuffle_epi8(out1, shuffle1);
+  while (buf + 8 <= end) {
+    __m128i in = _mm_loadu_si128((__m128i*)buf);
+    __m128i nextin = _mm_loadu_si128((__m128i*)buf+1);
+    const __m128i saturation_bytemask = _mm_cmpeq_epi32(_mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000);
+    const uint32_t saturation_bitmask = static_cast(_mm_movemask_epi8(saturation_bytemask));
 
-      _mm_storeu_si128((__m128i*)utf8_output, utf8_0);
-      utf8_output += row0[0];
-      _mm_storeu_si128((__m128i*)utf8_output, utf8_1);
-      utf8_output += row1[0];
+    // Check if no bits set above 16th
+    if (saturation_bitmask == 0xffff) {
+      // Pack UTF-32 to UTF-16
+      __m128i utf16_packed = _mm_packus_epi32(in, nextin);
+
+      const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800);
+      const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
+      const __m128i forbidden_bytemask = _mm_cmpeq_epi16(_mm_and_si128(utf16_packed, v_f800), v_d800);
+      if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) {
+        return std::make_pair(result(error_code::SURROGATE, buf - start), utf16_output);
+      }
+
+      if (big_endian) {
+        const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
+        utf16_packed = _mm_shuffle_epi8(utf16_packed, swap);
+      }
 
+      _mm_storeu_si128((__m128i*)utf16_output, utf16_packed);
+      utf16_output += 8;
       buf += 8;
     } else {
-      // case: at least one 32-bit word produce a surrogate pair in UTF-16 <=> will produce four UTF-8 bytes
-      // Let us do a scalar fallback.
-      // It may seem wasteful to use scalar code, but being efficient with SIMD
-      // in the presence of surrogate pairs may require non-trivial tables.
-      size_t forward = 15;
+      size_t forward = 7;
       size_t k = 0;
       if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
       for(; k < forward; k++) {
         uint32_t word = buf[k];
-        if((word & 0xFFFFFF80)==0) {
-          *utf8_output++ = char(word);
-        } else if((word & 0xFFFFF800)==0) {
-          *utf8_output++ = char((word>>6) | 0b11000000);
-          *utf8_output++ = char((word & 0b111111) | 0b10000000);
-        } else if((word &0xFFFF0000 )==0) {
-          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), utf8_output); }
-          *utf8_output++ = char((word>>12) | 0b11100000);
-          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
-          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+        if((word & 0xFFFF0000)==0) {
+          // will not generate a surrogate pair
+          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), utf16_output); }
+          *utf16_output++ = big_endian ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) : char16_t(word);
         } else {
-          if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf- start + k), utf8_output); }
-          *utf8_output++ = char((word>>18) | 0b11110000);
-          *utf8_output++ = char(((word>>12) & 0b111111) | 0b10000000);
-          *utf8_output++ = char(((word>>6) & 0b111111) | 0b10000000);
-          *utf8_output++ = char((word & 0b111111) | 0b10000000);
+          // will generate a surrogate pair
+          if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), utf16_output); }
+          word -= 0x10000;
+          uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
+          uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
+          if (big_endian) {
+            high_surrogate = uint16_t((high_surrogate >> 8) | (high_surrogate << 8));
+            low_surrogate = uint16_t((low_surrogate >> 8) | (low_surrogate << 8));
+          }
+          *utf16_output++ = char16_t(high_surrogate);
+          *utf16_output++ = char16_t(low_surrogate);
         }
       }
       buf += k;
     }
-  } // while
+  }
 
-  return std::make_pair(result(error_code::SUCCESS, buf - start), utf8_output);
+  return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output);
 }
-/* end file src/westmere/sse_convert_utf32_to_utf8.cpp */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=westmere/sse_convert_utf32_to_utf16.cpp
-/* begin file src/westmere/sse_convert_utf32_to_utf16.cpp */
-template 
-std::pair sse_convert_utf32_to_utf16(const char32_t* buf, size_t len, char16_t* utf16_output) {
+/* end file src/westmere/sse_convert_utf32_to_utf16.cpp */
+
+} // unnamed namespace
+} // namespace westmere
+} // namespace simdutf
+
+/* begin file src/generic/buf_block_reader.h */
+namespace simdutf {
+namespace westmere {
+namespace {
+
+// Walks through a buffer in block-sized increments, loading the last part with spaces
+template
+struct buf_block_reader {
+public:
+  simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
+  simdutf_really_inline size_t block_index();
+  simdutf_really_inline bool has_full_block() const;
+  simdutf_really_inline const uint8_t *full_block() const;
+  /**
+   * Get the last block, padded with spaces.
+   *
+   * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this
+   * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there
+   * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding.
+   *
+   * @return the number of effective characters in the last block.
+   */
+  simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
+  simdutf_really_inline void advance();
+private:
+  const uint8_t *buf;
+  const size_t len;
+  const size_t lenminusstep;
+  size_t idx;
+};
+
+// Routines to print masks and text for debugging bitmask operations
+simdutf_unused static char * format_input_text_64(const uint8_t *text) {
+  static char *buf = reinterpret_cast(malloc(sizeof(simd8x64) + 1));
+  for (size_t i=0; i); i++) {
+    buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
+  }
+  buf[sizeof(simd8x64)] = '\0';
+  return buf;
+}
+
+// Routines to print masks and text for debugging bitmask operations
+simdutf_unused static char * format_input_text(const simd8x64& in) {
+  static char *buf = reinterpret_cast(malloc(sizeof(simd8x64) + 1));
+  in.store(reinterpret_cast(buf));
+  for (size_t i=0; i); i++) {
+    if (buf[i] < ' ') { buf[i] = '_'; }
+  }
+  buf[sizeof(simd8x64)] = '\0';
+  return buf;
+}
+
+simdutf_unused static char * format_mask(uint64_t mask) {
+  static char *buf = reinterpret_cast(malloc(64 + 1));
+  for (size_t i=0; i<64; i++) {
+    buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
+  }
+  buf[64] = '\0';
+  return buf;
+}
+
+template
+simdutf_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
+
+template
+simdutf_really_inline size_t buf_block_reader::block_index() { return idx; }
+
+template
+simdutf_really_inline bool buf_block_reader::has_full_block() const {
+  return idx < lenminusstep;
+}
+
+template
+simdutf_really_inline const uint8_t *buf_block_reader::full_block() const {
+  return &buf[idx];
+}
+
+template
+simdutf_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const {
+  if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers
+  std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
+  std::memcpy(dst, buf + idx, len - idx);
+  return len - idx;
+}
+
+template
+simdutf_really_inline void buf_block_reader::advance() {
+  idx += STEP_SIZE;
+}
+
+} // unnamed namespace
+} // namespace westmere
+} // namespace simdutf
+/* end file src/generic/buf_block_reader.h */
+/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
+namespace simdutf {
+namespace westmere {
+namespace {
+namespace utf8_validation {
+
+using namespace simd;
+
+  simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
+// Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
+// Bit 1 = Too Long (ASCII followed by continuation)
+// Bit 2 = Overlong 3-byte
+// Bit 4 = Surrogate
+// Bit 5 = Overlong 2-byte
+// Bit 7 = Two Continuations
+    constexpr const uint8_t TOO_SHORT   = 1<<0; // 11______ 0_______
+                                                // 11______ 11______
+    constexpr const uint8_t TOO_LONG    = 1<<1; // 0_______ 10______
+    constexpr const uint8_t OVERLONG_3  = 1<<2; // 11100000 100_____
+    constexpr const uint8_t SURROGATE   = 1<<4; // 11101101 101_____
+    constexpr const uint8_t OVERLONG_2  = 1<<5; // 1100000_ 10______
+    constexpr const uint8_t TWO_CONTS   = 1<<7; // 10______ 10______
+    constexpr const uint8_t TOO_LARGE   = 1<<3; // 11110100 1001____
+                                                // 11110100 101_____
+                                                // 11110101 1001____
+                                                // 11110101 101_____
+                                                // 1111011_ 1001____
+                                                // 1111011_ 101_____
+                                                // 11111___ 1001____
+                                                // 11111___ 101_____
+    constexpr const uint8_t TOO_LARGE_1000 = 1<<6;
+                                                // 11110101 1000____
+                                                // 1111011_ 1000____
+                                                // 11111___ 1000____
+    constexpr const uint8_t OVERLONG_4  = 1<<6; // 11110000 1000____
+
+    const simd8 byte_1_high = prev1.shr<4>().lookup_16(
+      // 0_______ ________ 
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      TOO_LONG, TOO_LONG, TOO_LONG, TOO_LONG,
+      // 10______ ________ 
+      TWO_CONTS, TWO_CONTS, TWO_CONTS, TWO_CONTS,
+      // 1100____ ________ 
+      TOO_SHORT | OVERLONG_2,
+      // 1101____ ________ 
+      TOO_SHORT,
+      // 1110____ ________ 
+      TOO_SHORT | OVERLONG_3 | SURROGATE,
+      // 1111____ ________ 
+      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4
+    );
+    constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
+    const simd8 byte_1_low = (prev1 & 0x0F).lookup_16(
+      // ____0000 ________
+      CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4,
+      // ____0001 ________
+      CARRY | OVERLONG_2,
+      // ____001_ ________
+      CARRY,
+      CARRY,
 
-  const char32_t* end = buf + len;
+      // ____0100 ________
+      CARRY | TOO_LARGE,
+      // ____0101 ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      // ____011_ ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
 
-  const __m128i v_0000 = _mm_setzero_si128();
-  const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000);
-  __m128i forbidden_bytemask = _mm_setzero_si128();
+      // ____1___ ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      // ____1101 ________
+      CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
+      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      CARRY | TOO_LARGE | TOO_LARGE_1000
+    );
+    const simd8 byte_2_high = input.shr<4>().lookup_16(
+      // ________ 0_______ 
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT,
 
-  while (buf + 8 <= end) {
-    __m128i in = _mm_loadu_si128((__m128i*)buf);
-    __m128i nextin = _mm_loadu_si128((__m128i*)buf+1);
-    const __m128i saturation_bytemask = _mm_cmpeq_epi32(_mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000);
-    const uint32_t saturation_bitmask = static_cast(_mm_movemask_epi8(saturation_bytemask));
+      // ________ 1000____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4,
+      // ________ 1001____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE,
+      // ________ 101_____
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
+      TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE  | TOO_LARGE,
 
-    // Check if no bits set above 16th
-    if (saturation_bitmask == 0xffff) {
-      // Pack UTF-32 to UTF-16
-      __m128i utf16_packed = _mm_packus_epi32(in, nextin);
+      // ________ 11______
+      TOO_SHORT, TOO_SHORT, TOO_SHORT, TOO_SHORT
+    );
+    return (byte_1_high & byte_1_low & byte_2_high);
+  }
+  simdutf_really_inline simd8 check_multibyte_lengths(const simd8 input,
+      const simd8 prev_input, const simd8 sc) {
+    simd8 prev2 = input.prev<2>(prev_input);
+    simd8 prev3 = input.prev<3>(prev_input);
+    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
+    simd8 must23_80 = must23 & uint8_t(0x80);
+    return must23_80 ^ sc;
+  }
 
-      const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800);
-      const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
-      forbidden_bytemask = _mm_or_si128(forbidden_bytemask, _mm_cmpeq_epi16(_mm_and_si128(utf16_packed, v_f800), v_d800));
+  //
+  // Return nonzero if there are incomplete multibyte characters at the end of the block:
+  // e.g. if there is a 4-byte character, but it's 3 bytes from the end.
+  //
+  simdutf_really_inline simd8 is_incomplete(const simd8 input) {
+    // If the previous input's last 3 bytes match this, they're too short (they ended at EOF):
+    // ... 1111____ 111_____ 11______
+    static const uint8_t max_array[32] = {
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 255, 255, 255,
+      255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1
+    };
+    const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]);
+    return input.gt_bits(max_value);
+  }
 
-      if (big_endian) {
-        const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-        utf16_packed = _mm_shuffle_epi8(utf16_packed, swap);
-      }
+  struct utf8_checker {
+    // If this is nonzero, there has been a UTF-8 error.
+    simd8 error;
+    // The last input we received
+    simd8 prev_input_block;
+    // Whether the last input we received was incomplete (used for ASCII fast path)
+    simd8 prev_incomplete;
 
-      _mm_storeu_si128((__m128i*)utf16_output, utf16_packed);
-      utf16_output += 8;
-      buf += 8;
-    } else {
-      size_t forward = 7;
-      size_t k = 0;
-      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
-      for(; k < forward; k++) {
-        uint32_t word = buf[k];
-        if((word & 0xFFFF0000)==0) {
-          // will not generate a surrogate pair
-          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(nullptr, utf16_output); }
-          *utf16_output++ = big_endian ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) : char16_t(word);
-        } else {
-          // will generate a surrogate pair
-          if (word > 0x10FFFF) { return std::make_pair(nullptr, utf16_output); }
-          word -= 0x10000;
-          uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
-          uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
-          if (big_endian) {
-            high_surrogate = uint16_t((high_surrogate >> 8) | (high_surrogate << 8));
-            low_surrogate = uint16_t((low_surrogate >> 8) | (low_surrogate << 8));
-          }
-          *utf16_output++ = char16_t(high_surrogate);
-          *utf16_output++ = char16_t(low_surrogate);
-        }
-      }
-      buf += k;
+    //
+    // Check whether the current bytes are valid UTF-8.
+    //
+    simdutf_really_inline void check_utf8_bytes(const simd8 input, const simd8 prev_input) {
+      // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
+      // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
+      simd8 prev1 = input.prev<1>(prev_input);
+      simd8 sc = check_special_cases(input, prev1);
+      this->error |= check_multibyte_lengths(input, prev_input, sc);
     }
-  }
 
-  // check for invalid input
-  if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) { return std::make_pair(nullptr, utf16_output); }
+    // The only problem that can happen at EOF is that a multibyte character is too short
+    // or a byte value too large in the last bytes: check_special_cases only checks for bytes
+    // too large in the first of two bytes.
+    simdutf_really_inline void check_eof() {
+      // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
+      // possibly finish them.
+      this->error |= this->prev_incomplete;
+    }
 
-  return std::make_pair(buf, utf16_output);
-}
+    simdutf_really_inline void check_next_input(const simd8x64& input) {
+      if(simdutf_likely(is_ascii(input))) {
+        this->error |= this->prev_incomplete;
+      } else {
+        // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+        static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+            "We support either two or four chunks per 64-byte block.");
+        if(simd8x64::NUM_CHUNKS == 2) {
+          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
+          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+        } else if(simd8x64::NUM_CHUNKS == 4) {
+          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
+          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+          this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+        }
+        this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]);
+        this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1];
 
+      }
+    }
 
-template 
-std::pair sse_convert_utf32_to_utf16_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) {
-  const char32_t* start = buf;
-  const char32_t* end = buf + len;
+    // do not forget to call check_eof!
+    simdutf_really_inline bool errors() const {
+      return this->error.any_bits_set_anywhere();
+    }
 
-  const __m128i v_0000 = _mm_setzero_si128();
-  const __m128i v_ffff0000 = _mm_set1_epi32((int32_t)0xffff0000);
+  }; // struct utf8_checker
+} // namespace utf8_validation
 
-  while (buf + 8 <= end) {
-    __m128i in = _mm_loadu_si128((__m128i*)buf);
-    __m128i nextin = _mm_loadu_si128((__m128i*)buf+1);
-    const __m128i saturation_bytemask = _mm_cmpeq_epi32(_mm_and_si128(_mm_or_si128(in, nextin), v_ffff0000), v_0000);
-    const uint32_t saturation_bitmask = static_cast(_mm_movemask_epi8(saturation_bytemask));
+using utf8_validation::utf8_checker;
 
-    // Check if no bits set above 16th
-    if (saturation_bitmask == 0xffff) {
-      // Pack UTF-32 to UTF-16
-      __m128i utf16_packed = _mm_packus_epi32(in, nextin);
+} // unnamed namespace
+} // namespace westmere
+} // namespace simdutf
+/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
+/* begin file src/generic/utf8_validation/utf8_validator.h */
+namespace simdutf {
+namespace westmere {
+namespace {
+namespace utf8_validation {
 
-      const __m128i v_f800 = _mm_set1_epi16((uint16_t)0xf800);
-      const __m128i v_d800 = _mm_set1_epi16((uint16_t)0xd800);
-      const __m128i forbidden_bytemask = _mm_cmpeq_epi16(_mm_and_si128(utf16_packed, v_f800), v_d800);
-      if (static_cast(_mm_movemask_epi8(forbidden_bytemask)) != 0) {
-        return std::make_pair(result(error_code::SURROGATE, buf - start), utf16_output);
-      }
+/**
+ * Validates that the string is actual UTF-8.
+ */
+template
+bool generic_validate_utf8(const uint8_t * input, size_t length) {
+    checker c{};
+    buf_block_reader<64> reader(input, length);
+    while (reader.has_full_block()) {
+      simd::simd8x64 in(reader.full_block());
+      c.check_next_input(in);
+      reader.advance();
+    }
+    uint8_t block[64]{};
+    reader.get_remainder(block);
+    simd::simd8x64 in(block);
+    c.check_next_input(in);
+    reader.advance();
+    c.check_eof();
+    return !c.errors();
+}
 
-      if (big_endian) {
-        const __m128i swap = _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14);
-        utf16_packed = _mm_shuffle_epi8(utf16_packed, swap);
-      }
+bool generic_validate_utf8(const char * input, size_t length) {
+  return generic_validate_utf8(reinterpret_cast(input),length);
+}
 
-      _mm_storeu_si128((__m128i*)utf16_output, utf16_packed);
-      utf16_output += 8;
-      buf += 8;
-    } else {
-      size_t forward = 7;
-      size_t k = 0;
-      if(size_t(end - buf) < forward + 1) { forward = size_t(end - buf - 1);}
-      for(; k < forward; k++) {
-        uint32_t word = buf[k];
-        if((word & 0xFFFF0000)==0) {
-          // will not generate a surrogate pair
-          if (word >= 0xD800 && word <= 0xDFFF) { return std::make_pair(result(error_code::SURROGATE, buf - start + k), utf16_output); }
-          *utf16_output++ = big_endian ? char16_t((uint16_t(word) >> 8) | (uint16_t(word) << 8)) : char16_t(word);
-        } else {
-          // will generate a surrogate pair
-          if (word > 0x10FFFF) { return std::make_pair(result(error_code::TOO_LARGE, buf - start + k), utf16_output); }
-          word -= 0x10000;
-          uint16_t high_surrogate = uint16_t(0xD800 + (word >> 10));
-          uint16_t low_surrogate = uint16_t(0xDC00 + (word & 0x3FF));
-          if (big_endian) {
-            high_surrogate = uint16_t((high_surrogate >> 8) | (high_surrogate << 8));
-            low_surrogate = uint16_t((low_surrogate >> 8) | (low_surrogate << 8));
-          }
-          *utf16_output++ = char16_t(high_surrogate);
-          *utf16_output++ = char16_t(low_surrogate);
-        }
+/**
+ * Validates that the string is actual UTF-8 and stops on errors.
+ */
+template
+result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
+    checker c{};
+    buf_block_reader<64> reader(input, length);
+    size_t count{0};
+    while (reader.has_full_block()) {
+      simd::simd8x64 in(reader.full_block());
+      c.check_next_input(in);
+      if(c.errors()) {
+        if (count != 0) { count--; } // Sometimes the error is only detected in the next chunk
+        result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast(input), reinterpret_cast(input + count), length - count);
+        res.count += count;
+        return res;
       }
-      buf += k;
+      reader.advance();
+      count += 64;
     }
-  }
+    uint8_t block[64]{};
+    reader.get_remainder(block);
+    simd::simd8x64 in(block);
+    c.check_next_input(in);
+    reader.advance();
+    c.check_eof();
+    if (c.errors()) {
+      if (count != 0) { count--; } // Sometimes the error is only detected in the next chunk
+      result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast(input), reinterpret_cast(input) + count, length - count);
+      res.count += count;
+      return res;
+    } else {
+      return result(error_code::SUCCESS, length);
+    }
+}
 
-  return std::make_pair(result(error_code::SUCCESS, buf - start), utf16_output);
+result generic_validate_utf8_with_errors(const char * input, size_t length) {
+  return generic_validate_utf8_with_errors(reinterpret_cast(input),length);
 }
-/* end file src/westmere/sse_convert_utf32_to_utf16.cpp */
 
-} // unnamed namespace
-} // namespace westmere
-} // namespace simdutf
+template
+bool generic_validate_ascii(const uint8_t * input, size_t length) {
+    buf_block_reader<64> reader(input, length);
+    uint8_t blocks[64]{};
+    simd::simd8x64 running_or(blocks);
+    while (reader.has_full_block()) {
+      simd::simd8x64 in(reader.full_block());
+      running_or |= in;
+      reader.advance();
+    }
+    uint8_t block[64]{};
+    reader.get_remainder(block);
+    simd::simd8x64 in(block);
+    running_or |= in;
+    return running_or.is_ascii();
+}
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/buf_block_reader.h
-/* begin file src/generic/buf_block_reader.h */
-namespace simdutf {
-namespace westmere {
-namespace {
+bool generic_validate_ascii(const char * input, size_t length) {
+  return generic_validate_ascii(reinterpret_cast(input),length);
+}
 
-// Walks through a buffer in block-sized increments, loading the last part with spaces
-template
-struct buf_block_reader {
-public:
-  simdutf_really_inline buf_block_reader(const uint8_t *_buf, size_t _len);
-  simdutf_really_inline size_t block_index();
-  simdutf_really_inline bool has_full_block() const;
-  simdutf_really_inline const uint8_t *full_block() const;
-  /**
-   * Get the last block, padded with spaces.
-   *
-   * There will always be a last block, with at least 1 byte, unless len == 0 (in which case this
-   * function fills the buffer with spaces and returns 0. In particular, if len == STEP_SIZE there
-   * will be 0 full_blocks and 1 remainder block with STEP_SIZE bytes and no spaces for padding.
-   *
-   * @return the number of effective characters in the last block.
-   */
-  simdutf_really_inline size_t get_remainder(uint8_t *dst) const;
-  simdutf_really_inline void advance();
-private:
-  const uint8_t *buf;
-  const size_t len;
-  const size_t lenminusstep;
-  size_t idx;
-};
+template
+result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
+  buf_block_reader<64> reader(input, length);
+  size_t count{0};
+  while (reader.has_full_block()) {
+    simd::simd8x64 in(reader.full_block());
+    if (!in.is_ascii()) {
+      result res = scalar::ascii::validate_with_errors(reinterpret_cast(input + count), length - count);
+      return result(res.error, count + res.count);
+    }
+    reader.advance();
 
-// Routines to print masks and text for debugging bitmask operations
-simdutf_unused static char * format_input_text_64(const uint8_t *text) {
-  static char *buf = reinterpret_cast(malloc(sizeof(simd8x64) + 1));
-  for (size_t i=0; i); i++) {
-    buf[i] = int8_t(text[i]) < ' ' ? '_' : int8_t(text[i]);
+    count += 64;
   }
-  buf[sizeof(simd8x64)] = '\0';
-  return buf;
-}
-
-// Routines to print masks and text for debugging bitmask operations
-simdutf_unused static char * format_input_text(const simd8x64& in) {
-  static char *buf = reinterpret_cast(malloc(sizeof(simd8x64) + 1));
-  in.store(reinterpret_cast(buf));
-  for (size_t i=0; i); i++) {
-    if (buf[i] < ' ') { buf[i] = '_'; }
+  uint8_t block[64]{};
+  reader.get_remainder(block);
+  simd::simd8x64 in(block);
+  if (!in.is_ascii()) {
+    result res = scalar::ascii::validate_with_errors(reinterpret_cast(input + count), length - count);
+    return result(res.error, count + res.count);
+  } else {
+    return result(error_code::SUCCESS, length);
   }
-  buf[sizeof(simd8x64)] = '\0';
-  return buf;
 }
 
-simdutf_unused static char * format_mask(uint64_t mask) {
-  static char *buf = reinterpret_cast(malloc(64 + 1));
-  for (size_t i=0; i<64; i++) {
-    buf[i] = (mask & (size_t(1) << i)) ? 'X' : ' ';
-  }
-  buf[64] = '\0';
-  return buf;
+result generic_validate_ascii_with_errors(const char * input, size_t length) {
+  return generic_validate_ascii_with_errors(reinterpret_cast(input),length);
 }
 
-template
-simdutf_really_inline buf_block_reader::buf_block_reader(const uint8_t *_buf, size_t _len) : buf{_buf}, len{_len}, lenminusstep{len < STEP_SIZE ? 0 : len - STEP_SIZE}, idx{0} {}
-
-template
-simdutf_really_inline size_t buf_block_reader::block_index() { return idx; }
+} // namespace utf8_validation
+} // unnamed namespace
+} // namespace westmere
+} // namespace simdutf
+/* end file src/generic/utf8_validation/utf8_validator.h */
+// transcoding from UTF-8 to UTF-16
+/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
 
-template
-simdutf_really_inline bool buf_block_reader::has_full_block() const {
-  return idx < lenminusstep;
-}
 
-template
-simdutf_really_inline const uint8_t *buf_block_reader::full_block() const {
-  return &buf[idx];
-}
+namespace simdutf {
+namespace westmere {
+namespace {
+namespace utf8_to_utf16 {
 
-template
-simdutf_really_inline size_t buf_block_reader::get_remainder(uint8_t *dst) const {
-  if(len == idx) { return 0; } // memcpy(dst, null, 0) will trigger an error with some sanitizers
-  std::memset(dst, 0x20, STEP_SIZE); // std::memset STEP_SIZE because it's more efficient to write out 8 or 16 bytes at once.
-  std::memcpy(dst, buf + idx, len - idx);
-  return len - idx;
-}
+using namespace simd;
 
-template
-simdutf_really_inline void buf_block_reader::advance() {
-  idx += STEP_SIZE;
+template 
+simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
+    char16_t* utf16_output) noexcept {
+  // The implementation is not specific to haswell and should be moved to the generic directory.
+  size_t pos = 0;
+  char16_t* start{utf16_output};
+  const size_t safety_margin = 16; // to avoid overruns!
+  while(pos + 64 + safety_margin <= size) {
+    // this loop could be unrolled further. For example, we could process the mask
+    // far more than 64 bytes.
+    simd8x64 in(reinterpret_cast(input + pos));
+    if(in.is_ascii()) {
+      in.store_ascii_as_utf16(utf16_output);
+      utf16_output += 64;
+      pos += 64;
+    } else {
+      // Slow path. We hope that the compiler will recognize that this is a slow path.
+      // Anything that is not a continuation mask is a 'leading byte', that is, the
+      // start of a new code point.
+      uint64_t utf8_continuation_mask = in.lt(-65 + 1);
+      // -65 is 0b10111111 in two-complement's, so largest possible continuation byte
+      uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+      // The *start* of code points is not so useful, rather, we want the *end* of code points.
+      uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+      // We process in blocks of up to 12 bytes except possibly
+      // for fast paths which may process up to 16 bytes. For the
+      // slow path to work, we should have at least 12 input bytes left.
+      size_t max_starting_point = (pos + 64) - 12;
+      // Next loop is going to run at least five times when using solely
+      // the slow/regular path, and at least four times if there are fast paths.
+      while(pos < max_starting_point) {
+        // Performance note: our ability to compute 'consumed' and
+        // then shift and recompute is critical. If there is a
+        // latency of, say, 4 cycles on getting 'consumed', then
+        // the inner loop might have a total latency of about 6 cycles.
+        // Yet we process between 6 to 12 inputs bytes, thus we get
+        // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+        // for this section of the code. Hence, there is a limit
+        // to how much we can further increase this latency before
+        // it seriously harms performance.
+        //
+        // Thus we may allow convert_masked_utf8_to_utf16 to process
+        // more bytes at a time under a fast-path mode where 16 bytes
+        // are consumed at once (e.g., when encountering ASCII).
+        size_t consumed = convert_masked_utf8_to_utf16(input + pos,
+                            utf8_end_of_code_point_mask, utf16_output);
+        pos += consumed;
+        utf8_end_of_code_point_mask >>= consumed;
+      }
+      // At this point there may remain between 0 and 12 bytes in the
+      // 64-byte block. These bytes will be processed again. So we have an
+      // 80% efficiency (in the worst case). In practice we expect an
+      // 85% to 90% efficiency.
+    }
+  }
+  utf16_output += scalar::utf8_to_utf16::convert_valid(input + pos, size - pos, utf16_output);
+  return utf16_output - start;
 }
 
+} // namespace utf8_to_utf16
 } // unnamed namespace
 } // namespace westmere
 } // namespace simdutf
-/* end file src/generic/buf_block_reader.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_lookup4_algorithm.h
-/* begin file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
+/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
+/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */
+
+
 namespace simdutf {
 namespace westmere {
 namespace {
-namespace utf8_validation {
-
+namespace utf8_to_utf16 {
 using namespace simd;
 
+
   simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
 // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
 // Bit 1 = Too Long (ASCII followed by continuation)
@@ -27074,37 +31587,18 @@ using namespace simd;
   simdutf_really_inline simd8 check_multibyte_lengths(const simd8 input,
       const simd8 prev_input, const simd8 sc) {
     simd8 prev2 = input.prev<2>(prev_input);
-    simd8 prev3 = input.prev<3>(prev_input);
-    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
-    simd8 must23_80 = must23 & uint8_t(0x80);
-    return must23_80 ^ sc;
-  }
-
-  //
-  // Return nonzero if there are incomplete multibyte characters at the end of the block:
-  // e.g. if there is a 4-byte character, but it's 3 bytes from the end.
-  //
-  simdutf_really_inline simd8 is_incomplete(const simd8 input) {
-    // If the previous input's last 3 bytes match this, they're too short (they ended at EOF):
-    // ... 1111____ 111_____ 11______
-    static const uint8_t max_array[32] = {
-      255, 255, 255, 255, 255, 255, 255, 255,
-      255, 255, 255, 255, 255, 255, 255, 255,
-      255, 255, 255, 255, 255, 255, 255, 255,
-      255, 255, 255, 255, 255, 0b11110000u-1, 0b11100000u-1, 0b11000000u-1
-    };
-    const simd8 max_value(&max_array[sizeof(max_array)-sizeof(simd8)]);
-    return input.gt_bits(max_value);
+    simd8 prev3 = input.prev<3>(prev_input);
+    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
+    simd8 must23_80 = must23 & uint8_t(0x80);
+    return must23_80 ^ sc;
   }
 
-  struct utf8_checker {
+
+  struct validating_transcoder {
     // If this is nonzero, there has been a UTF-8 error.
     simd8 error;
-    // The last input we received
-    simd8 prev_input_block;
-    // Whether the last input we received was incomplete (used for ASCII fast path)
-    simd8 prev_incomplete;
 
+    validating_transcoder() : error(uint8_t(0)) {}
     //
     // Check whether the current bytes are valid UTF-8.
     //
@@ -27116,262 +31610,239 @@ using namespace simd;
       this->error |= check_multibyte_lengths(input, prev_input, sc);
     }
 
-    // The only problem that can happen at EOF is that a multibyte character is too short
-    // or a byte value too large in the last bytes: check_special_cases only checks for bytes
-    // too large in the first of two bytes.
-    simdutf_really_inline void check_eof() {
-      // If the previous block had incomplete UTF-8 characters at the end, an ASCII block can't
-      // possibly finish them.
-      this->error |= this->prev_incomplete;
-    }
 
-    simdutf_really_inline void check_next_input(const simd8x64& input) {
-      if(simdutf_likely(is_ascii(input))) {
-        this->error |= this->prev_incomplete;
-      } else {
-        // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
-        static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
-            "We support either two or four chunks per 64-byte block.");
-        if(simd8x64::NUM_CHUNKS == 2) {
-          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
-          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-        } else if(simd8x64::NUM_CHUNKS == 4) {
-          this->check_utf8_bytes(input.chunks[0], this->prev_input_block);
-          this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-          this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
-          this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+    template 
+    simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
+      size_t pos = 0;
+      char16_t* start{utf16_output};
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
+      while(pos + 64 + safety_margin <= size) {
+        simd8x64 input(reinterpret_cast(in + pos));
+        if(input.is_ascii()) {
+          input.store_ascii_as_utf16(utf16_output);
+          utf16_output += 64;
+          pos += 64;
+        } else {
+          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+              "We support either two or four chunks per 64-byte block.");
+          auto zero = simd8{uint8_t(0)};
+          if(simd8x64::NUM_CHUNKS == 2) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          } else if(simd8x64::NUM_CHUNKS == 4) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+          }
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+          // We process in blocks of up to 12 bytes except possibly
+          // for fast paths which may process up to 16 bytes. For the
+          // slow path to work, we should have at least 12 input bytes left.
+          size_t max_starting_point = (pos + 64) - 12;
+          // Next loop is going to run at least five times.
+          while(pos < max_starting_point) {
+            // Performance note: our ability to compute 'consumed' and
+            // then shift and recompute is critical. If there is a
+            // latency of, say, 4 cycles on getting 'consumed', then
+            // the inner loop might have a total latency of about 6 cycles.
+            // Yet we process between 6 to 12 inputs bytes, thus we get
+            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+            // for this section of the code. Hence, there is a limit
+            // to how much we can further increase this latency before
+            // it seriously harms performance.
+            size_t consumed = convert_masked_utf8_to_utf16(in + pos,
+                            utf8_end_of_code_point_mask, utf16_output);
+            pos += consumed;
+            utf8_end_of_code_point_mask >>= consumed;
+          }
+          // At this point there may remain between 0 and 12 bytes in the
+          // 64-byte block. These bytes will be processed again. So we have an
+          // 80% efficiency (in the worst case). In practice we expect an
+          // 85% to 90% efficiency.
         }
-        this->prev_incomplete = is_incomplete(input.chunks[simd8x64::NUM_CHUNKS-1]);
-        this->prev_input_block = input.chunks[simd8x64::NUM_CHUNKS-1];
-
       }
+      if(errors()) { return 0; }
+      if(pos < size) {
+        size_t howmany  = scalar::utf8_to_utf16::convert(in + pos, size - pos, utf16_output);
+        if(howmany == 0) { return 0; }
+        utf16_output += howmany;
+      }
+      return utf16_output - start;
     }
 
-    // do not forget to call check_eof!
-    simdutf_really_inline bool errors() const {
-      return this->error.any_bits_set_anywhere();
-    }
-
-  }; // struct utf8_checker
-} // namespace utf8_validation
-
-using utf8_validation::utf8_checker;
-
-} // unnamed namespace
-} // namespace westmere
-} // namespace simdutf
-/* end file src/generic/utf8_validation/utf8_lookup4_algorithm.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_validation/utf8_validator.h
-/* begin file src/generic/utf8_validation/utf8_validator.h */
-namespace simdutf {
-namespace westmere {
-namespace {
-namespace utf8_validation {
-
-/**
- * Validates that the string is actual UTF-8.
- */
-template
-bool generic_validate_utf8(const uint8_t * input, size_t length) {
-    checker c{};
-    buf_block_reader<64> reader(input, length);
-    while (reader.has_full_block()) {
-      simd::simd8x64 in(reader.full_block());
-      c.check_next_input(in);
-      reader.advance();
-    }
-    uint8_t block[64]{};
-    reader.get_remainder(block);
-    simd::simd8x64 in(block);
-    c.check_next_input(in);
-    reader.advance();
-    c.check_eof();
-    return !c.errors();
-}
-
-bool generic_validate_utf8(const char * input, size_t length) {
-  return generic_validate_utf8(reinterpret_cast(input),length);
-}
-
-/**
- * Validates that the string is actual UTF-8 and stops on errors.
- */
-template
-result generic_validate_utf8_with_errors(const uint8_t * input, size_t length) {
-    checker c{};
-    buf_block_reader<64> reader(input, length);
-    size_t count{0};
-    while (reader.has_full_block()) {
-      simd::simd8x64 in(reader.full_block());
-      c.check_next_input(in);
-      if(c.errors()) {
-        if (count != 0) { count--; } // Sometimes the error is only detected in the next chunk
-        result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast(input), reinterpret_cast(input + count), length - count);
-        res.count += count;
+    template 
+    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
+      size_t pos = 0;
+      char16_t* start{utf16_output};
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65);
+      }
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
+      while(pos + 64 + safety_margin <= size) {
+        simd8x64 input(reinterpret_cast(in + pos));
+        if(input.is_ascii()) {
+          input.store_ascii_as_utf16(utf16_output);
+          utf16_output += 64;
+          pos += 64;
+        } else {
+          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+              "We support either two or four chunks per 64-byte block.");
+          auto zero = simd8{uint8_t(0)};
+          if(simd8x64::NUM_CHUNKS == 2) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          } else if(simd8x64::NUM_CHUNKS == 4) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+          }
+          if (errors()) {
+            // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+            // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+            result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+            res.count += pos;
+            return res;
+          }
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+          // We process in blocks of up to 12 bytes except possibly
+          // for fast paths which may process up to 16 bytes. For the
+          // slow path to work, we should have at least 12 input bytes left.
+          size_t max_starting_point = (pos + 64) - 12;
+          // Next loop is going to run at least five times.
+          while(pos < max_starting_point) {
+            // Performance note: our ability to compute 'consumed' and
+            // then shift and recompute is critical. If there is a
+            // latency of, say, 4 cycles on getting 'consumed', then
+            // the inner loop might have a total latency of about 6 cycles.
+            // Yet we process between 6 to 12 inputs bytes, thus we get
+            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+            // for this section of the code. Hence, there is a limit
+            // to how much we can further increase this latency before
+            // it seriously harms performance.
+            size_t consumed = convert_masked_utf8_to_utf16(in + pos,
+                            utf8_end_of_code_point_mask, utf16_output);
+            pos += consumed;
+            utf8_end_of_code_point_mask >>= consumed;
+          }
+          // At this point there may remain between 0 and 12 bytes in the
+          // 64-byte block. These bytes will be processed again. So we have an
+          // 80% efficiency (in the worst case). In practice we expect an
+          // 85% to 90% efficiency.
+        }
+      }
+      if(errors()) {
+        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+        result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+        res.count += pos;
         return res;
       }
-      reader.advance();
-      count += 64;
-    }
-    uint8_t block[64]{};
-    reader.get_remainder(block);
-    simd::simd8x64 in(block);
-    c.check_next_input(in);
-    reader.advance();
-    c.check_eof();
-    if (c.errors()) {
-      if (count != 0) { count--; } // Sometimes the error is only detected in the next chunk
-      result res = scalar::utf8::rewind_and_validate_with_errors(reinterpret_cast(input), reinterpret_cast(input) + count, length - count);
-      res.count += count;
-      return res;
-    } else {
-      return result(error_code::SUCCESS, length);
-    }
-}
-
-result generic_validate_utf8_with_errors(const char * input, size_t length) {
-  return generic_validate_utf8_with_errors(reinterpret_cast(input),length);
-}
-
-template
-bool generic_validate_ascii(const uint8_t * input, size_t length) {
-    buf_block_reader<64> reader(input, length);
-    uint8_t blocks[64]{};
-    simd::simd8x64 running_or(blocks);
-    while (reader.has_full_block()) {
-      simd::simd8x64 in(reader.full_block());
-      running_or |= in;
-      reader.advance();
+      if(pos < size) {
+        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+        result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+        if (res.error) {    // In case of error, we want the error position
+          res.count += pos;
+          return res;
+        } else {    // In case of success, we want the number of word written
+          utf16_output += res.count;
+        }
+      }
+      return result(error_code::SUCCESS, utf16_output - start);
     }
-    uint8_t block[64]{};
-    reader.get_remainder(block);
-    simd::simd8x64 in(block);
-    running_or |= in;
-    return running_or.is_ascii();
-}
-
-bool generic_validate_ascii(const char * input, size_t length) {
-  return generic_validate_ascii(reinterpret_cast(input),length);
-}
 
-template
-result generic_validate_ascii_with_errors(const uint8_t * input, size_t length) {
-  buf_block_reader<64> reader(input, length);
-  size_t count{0};
-  while (reader.has_full_block()) {
-    simd::simd8x64 in(reader.full_block());
-    if (!in.is_ascii()) {
-      result res = scalar::ascii::validate_with_errors(reinterpret_cast(input + count), length - count);
-      return result(res.error, count + res.count);
+    simdutf_really_inline bool errors() const {
+      return this->error.any_bits_set_anywhere();
     }
-    reader.advance();
-
-    count += 64;
-  }
-  uint8_t block[64]{};
-  reader.get_remainder(block);
-  simd::simd8x64 in(block);
-  if (!in.is_ascii()) {
-    result res = scalar::ascii::validate_with_errors(reinterpret_cast(input + count), length - count);
-    return result(res.error, count + res.count);
-  } else {
-    return result(error_code::SUCCESS, length);
-  }
-}
-
-result generic_validate_ascii_with_errors(const char * input, size_t length) {
-  return generic_validate_ascii_with_errors(reinterpret_cast(input),length);
-}
 
-} // namespace utf8_validation
+  }; // struct utf8_checker
+} // utf8_to_utf16 namespace
 } // unnamed namespace
 } // namespace westmere
 } // namespace simdutf
-/* end file src/generic/utf8_validation/utf8_validator.h */
-// transcoding from UTF-8 to UTF-16
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/valid_utf8_to_utf16.h
-/* begin file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
-
+/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */
+// transcoding from UTF-8 to UTF-32
+/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
 
 namespace simdutf {
 namespace westmere {
 namespace {
-namespace utf8_to_utf16 {
+namespace utf8_to_utf32 {
 
 using namespace simd;
 
-template 
+
 simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
-    char16_t* utf16_output) noexcept {
-  // The implementation is not specific to haswell and should be moved to the generic directory.
+    char32_t* utf32_output) noexcept {
   size_t pos = 0;
-  char16_t* start{utf16_output};
+  char32_t* start{utf32_output};
   const size_t safety_margin = 16; // to avoid overruns!
   while(pos + 64 + safety_margin <= size) {
-    // this loop could be unrolled further. For example, we could process the mask
-    // far more than 64 bytes.
     simd8x64 in(reinterpret_cast(input + pos));
     if(in.is_ascii()) {
-      in.store_ascii_as_utf16(utf16_output);
-      utf16_output += 64;
+      in.store_ascii_as_utf32(utf32_output);
+      utf32_output += 64;
       pos += 64;
     } else {
-      // Slow path. We hope that the compiler will recognize that this is a slow path.
-      // Anything that is not a continuation mask is a 'leading byte', that is, the
-      // start of a new code point.
-      uint64_t utf8_continuation_mask = in.lt(-65 + 1);
-      // -65 is 0b10111111 in two-complement's, so largest possible continuation byte
-      uint64_t utf8_leading_mask = ~utf8_continuation_mask;
-      // The *start* of code points is not so useful, rather, we want the *end* of code points.
-      uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
-      // We process in blocks of up to 12 bytes except possibly
-      // for fast paths which may process up to 16 bytes. For the
-      // slow path to work, we should have at least 12 input bytes left.
-      size_t max_starting_point = (pos + 64) - 12;
-      // Next loop is going to run at least five times when using solely
-      // the slow/regular path, and at least four times if there are fast paths.
-      while(pos < max_starting_point) {
-        // Performance note: our ability to compute 'consumed' and
-        // then shift and recompute is critical. If there is a
-        // latency of, say, 4 cycles on getting 'consumed', then
-        // the inner loop might have a total latency of about 6 cycles.
-        // Yet we process between 6 to 12 inputs bytes, thus we get
-        // a speed limit between 1 cycle/byte and 0.5 cycle/byte
-        // for this section of the code. Hence, there is a limit
-        // to how much we can further increase this latency before
-        // it seriously harms performance.
-        //
-        // Thus we may allow convert_masked_utf8_to_utf16 to process
-        // more bytes at a time under a fast-path mode where 16 bytes
-        // are consumed at once (e.g., when encountering ASCII).
-        size_t consumed = convert_masked_utf8_to_utf16(input + pos,
-                            utf8_end_of_code_point_mask, utf16_output);
-        pos += consumed;
-        utf8_end_of_code_point_mask >>= consumed;
+    // -65 is 0b10111111 in two-complement's, so largest possible continuation byte
+    uint64_t utf8_continuation_mask = in.lt(-65 + 1);
+    uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+    uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+    size_t max_starting_point = (pos + 64) - 12;
+    while(pos < max_starting_point) {
+      size_t consumed = convert_masked_utf8_to_utf32(input + pos,
+                          utf8_end_of_code_point_mask, utf32_output);
+      pos += consumed;
+      utf8_end_of_code_point_mask >>= consumed;
       }
-      // At this point there may remain between 0 and 12 bytes in the
-      // 64-byte block. These bytes will be processed again. So we have an
-      // 80% efficiency (in the worst case). In practice we expect an
-      // 85% to 90% efficiency.
     }
   }
-  utf16_output += scalar::utf8_to_utf16::convert_valid(input + pos, size - pos, utf16_output);
-  return utf16_output - start;
+  utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
+  return utf32_output - start;
 }
 
-} // namespace utf8_to_utf16
+
+} // namespace utf8_to_utf32
 } // unnamed namespace
 } // namespace westmere
 } // namespace simdutf
-/* end file src/generic/utf8_to_utf16/valid_utf8_to_utf16.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf16/utf8_to_utf16.h
-/* begin file src/generic/utf8_to_utf16/utf8_to_utf16.h */
+/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
+/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */
 
 
 namespace simdutf {
 namespace westmere {
 namespace {
-namespace utf8_to_utf16 {
+namespace utf8_to_utf32 {
 using namespace simd;
 
 
@@ -27492,28 +31963,28 @@ using namespace simd;
     }
 
 
-    template 
-    simdutf_really_inline size_t convert(const char* in, size_t size, char16_t* utf16_output) {
+
+    simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
       size_t pos = 0;
-      char16_t* start{utf16_output};
+      char32_t* start{utf32_output};
       // In the worst case, we have the haswell kernel which can cause an overflow of
-      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
       // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
       // much more than 8 bytes. However, you cannot generally assume that you have valid
-      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
       // to give us a good margin.
       size_t leading_byte = 0;
       size_t margin = size;
-      for(; margin > 0 && leading_byte < 8; margin--) {
+      for(; margin > 0 && leading_byte < 4; margin--) {
         leading_byte += (int8_t(in[margin-1]) > -65);
       }
-      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      // If the input is long enough, then we have that margin-1 is the fourth last leading byte.
       const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64 input(reinterpret_cast(in + pos));
         if(input.is_ascii()) {
-          input.store_ascii_as_utf16(utf16_output);
-          utf16_output += 64;
+          input.store_ascii_as_utf32(utf32_output);
+          utf32_output += 64;
           pos += 64;
         } else {
           // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
@@ -27547,8 +32018,8 @@ using namespace simd;
             // for this section of the code. Hence, there is a limit
             // to how much we can further increase this latency before
             // it seriously harms performance.
-            size_t consumed = convert_masked_utf8_to_utf16(in + pos,
-                            utf8_end_of_code_point_mask, utf16_output);
+            size_t consumed = convert_masked_utf8_to_utf32(in + pos,
+                            utf8_end_of_code_point_mask, utf32_output);
             pos += consumed;
             utf8_end_of_code_point_mask >>= consumed;
           }
@@ -27560,35 +32031,34 @@ using namespace simd;
       }
       if(errors()) { return 0; }
       if(pos < size) {
-        size_t howmany  = scalar::utf8_to_utf16::convert(in + pos, size - pos, utf16_output);
+        size_t howmany  = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
         if(howmany == 0) { return 0; }
-        utf16_output += howmany;
+        utf32_output += howmany;
       }
-      return utf16_output - start;
+      return utf32_output - start;
     }
 
-    template 
-    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char16_t* utf16_output) {
+    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
       size_t pos = 0;
-      char16_t* start{utf16_output};
+      char32_t* start{utf32_output};
       // In the worst case, we have the haswell kernel which can cause an overflow of
-      // 8 bytes when calling convert_masked_utf8_to_utf16. If you skip the last 16 bytes,
+      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
       // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
       // much more than 8 bytes. However, you cannot generally assume that you have valid
-      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
       // to give us a good margin.
       size_t leading_byte = 0;
       size_t margin = size;
-      for(; margin > 0 && leading_byte < 8; margin--) {
+      for(; margin > 0 && leading_byte < 4; margin--) {
         leading_byte += (int8_t(in[margin-1]) > -65);
       }
-      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      // If the input is long enough, then we have that margin-1 is the fourth last leading byte.
       const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64 input(reinterpret_cast(in + pos));
         if(input.is_ascii()) {
-          input.store_ascii_as_utf16(utf16_output);
-          utf16_output += 64;
+          input.store_ascii_as_utf32(utf32_output);
+          utf32_output += 64;
           pos += 64;
         } else {
           // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
@@ -27605,9 +32075,7 @@ using namespace simd;
             this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
           }
           if (errors()) {
-            // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
-            // with the ability to go back up to pos bytes, and read size-pos bytes forward.
-            result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+            result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
             res.count += pos;
             return res;
           }
@@ -27629,8 +32097,8 @@ using namespace simd;
             // for this section of the code. Hence, there is a limit
             // to how much we can further increase this latency before
             // it seriously harms performance.
-            size_t consumed = convert_masked_utf8_to_utf16(in + pos,
-                            utf8_end_of_code_point_mask, utf16_output);
+            size_t consumed = convert_masked_utf8_to_utf32(in + pos,
+                            utf8_end_of_code_point_mask, utf32_output);
             pos += consumed;
             utf8_end_of_code_point_mask >>= consumed;
           }
@@ -27641,95 +32109,157 @@ using namespace simd;
         }
       }
       if(errors()) {
-        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
-        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
-        result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+        result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
         res.count += pos;
         return res;
       }
       if(pos < size) {
-        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
-        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
-        result res = scalar::utf8_to_utf16::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf16_output);
+        result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
         if (res.error) {    // In case of error, we want the error position
           res.count += pos;
           return res;
         } else {    // In case of success, we want the number of word written
-          utf16_output += res.count;
+          utf32_output += res.count;
         }
       }
-      return result(error_code::SUCCESS, utf16_output - start);
+      return result(error_code::SUCCESS, utf32_output - start);
+    }
+
+    simdutf_really_inline bool errors() const {
+      return this->error.any_bits_set_anywhere();
+    }
+
+  }; // struct utf8_checker
+} // utf8_to_utf32 namespace
+} // unnamed namespace
+} // namespace westmere
+} // namespace simdutf
+/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */
+// other functions
+/* begin file src/generic/utf8.h */
+
+namespace simdutf {
+namespace westmere {
+namespace {
+namespace utf8 {
+
+using namespace simd;
+
+simdutf_really_inline size_t count_code_points(const char* in, size_t size) {
+    size_t pos = 0;
+    size_t count = 0;
+    for(;pos + 64 <= size; pos += 64) {
+      simd8x64 input(reinterpret_cast(in + pos));
+      uint64_t utf8_continuation_mask = input.gt(-65);
+      count += count_ones(utf8_continuation_mask);
+    }
+    return count + scalar::utf8::count_code_points(in + pos, size - pos);
+}
+
+simdutf_really_inline size_t utf16_length_from_utf8(const char* in, size_t size) {
+    size_t pos = 0;
+    size_t count = 0;
+    // This algorithm could no doubt be improved!
+    for(;pos + 64 <= size; pos += 64) {
+      simd8x64 input(reinterpret_cast(in + pos));
+      uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+      // We count one word for anything that is not a continuation (so
+      // leading bytes).
+      count += 64 - count_ones(utf8_continuation_mask);
+      int64_t utf8_4byte = input.gteq_unsigned(240);
+      count += count_ones(utf8_4byte);
     }
+    return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
+}
 
-    simdutf_really_inline bool errors() const {
-      return this->error.any_bits_set_anywhere();
-    }
 
-  }; // struct utf8_checker
-} // utf8_to_utf16 namespace
+simdutf_really_inline size_t utf32_length_from_utf8(const char* in, size_t size) {
+    return count_code_points(in, size);
+}
+} // utf8 namespace
 } // unnamed namespace
 } // namespace westmere
 } // namespace simdutf
-/* end file src/generic/utf8_to_utf16/utf8_to_utf16.h */
-// transcoding from UTF-8 to UTF-32
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/valid_utf8_to_utf32.h
-/* begin file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
-
+/* end file src/generic/utf8.h */
+/* begin file src/generic/utf16.h */
 namespace simdutf {
 namespace westmere {
 namespace {
-namespace utf8_to_utf32 {
+namespace utf16 {
 
-using namespace simd;
+template 
+simdutf_really_inline size_t count_code_points(const char16_t* in, size_t size) {
+    size_t pos = 0;
+    size_t count = 0;
+    for(;pos < size/32*32; pos += 32) {
+      simd16x32 input(reinterpret_cast(in + pos));
+      if (!match_system(big_endian)) { input.swap_bytes(); }
+      uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF);
+      count += count_ones(not_pair) / 2;
+    }
+    return count + scalar::utf16::count_code_points(in + pos, size - pos);
+}
 
+template 
+simdutf_really_inline size_t utf8_length_from_utf16(const char16_t* in, size_t size) {
+    size_t pos = 0;
+    size_t count = 0;
+    // This algorithm could no doubt be improved!
+    for(;pos < size/32*32; pos += 32) {
+      simd16x32 input(reinterpret_cast(in + pos));
+      if (!match_system(big_endian)) { input.swap_bytes(); }
+      uint64_t ascii_mask = input.lteq(0x7F);
+      uint64_t twobyte_mask = input.lteq(0x7FF);
+      uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF);
 
-simdutf_warn_unused size_t convert_valid(const char* input, size_t size,
-    char32_t* utf32_output) noexcept {
-  size_t pos = 0;
-  char32_t* start{utf32_output};
-  const size_t safety_margin = 16; // to avoid overruns!
-  while(pos + 64 + safety_margin <= size) {
-    simd8x64 in(reinterpret_cast(input + pos));
-    if(in.is_ascii()) {
-      in.store_ascii_as_utf32(utf32_output);
-      utf32_output += 64;
-      pos += 64;
-    } else {
-    // -65 is 0b10111111 in two-complement's, so largest possible continuation byte
-    uint64_t utf8_continuation_mask = in.lt(-65 + 1);
-    uint64_t utf8_leading_mask = ~utf8_continuation_mask;
-    uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
-    size_t max_starting_point = (pos + 64) - 12;
-    while(pos < max_starting_point) {
-      size_t consumed = convert_masked_utf8_to_utf32(input + pos,
-                          utf8_end_of_code_point_mask, utf32_output);
-      pos += consumed;
-      utf8_end_of_code_point_mask >>= consumed;
-      }
+      size_t ascii_count = count_ones(ascii_mask) / 2;
+      size_t twobyte_count = count_ones(twobyte_mask & ~ ascii_mask) / 2;
+      size_t threebyte_count = count_ones(not_pair_mask & ~ twobyte_mask) / 2;
+      size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2;
+      count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + ascii_count;
     }
-  }
-  utf32_output += scalar::utf8_to_utf32::convert_valid(input + pos, size - pos, utf32_output);
-  return utf32_output - start;
+    return count + scalar::utf16::utf8_length_from_utf16(in + pos, size - pos);
 }
 
+template 
+simdutf_really_inline size_t utf32_length_from_utf16(const char16_t* in, size_t size) {
+    return count_code_points(in, size);
+}
 
-} // namespace utf8_to_utf32
+simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* output) {
+  size_t pos = 0;
+
+  while (pos < size/32*32) {
+    simd16x32 input(reinterpret_cast(in + pos));
+    input.swap_bytes();
+    input.store(reinterpret_cast(output));
+    pos += 32;
+    output += 32;
+  }
+
+  scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
+}
+
+} // utf16
 } // unnamed namespace
 } // namespace westmere
 } // namespace simdutf
-/* end file src/generic/utf8_to_utf32/valid_utf8_to_utf32.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8_to_utf32/utf8_to_utf32.h
-/* begin file src/generic/utf8_to_utf32/utf8_to_utf32.h */
+/* end file src/generic/utf16.h */
+// transcoding from UTF-8 to Latin 1
+/* begin file src/generic/utf8_to_latin1/utf8_to_latin1.h */
 
 
 namespace simdutf {
 namespace westmere {
 namespace {
-namespace utf8_to_utf32 {
+namespace utf8_to_latin1 {
 using namespace simd;
 
 
   simdutf_really_inline simd8 check_special_cases(const simd8 input, const simd8 prev1) {
+// For UTF-8 to Latin 1, we can allow any ASCII character, and any continuation byte,
+// but the non-ASCII leading bytes must be 0b11000011 or 0b11000010 and nothing else.
+//
 // Bit 0 = Too Short (lead byte/ASCII followed by lead byte/ASCII)
 // Bit 1 = Too Long (ASCII followed by continuation)
 // Bit 2 = Overlong 3-byte
@@ -27756,6 +32286,7 @@ using namespace simd;
                                                 // 1111011_ 1000____
                                                 // 11111___ 1000____
     constexpr const uint8_t OVERLONG_4  = 1<<6; // 11110000 1000____
+    constexpr const uint8_t FORBIDDEN  = 0xff;
 
     const simd8 byte_1_high = prev1.shr<4>().lookup_16(
       // 0_______ ________ 
@@ -27766,11 +32297,11 @@ using namespace simd;
       // 1100____ ________ 
       TOO_SHORT | OVERLONG_2,
       // 1101____ ________ 
-      TOO_SHORT,
+      FORBIDDEN,
       // 1110____ ________ 
-      TOO_SHORT | OVERLONG_3 | SURROGATE,
+      FORBIDDEN,
       // 1111____ ________ 
-      TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4
+      FORBIDDEN
     );
     constexpr const uint8_t CARRY = TOO_SHORT | TOO_LONG | TWO_CONTS; // These all have ____ in byte 1 .
     const simd8 byte_1_low = (prev1 & 0x0F).lookup_16(
@@ -27783,23 +32314,23 @@ using namespace simd;
       CARRY,
 
       // ____0100 ________
-      CARRY | TOO_LARGE,
+      FORBIDDEN,
       // ____0101 ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      FORBIDDEN,
       // ____011_ ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      FORBIDDEN,
+      FORBIDDEN,
 
       // ____1___ ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
+      FORBIDDEN,
+      FORBIDDEN,
+      FORBIDDEN,
+      FORBIDDEN,
+      FORBIDDEN,
       // ____1101 ________
-      CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE,
-      CARRY | TOO_LARGE | TOO_LARGE_1000,
-      CARRY | TOO_LARGE | TOO_LARGE_1000
+      FORBIDDEN,
+      FORBIDDEN,
+      FORBIDDEN
     );
     const simd8 byte_2_high = input.shr<4>().lookup_16(
       // ________ 0_______ 
@@ -27819,15 +32350,6 @@ using namespace simd;
     );
     return (byte_1_high & byte_1_low & byte_2_high);
   }
-  simdutf_really_inline simd8 check_multibyte_lengths(const simd8 input,
-      const simd8 prev_input, const simd8 sc) {
-    simd8 prev2 = input.prev<2>(prev_input);
-    simd8 prev3 = input.prev<3>(prev_input);
-    simd8 must23 = simd8(must_be_2_3_continuation(prev2, prev3));
-    simd8 must23_80 = must23 & uint8_t(0x80);
-    return must23_80 ^ sc;
-  }
-
 
   struct validating_transcoder {
     // If this is nonzero, there has been a UTF-8 error.
@@ -27841,33 +32363,31 @@ using namespace simd;
       // Flip prev1...prev3 so we can easily determine if they are 2+, 3+ or 4+ lead bytes
       // (2, 3, 4-byte leads become large positive numbers instead of small negative numbers)
       simd8 prev1 = input.prev<1>(prev_input);
-      simd8 sc = check_special_cases(input, prev1);
-      this->error |= check_multibyte_lengths(input, prev_input, sc);
+      this->error |= check_special_cases(input, prev1);
     }
 
 
-
-    simdutf_really_inline size_t convert(const char* in, size_t size, char32_t* utf32_output) {
+    simdutf_really_inline size_t convert(const char* in, size_t size, char* latin1_output) {
       size_t pos = 0;
-      char32_t* start{utf32_output};
+      char* start{latin1_output};
       // In the worst case, we have the haswell kernel which can cause an overflow of
-      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last 16 bytes,
       // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
       // much more than 8 bytes. However, you cannot generally assume that you have valid
-      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
       // to give us a good margin.
       size_t leading_byte = 0;
       size_t margin = size;
-      for(; margin > 0 && leading_byte < 4; margin--) {
-        leading_byte += (int8_t(in[margin-1]) > -65);
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65); //twos complement of -65 is 1011 1111 ...
       }
-      // If the input is long enough, then we have that margin-1 is the fourth last leading byte.
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
       const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64 input(reinterpret_cast(in + pos));
         if(input.is_ascii()) {
-          input.store_ascii_as_utf32(utf32_output);
-          utf32_output += 64;
+          input.store((int8_t*)latin1_output);
+          latin1_output += 64;
           pos += 64;
         } else {
           // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
@@ -27883,7 +32403,7 @@ using namespace simd;
             this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
             this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
           }
-          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in this case, we also have ASCII to account for.
           uint64_t utf8_leading_mask = ~utf8_continuation_mask;
           uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
           // We process in blocks of up to 12 bytes except possibly
@@ -27901,8 +32421,8 @@ using namespace simd;
             // for this section of the code. Hence, there is a limit
             // to how much we can further increase this latency before
             // it seriously harms performance.
-            size_t consumed = convert_masked_utf8_to_utf32(in + pos,
-                            utf8_end_of_code_point_mask, utf32_output);
+            size_t consumed = convert_masked_utf8_to_latin1(in + pos,
+                            utf8_end_of_code_point_mask, latin1_output);
             pos += consumed;
             utf8_end_of_code_point_mask >>= consumed;
           }
@@ -27914,55 +32434,151 @@ using namespace simd;
       }
       if(errors()) { return 0; }
       if(pos < size) {
-        size_t howmany  = scalar::utf8_to_utf32::convert(in + pos, size - pos, utf32_output);
+        size_t howmany  = scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output);
         if(howmany == 0) { return 0; }
-        utf32_output += howmany;
+        latin1_output += howmany;
       }
-      return utf32_output - start;
+      return latin1_output - start;
     }
 
-    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char32_t* utf32_output) {
+    simdutf_really_inline result convert_with_errors(const char* in, size_t size, char* latin1_output) {
       size_t pos = 0;
-      char32_t* start{utf32_output};
+      char* start{latin1_output};
       // In the worst case, we have the haswell kernel which can cause an overflow of
-      // 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
+      // 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last 16 bytes,
       // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
       // much more than 8 bytes. However, you cannot generally assume that you have valid
-      // UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
       // to give us a good margin.
       size_t leading_byte = 0;
       size_t margin = size;
-      for(; margin > 0 && leading_byte < 4; margin--) {
+      for(; margin > 0 && leading_byte < 8; margin--) {
         leading_byte += (int8_t(in[margin-1]) > -65);
       }
-      // If the input is long enough, then we have that margin-1 is the fourth last leading byte.
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
       const size_t safety_margin = size - margin + 1; // to avoid overruns!
       while(pos + 64 + safety_margin <= size) {
         simd8x64 input(reinterpret_cast(in + pos));
         if(input.is_ascii()) {
-          input.store_ascii_as_utf32(utf32_output);
-          utf32_output += 64;
+          input.store((int8_t*)latin1_output);
+          latin1_output += 64;
+          pos += 64;
+        } else {
+          // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
+          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
+              "We support either two or four chunks per 64-byte block.");
+          auto zero = simd8{uint8_t(0)};
+          if(simd8x64::NUM_CHUNKS == 2) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+          } else if(simd8x64::NUM_CHUNKS == 4) {
+            this->check_utf8_bytes(input.chunks[0], zero);
+            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
+            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
+            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
+          }
+          if (errors()) {
+            // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+            // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+            result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
+            res.count += pos;
+            return res;
+          }
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+          uint64_t utf8_leading_mask = ~utf8_continuation_mask;
+          uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
+          // We process in blocks of up to 12 bytes except possibly
+          // for fast paths which may process up to 16 bytes. For the
+          // slow path to work, we should have at least 12 input bytes left.
+          size_t max_starting_point = (pos + 64) - 12;
+          // Next loop is going to run at least five times.
+          while(pos < max_starting_point) {
+            // Performance note: our ability to compute 'consumed' and
+            // then shift and recompute is critical. If there is a
+            // latency of, say, 4 cycles on getting 'consumed', then
+            // the inner loop might have a total latency of about 6 cycles.
+            // Yet we process between 6 to 12 inputs bytes, thus we get
+            // a speed limit between 1 cycle/byte and 0.5 cycle/byte
+            // for this section of the code. Hence, there is a limit
+            // to how much we can further increase this latency before
+            // it seriously harms performance.
+            size_t consumed = convert_masked_utf8_to_latin1(in + pos,
+                            utf8_end_of_code_point_mask, latin1_output);
+            pos += consumed;
+            utf8_end_of_code_point_mask >>= consumed;
+          }
+          // At this point there may remain between 0 and 12 bytes in the
+          // 64-byte block. These bytes will be processed again. So we have an
+          // 80% efficiency (in the worst case). In practice we expect an
+          // 85% to 90% efficiency.
+        }
+      }
+      if(errors()) {
+        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+        result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
+        res.count += pos;
+        return res;
+      }
+      if(pos < size) {
+        // rewind_and_convert_with_errors will seek a potential error from in+pos onward,
+        // with the ability to go back up to pos bytes, and read size-pos bytes forward.
+        result res = scalar::utf8_to_latin1::rewind_and_convert_with_errors(pos, in + pos, size - pos, latin1_output);
+        if (res.error) {    // In case of error, we want the error position
+          res.count += pos;
+          return res;
+        } else {    // In case of success, we want the number of word written
+          latin1_output += res.count;
+        }
+      }
+      return result(error_code::SUCCESS, latin1_output - start);
+    }
+
+    simdutf_really_inline bool errors() const {
+      return this->error.any_bits_set_anywhere();
+    }
+
+  }; // struct utf8_checker
+} // utf8_to_latin1 namespace
+} // unnamed namespace
+} // namespace westmere
+} // namespace simdutf
+/* end file src/generic/utf8_to_latin1/utf8_to_latin1.h */
+/* begin file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */
+
+
+namespace simdutf {
+namespace westmere {
+namespace {
+namespace utf8_to_latin1 {
+using namespace simd;
+
+
+    simdutf_really_inline size_t convert_valid(const char* in, size_t size, char* latin1_output) {
+      size_t pos = 0;
+      char* start{latin1_output};
+      // In the worst case, we have the haswell kernel which can cause an overflow of
+      // 8 bytes when calling convert_masked_utf8_to_latin1. If you skip the last 16 bytes,
+      // and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
+      // much more than 8 bytes. However, you cannot generally assume that you have valid
+      // UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
+      // to give us a good margin.
+      size_t leading_byte = 0;
+      size_t margin = size;
+      for(; margin > 0 && leading_byte < 8; margin--) {
+        leading_byte += (int8_t(in[margin-1]) > -65); //twos complement of -65 is 1011 1111 ...
+      }
+      // If the input is long enough, then we have that margin-1 is the eight last leading byte.
+      const size_t safety_margin = size - margin + 1; // to avoid overruns!
+      while(pos + 64 + safety_margin <= size) {
+        simd8x64 input(reinterpret_cast(in + pos));
+        if(input.is_ascii()) {
+          input.store((int8_t*)latin1_output);
+          latin1_output += 64;
           pos += 64;
         } else {
           // you might think that a for-loop would work, but under Visual Studio, it is not good enough.
-          static_assert((simd8x64::NUM_CHUNKS == 2) || (simd8x64::NUM_CHUNKS == 4),
-              "We support either two or four chunks per 64-byte block.");
-          auto zero = simd8{uint8_t(0)};
-          if(simd8x64::NUM_CHUNKS == 2) {
-            this->check_utf8_bytes(input.chunks[0], zero);
-            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-          } else if(simd8x64::NUM_CHUNKS == 4) {
-            this->check_utf8_bytes(input.chunks[0], zero);
-            this->check_utf8_bytes(input.chunks[1], input.chunks[0]);
-            this->check_utf8_bytes(input.chunks[2], input.chunks[1]);
-            this->check_utf8_bytes(input.chunks[3], input.chunks[2]);
-          }
-          if (errors()) {
-            result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
-            res.count += pos;
-            return res;
-          }
-          uint64_t utf8_continuation_mask = input.lt(-65 + 1);
+          uint64_t utf8_continuation_mask = input.lt(-65 + 1); // -64 is 1100 0000 in twos complement. Note: in this case, we also have ASCII to account for.
           uint64_t utf8_leading_mask = ~utf8_continuation_mask;
           uint64_t utf8_end_of_code_point_mask = utf8_leading_mask>>1;
           // We process in blocks of up to 12 bytes except possibly
@@ -27980,8 +32596,8 @@ using namespace simd;
             // for this section of the code. Hence, there is a limit
             // to how much we can further increase this latency before
             // it seriously harms performance.
-            size_t consumed = convert_masked_utf8_to_utf32(in + pos,
-                            utf8_end_of_code_point_mask, utf32_output);
+            size_t consumed = convert_masked_utf8_to_latin1(in + pos,
+                            utf8_end_of_code_point_mask, latin1_output);
             pos += consumed;
             utf8_end_of_code_point_mask >>= consumed;
           }
@@ -27991,146 +32607,22 @@ using namespace simd;
           // 85% to 90% efficiency.
         }
       }
-      if(errors()) {
-        result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
-        res.count += pos;
-        return res;
-      }
       if(pos < size) {
-        result res = scalar::utf8_to_utf32::rewind_and_convert_with_errors(pos, in + pos, size - pos, utf32_output);
-        if (res.error) {    // In case of error, we want the error position
-          res.count += pos;
-          return res;
-        } else {    // In case of success, we want the number of word written
-          utf32_output += res.count;
-        }
+        size_t howmany  = scalar::utf8_to_latin1::convert(in + pos, size - pos, latin1_output);
+        if(howmany == 0) { return 0; }
+        latin1_output += howmany;
       }
-      return result(error_code::SUCCESS, utf32_output - start);
-    }
-
-    simdutf_really_inline bool errors() const {
-      return this->error.any_bits_set_anywhere();
-    }
-
-  }; // struct utf8_checker
-} // utf8_to_utf32 namespace
-} // unnamed namespace
-} // namespace westmere
-} // namespace simdutf
-/* end file src/generic/utf8_to_utf32/utf8_to_utf32.h */
-// other functions
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf8.h
-/* begin file src/generic/utf8.h */
-
-namespace simdutf {
-namespace westmere {
-namespace {
-namespace utf8 {
-
-using namespace simd;
-
-simdutf_really_inline size_t count_code_points(const char* in, size_t size) {
-    size_t pos = 0;
-    size_t count = 0;
-    for(;pos + 64 <= size; pos += 64) {
-      simd8x64 input(reinterpret_cast(in + pos));
-      uint64_t utf8_continuation_mask = input.lt(-65 + 1);
-      count += 64 - count_ones(utf8_continuation_mask);
+      return latin1_output - start;
     }
-    return count + scalar::utf8::count_code_points(in + pos, size - pos);
-}
-
-
-simdutf_really_inline size_t utf16_length_from_utf8(const char* in, size_t size) {
-    size_t pos = 0;
-    size_t count = 0;
-    // This algorithm could no doubt be improved!
-    for(;pos + 64 <= size; pos += 64) {
-      simd8x64 input(reinterpret_cast(in + pos));
-      uint64_t utf8_continuation_mask = input.lt(-65 + 1);
-      // We count one word for anything that is not a continuation (so
-      // leading bytes).
-      count += 64 - count_ones(utf8_continuation_mask);
-      int64_t utf8_4byte = input.gteq_unsigned(240);
-      count += count_ones(utf8_4byte);
-    }
-    return count + scalar::utf8::utf16_length_from_utf8(in + pos, size - pos);
-}
-
-
-simdutf_really_inline size_t utf32_length_from_utf8(const char* in, size_t size) {
-    return count_code_points(in, size);
-}
-} // utf8 namespace
-} // unnamed namespace
-} // namespace westmere
-} // namespace simdutf
-/* end file src/generic/utf8.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=generic/utf16.h
-/* begin file src/generic/utf16.h */
-namespace simdutf {
-namespace westmere {
-namespace {
-namespace utf16 {
-
-template 
-simdutf_really_inline size_t count_code_points(const char16_t* in, size_t size) {
-    size_t pos = 0;
-    size_t count = 0;
-    for(;pos + 32 <= size; pos += 32) {
-      simd16x32 input(reinterpret_cast(in + pos));
-      if (!match_system(big_endian)) input.swap_bytes();
-      uint64_t not_pair = input.not_in_range(0xDC00, 0xDFFF);
-      count += count_ones(not_pair) / 2;
-    }
-    return count + scalar::utf16::count_code_points(in + pos, size - pos);
-}
-
-template 
-simdutf_really_inline size_t utf8_length_from_utf16(const char16_t* in, size_t size) {
-    size_t pos = 0;
-    size_t count = 0;
-    // This algorithm could no doubt be improved!
-    for(;pos + 32 <= size; pos += 32) {
-      simd16x32 input(reinterpret_cast(in + pos));
-      if (!match_system(big_endian)) input.swap_bytes();
-      uint64_t ascii_mask = input.lteq(0x7F);
-      uint64_t twobyte_mask = input.lteq(0x7FF);
-      uint64_t not_pair_mask = input.not_in_range(0xD800, 0xDFFF);
-
-      size_t ascii_count = count_ones(ascii_mask) / 2;
-      size_t twobyte_count = count_ones(twobyte_mask & ~ ascii_mask) / 2;
-      size_t threebyte_count = count_ones(not_pair_mask & ~ twobyte_mask) / 2;
-      size_t fourbyte_count = 32 - count_ones(not_pair_mask) / 2;
-      count += 2 * fourbyte_count + 3 * threebyte_count + 2 * twobyte_count + ascii_count;
-    }
-    return count + scalar::utf16::utf8_length_from_utf16(in + pos, size - pos);
-}
 
-template 
-simdutf_really_inline size_t utf32_length_from_utf16(const char16_t* in, size_t size) {
-    return count_code_points(in, size);
-}
-
-simdutf_really_inline void change_endianness_utf16(const char16_t* in, size_t size, char16_t* output) {
-  size_t pos = 0;
-
-  while (pos + 32 <= size) {
-    simd16x32 input(reinterpret_cast(in + pos));
-    input.swap_bytes();
-    input.store(reinterpret_cast(output));
-    pos += 32;
-    output += 32;
-  }
+  }; 
+}   // utf8_to_latin1 namespace
+}   // unnamed namespace
+}   // namespace westmere
+ // namespace simdutf
+/* end file src/generic/utf8_to_latin1/valid_utf8_to_latin1.h */
 
-  scalar::utf16::change_endianness_utf16(in + pos, size - pos, output);
-}
 
-} // utf16
-} // unnamed namespace
-} // namespace westmere
-} // namespace simdutf
-/* end file src/generic/utf16.h */
 //
 // Implementation-specific overrides
 //
@@ -28226,6 +32718,74 @@ simdutf_warn_unused result implementation::validate_utf32_with_errors(const char
   }
 }
 
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf8(const char * buf, size_t len, char* utf8_output) const noexcept {
+
+  std::pair ret = sse_convert_latin1_to_utf8(buf, len, utf8_output);
+  size_t converted_chars = ret.second - utf8_output;
+
+  if (ret.first != buf + len) {
+    const size_t scalar_converted_chars = scalar::latin1_to_utf8::convert(
+      ret.first, len - (ret.first - buf), ret.second);
+    converted_chars += scalar_converted_chars;
+  }
+
+  return converted_chars;
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+    std::pair ret = sse_convert_latin1_to_utf16(buf, len, utf16_output);
+    if (ret.first == nullptr) { return 0; }
+    size_t converted_chars = ret.second - utf16_output;
+    if (ret.first != buf + len) {
+        const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert(
+                                              ret.first, len - (ret.first - buf), ret.second);
+        if (scalar_converted_chars == 0) { return 0; }
+        converted_chars += scalar_converted_chars;
+    }
+    return converted_chars;
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf16be(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
+    std::pair ret = sse_convert_latin1_to_utf16(buf, len, utf16_output);
+    if (ret.first == nullptr) { return 0; }
+    size_t converted_chars = ret.second - utf16_output;
+    if (ret.first != buf + len) {
+        const size_t scalar_converted_chars = scalar::latin1_to_utf16::convert(
+                                              ret.first, len - (ret.first - buf), ret.second);
+        if (scalar_converted_chars == 0) { return 0; }
+        converted_chars += scalar_converted_chars;
+    }
+    return converted_chars;
+}
+
+simdutf_warn_unused size_t implementation::convert_latin1_to_utf32(const char* buf, size_t len, char32_t* utf32_output) const noexcept {
+    std::pair ret = sse_convert_latin1_to_utf32(buf, len, utf32_output);
+    if (ret.first == nullptr) { return 0; }
+    size_t converted_chars = ret.second - utf32_output;
+    if (ret.first != buf + len) {
+        const size_t scalar_converted_chars = scalar::latin1_to_utf32::convert(
+                                              ret.first, len - (ret.first - buf), ret.second);
+        if (scalar_converted_chars == 0) { return 0; }
+        converted_chars += scalar_converted_chars;
+    }
+    return converted_chars;
+}
+
+
+simdutf_warn_unused size_t implementation::convert_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
+  utf8_to_latin1::validating_transcoder converter;
+  return converter.convert(buf, len, latin1_output);
+}
+
+simdutf_warn_unused result implementation::convert_utf8_to_latin1_with_errors(const char* buf, size_t len, char* latin1_output) const noexcept {
+  utf8_to_latin1::validating_transcoder converter;
+  return converter.convert_with_errors(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf8_to_latin1(const char* buf, size_t len, char* latin1_output) const noexcept {
+  return westmere::utf8_to_latin1::convert_valid(buf,len,latin1_output);
+}
+
 simdutf_warn_unused size_t implementation::convert_utf8_to_utf16le(const char* buf, size_t len, char16_t* utf16_output) const noexcept {
   utf8_to_utf16::validating_transcoder converter;
   return converter.convert(buf, len, utf16_output);
@@ -28272,6 +32832,79 @@ simdutf_warn_unused size_t implementation::convert_valid_utf8_to_utf32(const cha
   return utf8_to_utf32::convert_valid(input, size,  utf32_output);
 }
 
+simdutf_warn_unused size_t implementation::convert_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = sse_convert_utf16_to_latin1(buf, len, latin1_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - latin1_output;
+
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused size_t implementation::convert_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = sse_convert_utf16_to_latin1(buf, len, latin1_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - latin1_output;
+
+  if (ret.first != buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf16_to_latin1::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+simdutf_warn_unused result implementation::convert_utf16le_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = sse_convert_utf16_to_latin1_with_errors(buf, len, latin1_output);
+  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
+  if (ret.first.count != len) { // All good so far, but not finished
+    result scalar_res = scalar::utf16_to_latin1::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - latin1_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused result implementation::convert_utf16be_to_latin1_with_errors(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = sse_convert_utf16_to_latin1_with_errors(buf, len, latin1_output);
+  if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
+  if (ret.first.count != len) { // All good so far, but not finished
+    result scalar_res = scalar::utf16_to_latin1::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - latin1_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  // optimization opportunity: we could provide an optimized function.
+  return convert_utf16be_to_latin1(buf, len, latin1_output);
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf16le_to_latin1(const char16_t* buf, size_t len, char* latin1_output) const noexcept {
+  // optimization opportunity: we could provide an optimized function.
+  return convert_utf16le_to_latin1(buf, len, latin1_output);
+}
+
 simdutf_warn_unused size_t implementation::convert_utf16le_to_utf8(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
   std::pair ret = sse_convert_utf16_to_utf8(buf, len, utf8_output);
   if (ret.first == nullptr) { return 0; }
@@ -28299,7 +32932,7 @@ simdutf_warn_unused size_t implementation::convert_utf16be_to_utf8(const char16_
 }
 
 simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = westmere::sse_convert_utf16_to_utf8_with_errors(buf, len, utf8_output);
   if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
   if (ret.first.count != len) { // All good so far, but not finished
@@ -28312,12 +32945,12 @@ simdutf_warn_unused result implementation::convert_utf16le_to_utf8_with_errors(c
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
 simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(const char16_t* buf, size_t len, char* utf8_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = westmere::sse_convert_utf16_to_utf8_with_errors(buf, len, utf8_output);
   if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
   if (ret.first.count != len) { // All good so far, but not finished
@@ -28330,7 +32963,7 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf8_with_errors(c
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
@@ -28342,6 +32975,43 @@ simdutf_warn_unused size_t implementation::convert_valid_utf16be_to_utf8(const c
   return convert_utf16be_to_utf8(buf, len, utf8_output);
 }
 
+simdutf_warn_unused size_t implementation::convert_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  std::pair ret = sse_convert_utf32_to_latin1(buf, len, latin1_output);
+  if (ret.first == nullptr) { return 0; }
+  size_t saved_bytes = ret.second - latin1_output;
+  // if (ret.first != buf + len) {
+  if (ret.first < buf + len) {
+    const size_t scalar_saved_bytes = scalar::utf32_to_latin1::convert(
+                                        ret.first, len - (ret.first - buf), ret.second);
+    if (scalar_saved_bytes == 0) { return 0; }
+    saved_bytes += scalar_saved_bytes;
+  }
+  return saved_bytes;
+}
+
+
+simdutf_warn_unused result implementation::convert_utf32_to_latin1_with_errors(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
+  std::pair ret = westmere::sse_convert_utf32_to_latin1_with_errors(buf, len, latin1_output);
+  if (ret.first.count != len) {
+    result scalar_res = scalar::utf32_to_latin1::convert_with_errors(
+                                        buf + ret.first.count, len - ret.first.count, ret.second);
+    if (scalar_res.error) {
+      scalar_res.count += ret.first.count;
+      return scalar_res;
+    } else {
+      ret.second += scalar_res.count;
+    }
+  }
+  ret.first.count = ret.second - latin1_output;   // Set count to the number of 8-bit code units written
+  return ret.first;
+}
+
+simdutf_warn_unused size_t implementation::convert_valid_utf32_to_latin1(const char32_t* buf, size_t len, char* latin1_output) const noexcept {
+  // optimization opportunity: we could provide an optimized function.
+  return convert_utf32_to_latin1(buf,len,latin1_output);
+}
+
 simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
   std::pair ret = sse_convert_utf32_to_utf8(buf, len, utf8_output);
   if (ret.first == nullptr) { return 0; }
@@ -28356,7 +33026,7 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf8(const char32_t*
 }
 
 simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(const char32_t* buf, size_t len, char* utf8_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = westmere::sse_convert_utf32_to_utf8_with_errors(buf, len, utf8_output);
   if (ret.first.count != len) {
     result scalar_res = scalar::utf32_to_utf8::convert_with_errors(
@@ -28368,7 +33038,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf8_with_errors(con
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf8_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
@@ -28399,7 +33069,7 @@ simdutf_warn_unused size_t implementation::convert_utf16be_to_utf32(const char16
 }
 
 simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = westmere::sse_convert_utf16_to_utf32_with_errors(buf, len, utf32_output);
   if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
   if (ret.first.count != len) { // All good so far, but not finished
@@ -28412,12 +33082,12 @@ simdutf_warn_unused result implementation::convert_utf16le_to_utf32_with_errors(
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf32_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf32_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
 simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(const char16_t* buf, size_t len, char32_t* utf32_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = westmere::sse_convert_utf16_to_utf32_with_errors(buf, len, utf32_output);
   if (ret.first.error) { return ret.first; }  // Can return directly since scalar fallback already found correct ret.first.count
   if (ret.first.count != len) { // All good so far, but not finished
@@ -28430,7 +33100,7 @@ simdutf_warn_unused result implementation::convert_utf16be_to_utf32_with_errors(
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf32_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf32_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
@@ -28465,7 +33135,7 @@ simdutf_warn_unused size_t implementation::convert_utf32_to_utf16be(const char32
 }
 
 simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = westmere::sse_convert_utf32_to_utf16_with_errors(buf, len, utf16_output);
   if (ret.first.count != len) {
     result scalar_res = scalar::utf32_to_utf16::convert_with_errors(
@@ -28477,12 +33147,12 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16le_with_errors(
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
 simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(const char32_t* buf, size_t len, char16_t* utf16_output) const noexcept {
-  // ret.first.count is always the position in the buffer, not the number of words written even if finished
+  // ret.first.count is always the position in the buffer, not the number of code units written even if finished
   std::pair ret = westmere::sse_convert_utf32_to_utf16_with_errors(buf, len, utf16_output);
   if (ret.first.count != len) {
     result scalar_res = scalar::utf32_to_utf16::convert_with_errors(
@@ -28494,7 +33164,7 @@ simdutf_warn_unused result implementation::convert_utf32_to_utf16be_with_errors(
       ret.second += scalar_res.count;
     }
   }
-  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit words written
+  ret.first.count = ret.second - utf16_output;   // Set count to the number of 8-bit code units written
   return ret.first;
 }
 
@@ -28530,6 +33200,18 @@ simdutf_warn_unused size_t implementation::count_utf8(const char * input, size_t
   return utf8::count_code_points(input, length);
 }
 
+simdutf_warn_unused size_t implementation::latin1_length_from_utf8(const char* buf, size_t len) const noexcept {
+  return count_utf8(buf,len);
+}
+
+simdutf_warn_unused size_t implementation::latin1_length_from_utf16(size_t length) const noexcept {
+  return scalar::utf16::latin1_length_from_utf16(length);
+}
+
+simdutf_warn_unused size_t implementation::latin1_length_from_utf32(size_t length) const noexcept {
+  return scalar::utf32::latin1_length_from_utf32(length);
+}
+
 simdutf_warn_unused size_t implementation::utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
   return utf16::utf8_length_from_utf16(input, length);
 }
@@ -28538,6 +33220,61 @@ simdutf_warn_unused size_t implementation::utf8_length_from_utf16be(const char16
   return utf16::utf8_length_from_utf16(input, length);
 }
 
+simdutf_warn_unused size_t implementation::utf16_length_from_latin1(size_t length) const noexcept {
+  return scalar::latin1::utf16_length_from_latin1(length);
+}
+
+simdutf_warn_unused size_t implementation::utf32_length_from_latin1(size_t length) const noexcept {
+  return scalar::latin1::utf32_length_from_latin1(length);
+}
+
+simdutf_warn_unused size_t implementation::utf8_length_from_latin1(const char * input, size_t len) const noexcept {
+  const uint8_t *str = reinterpret_cast(input);
+  size_t answer = len / sizeof(__m128i) * sizeof(__m128i);
+  size_t i = 0;
+  __m128i two_64bits = _mm_setzero_si128();
+  while (i + sizeof(__m128i) <= len) {
+    __m128i runner = _mm_setzero_si128();
+    size_t iterations = (len - i) / sizeof(__m128i);
+    if (iterations > 255) {
+      iterations = 255;
+    }
+    size_t max_i = i + iterations * sizeof(__m128i) - sizeof(__m128i);
+    for (; i + 4*sizeof(__m128i) <= max_i; i += 4*sizeof(__m128i)) {
+      __m128i input1 = _mm_loadu_si128((const __m128i *)(str + i));
+      __m128i input2 = _mm_loadu_si128((const __m128i *)(str + i + sizeof(__m128i)));
+      __m128i input3 = _mm_loadu_si128((const __m128i *)(str + i + 2*sizeof(__m128i)));
+      __m128i input4 = _mm_loadu_si128((const __m128i *)(str + i + 3*sizeof(__m128i)));
+      __m128i input12 = _mm_add_epi8(
+                                      _mm_cmpgt_epi8(
+                                                    _mm_setzero_si128(), 
+                                                    input1),
+                                      _mm_cmpgt_epi8(
+                                                    _mm_setzero_si128(),
+                                                    input2));
+      __m128i input34 = _mm_add_epi8(
+                                      _mm_cmpgt_epi8(
+                                                    _mm_setzero_si128(),
+                                                    input3),
+                                      _mm_cmpgt_epi8(
+                                                    _mm_setzero_si128(),
+                                                    input4));
+      __m128i input1234 = _mm_add_epi8(input12, input34);
+      runner = _mm_sub_epi8(runner, input1234);
+    }
+    for (; i <= max_i; i += sizeof(__m128i)) {
+      __m128i more_input = _mm_loadu_si128((const __m128i *)(str + i));
+      runner = _mm_sub_epi8(
+          runner, _mm_cmpgt_epi8(_mm_setzero_si128(), more_input));
+    }
+    two_64bits = _mm_add_epi64(
+        two_64bits, _mm_sad_epu8(runner, _mm_setzero_si128()));
+  }
+  answer += _mm_extract_epi64(two_64bits, 0) +
+            _mm_extract_epi64(two_64bits, 1);
+  return answer + scalar::latin1::utf8_length_from_latin1(reinterpret_cast(str + i), len - i);
+}
+
 simdutf_warn_unused size_t implementation::utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept {
   return utf16::utf32_length_from_utf16(input, length);
 }
@@ -28592,13 +33329,12 @@ simdutf_warn_unused size_t implementation::utf16_length_from_utf32(const char32_
 }
 
 simdutf_warn_unused size_t implementation::utf32_length_from_utf8(const char * input, size_t length) const noexcept {
-  return scalar::utf8::count_code_points(input, length);
+  return utf8::count_code_points(input, length);
 }
 
 } // namespace westmere
 } // namespace simdutf
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/src, filename=simdutf/westmere/end.h
 /* begin file src/simdutf/westmere/end.h */
 #if SIMDUTF_CAN_ALWAYS_RUN_WESTMERE
 // nothing needed.
diff --git a/deps/simdutf/simdutf.h b/deps/simdutf/simdutf.h
index 6ab1c34d7b30a7..6b40666f0170f8 100644
--- a/deps/simdutf/simdutf.h
+++ b/deps/simdutf/simdutf.h
@@ -1,11 +1,9 @@
-/* auto-generated on 2023-10-08 13:48:09 -0400. Do not edit! */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf.h
+/* auto-generated on 2023-11-15 17:34:03 -0500. Do not edit! */
 /* begin file include/simdutf.h */
 #ifndef SIMDUTF_H
 #define SIMDUTF_H
 #include 
 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/compiler_check.h
 /* begin file include/simdutf/compiler_check.h */
 #ifndef SIMDUTF_COMPILER_CHECK_H
 #define SIMDUTF_COMPILER_CHECK_H
@@ -43,13 +41,11 @@
 
 #endif // SIMDUTF_COMPILER_CHECK_H
 /* end file include/simdutf/compiler_check.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/common_defs.h
 /* begin file include/simdutf/common_defs.h */
 #ifndef SIMDUTF_COMMON_DEFS_H
 #define SIMDUTF_COMMON_DEFS_H
 
 #include 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/portability.h
 /* begin file include/simdutf/portability.h */
 #ifndef SIMDUTF_PORTABILITY_H
 #define SIMDUTF_PORTABILITY_H
@@ -167,11 +163,8 @@
 
 #ifdef SIMDUTF_IS_32BITS
 #ifndef SIMDUTF_NO_PORTABILITY_WARNING
-#pragma message("The simdutf library is designed \
-for 64-bit processors and it seems that you are not \
-compiling for a known 64-bit platform. All fast kernels \
-will be disabled and performance may be poor. Please \
-use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.")
+// In the future, we may want to warn users of 32-bit systems that
+// the simdutf does not support accelerated kernels for such systems.
 #endif // SIMDUTF_NO_PORTABILITY_WARNING
 #endif // SIMDUTF_IS_32BITS
 
@@ -280,7 +273,6 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.")
 
 #endif // SIMDUTF_PORTABILITY_H
 /* end file include/simdutf/portability.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/avx512.h
 /* begin file include/simdutf/avx512.h */
 #ifndef SIMDUTF_AVX512_H_
 #define SIMDUTF_AVX512_H_
@@ -483,7 +475,6 @@ use a 64-bit target such as x64, 64-bit ARM or 64-bit PPC.")
 
 #endif // SIMDUTF_COMMON_DEFS_H
 /* end file include/simdutf/common_defs.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/encoding_types.h
 /* begin file include/simdutf/encoding_types.h */
 #include 
 
@@ -495,13 +486,14 @@ enum encoding_type {
         UTF16_BE = 4,   // BOM 0xfe 0xff
         UTF32_LE = 8,   // BOM 0xff 0xfe 0x00 0x00
         UTF32_BE = 16,   // BOM 0x00 0x00 0xfe 0xff
+        Latin1 = 32,
 
         unspecified = 0
 };
 
 enum endianness {
-        LITTLE,
-        BIG
+        LITTLE = 0,
+        BIG = 1
 };
 
 bool match_system(endianness e);
@@ -514,7 +506,7 @@ namespace BOM {
 /**
  * Checks for a BOM. If not, returns unspecified
  * @param input         the string to process
- * @param length        the length of the string in words
+ * @param length        the length of the string in code units
  * @return the corresponding encoding
  */
 
@@ -531,7 +523,6 @@ size_t bom_byte_size(encoding_type bom);
 } // BOM namespace
 } // simdutf namespace
 /* end file include/simdutf/encoding_types.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/error.h
 /* begin file include/simdutf/error.h */
 #ifndef ERROR_H
 #define ERROR_H
@@ -545,15 +536,16 @@ enum error_code {
   TOO_LONG,     // We either have too many consecutive continuation bytes or the string starts with a continuation byte.
   OVERLONG,     // The decoded character must be above U+7F for two-byte characters, U+7FF for three-byte characters,
                 // and U+FFFF for four-byte characters.
-  TOO_LARGE,    // The decoded character must be less than or equal to U+10FFFF OR less than or equal than U+7F for ASCII.
+  TOO_LARGE,    // The decoded character must be less than or equal to U+10FFFF,less than or equal than U+7F for ASCII OR less than equal than U+FF for Latin1
   SURROGATE,    // The decoded character must be not be in U+D800...DFFF (UTF-8 or UTF-32) OR
-                // a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16)
+                // a high surrogate must be followed by a low surrogate and a low surrogate must be preceded by a high surrogate (UTF-16) OR
+                // there must be no surrogate at all (Latin1)
   OTHER         // Not related to validation/transcoding.
 };
 
 struct result {
   error_code error;
-  size_t count;     // In case of error, indicates the position of the error. In case of success, indicates the number of words validated/written.
+  size_t count;     // In case of error, indicates the position of the error. In case of success, indicates the number of code units validated/written.
 
   simdutf_really_inline result();
 
@@ -568,7 +560,6 @@ SIMDUTF_PUSH_DISABLE_WARNINGS
 SIMDUTF_DISABLE_UNDESIRED_WARNINGS
 
 // Public API
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/simdutf_version.h
 /* begin file include/simdutf/simdutf_version.h */
 // /include/simdutf/simdutf_version.h automatically generated by release.py,
 // do not change by hand
@@ -576,28 +567,27 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
 #define SIMDUTF_SIMDUTF_VERSION_H
 
 /** The version of simdutf being used (major.minor.revision) */
-#define SIMDUTF_VERSION "3.2.18"
+#define SIMDUTF_VERSION "4.0.4"
 
 namespace simdutf {
 enum {
   /**
    * The major version (MAJOR.minor.revision) of simdutf being used.
    */
-  SIMDUTF_VERSION_MAJOR = 3,
+  SIMDUTF_VERSION_MAJOR = 4,
   /**
    * The minor version (major.MINOR.revision) of simdutf being used.
    */
-  SIMDUTF_VERSION_MINOR = 2,
+  SIMDUTF_VERSION_MINOR = 0,
   /**
    * The revision (major.minor.REVISION) of simdutf being used.
    */
-  SIMDUTF_VERSION_REVISION = 18
+  SIMDUTF_VERSION_REVISION = 4
 };
 } // namespace simdutf
 
 #endif // SIMDUTF_SIMDUTF_VERSION_H
 /* end file include/simdutf/simdutf_version.h */
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/implementation.h
 /* begin file include/simdutf/implementation.h */
 #ifndef SIMDUTF_IMPLEMENTATION_H
 #define SIMDUTF_IMPLEMENTATION_H
@@ -607,7 +597,6 @@ enum {
 #endif
 #include 
 #include 
-// dofile: invoked with prepath=/Users/dlemire/CVS/github/simdutf/include, filename=simdutf/internal/isadetection.h
 /* begin file include/simdutf/internal/isadetection.h */
 /* From
 https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
@@ -685,7 +674,8 @@ enum instruction_set {
   AVX512CD = 0x2000,
   AVX512BW = 0x4000,
   AVX512VL = 0x8000,
-  AVX512VBMI2 = 0x10000
+  AVX512VBMI2 = 0x10000,
+  AVX512VPOPCNTDQ = 0x2000
 };
 
 #if defined(__PPC64__)
@@ -840,6 +830,9 @@ static inline uint32_t detect_supported_architectures() {
   if (ecx & cpuid_bit::ecx::avx512vbmi2) {
     host_isa |= instruction_set::AVX512VBMI2;
   }
+  if (ecx & cpuid_bit::ecx::avx512vpopcnt) {
+    host_isa |= instruction_set::AVX512VPOPCNTDQ;
+  }
   return host_isa;
 }
 #else // fallback
@@ -892,7 +885,6 @@ simdutf_really_inline simdutf_warn_unused int detect_encodings(const uint8_t * i
   return detect_encodings(reinterpret_cast(input), length);
 }
 
-
 /**
  * Validate the UTF-8 string. This function may be best when you expect
  * the input to be almost always valid. Otherwise, consider using
@@ -913,7 +905,7 @@ simdutf_warn_unused bool validate_utf8(const char *buf, size_t len) noexcept;
  *
  * @param buf the UTF-8 string to validate.
  * @param len the length of the string in bytes.
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
  */
 simdutf_warn_unused result validate_utf8_with_errors(const char *buf, size_t len) noexcept;
 
@@ -936,7 +928,7 @@ simdutf_warn_unused bool validate_ascii(const char *buf, size_t len) noexcept;
  *
  * @param buf the ASCII string to validate.
  * @param len the length of the string in bytes.
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
  */
 simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t len) noexcept;
 
@@ -950,7 +942,7 @@ simdutf_warn_unused result validate_ascii_with_errors(const char *buf, size_t le
  * This function is not BOM-aware.
  *
  * @param buf the UTF-16 string to validate.
- * @param len the length of the string in number of 2-byte words (char16_t).
+ * @param len the length of the string in number of 2-byte code units (char16_t).
  * @return true if and only if the string is valid UTF-16.
  */
 simdutf_warn_unused bool validate_utf16(const char16_t *buf, size_t len) noexcept;
@@ -965,7 +957,7 @@ simdutf_warn_unused bool validate_utf16(const char16_t *buf, size_t len) noexcep
  * This function is not BOM-aware.
  *
  * @param buf the UTF-16LE string to validate.
- * @param len the length of the string in number of 2-byte words (char16_t).
+ * @param len the length of the string in number of 2-byte code units (char16_t).
  * @return true if and only if the string is valid UTF-16LE.
  */
 simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) noexcept;
@@ -980,7 +972,7 @@ simdutf_warn_unused bool validate_utf16le(const char16_t *buf, size_t len) noexc
  * This function is not BOM-aware.
  *
  * @param buf the UTF-16BE string to validate.
- * @param len the length of the string in number of 2-byte words (char16_t).
+ * @param len the length of the string in number of 2-byte code units (char16_t).
  * @return true if and only if the string is valid UTF-16BE.
  */
 simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) noexcept;
@@ -994,8 +986,8 @@ simdutf_warn_unused bool validate_utf16be(const char16_t *buf, size_t len) noexc
  * This function is not BOM-aware.
  *
  * @param buf the UTF-16 string to validate.
- * @param len the length of the string in number of 2-byte words (char16_t).
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ * @param len the length of the string in number of 2-byte code units (char16_t).
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
  */
 simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, size_t len) noexcept;
 
@@ -1008,8 +1000,8 @@ simdutf_warn_unused result validate_utf16_with_errors(const char16_t *buf, size_
  * This function is not BOM-aware.
  *
  * @param buf the UTF-16LE string to validate.
- * @param len the length of the string in number of 2-byte words (char16_t).
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ * @param len the length of the string in number of 2-byte code units (char16_t).
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
  */
 simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, size_t len) noexcept;
 
@@ -1022,8 +1014,8 @@ simdutf_warn_unused result validate_utf16le_with_errors(const char16_t *buf, siz
  * This function is not BOM-aware.
  *
  * @param buf the UTF-16BE string to validate.
- * @param len the length of the string in number of 2-byte words (char16_t).
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ * @param len the length of the string in number of 2-byte code units (char16_t).
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
  */
 simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, size_t len) noexcept;
 
@@ -1037,7 +1029,7 @@ simdutf_warn_unused result validate_utf16be_with_errors(const char16_t *buf, siz
  * This function is not BOM-aware.
  *
  * @param buf the UTF-32 string to validate.
- * @param len the length of the string in number of 4-byte words (char32_t).
+ * @param len the length of the string in number of 4-byte code units (char32_t).
  * @return true if and only if the string is valid UTF-32.
  */
 simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) noexcept;
@@ -1051,13 +1043,75 @@ simdutf_warn_unused bool validate_utf32(const char32_t *buf, size_t len) noexcep
  * This function is not BOM-aware.
  *
  * @param buf the UTF-32 string to validate.
- * @param len the length of the string in number of 4-byte words (char32_t).
- * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+ * @param len the length of the string in number of 4-byte code units (char32_t).
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
  */
 simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_t len) noexcept;
 
+  /**
+   * Convert Latin1 string into UTF8 string.
+   *
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * @param input         the Latin1 string to convert
+   * @param length        the length of the string in bytes
+   * @param latin1_output  the pointer to buffer that can hold conversion result
+   * @return the number of written char; 0 if conversion is not possible
+   */
+  simdutf_warn_unused size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) noexcept;
+
+
+    /**
+   * Convert possibly Latin1 string into UTF-16LE string.
+   *
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * @param input         the Latin1  string to convert
+   * @param length        the length of the string in bytes
+   * @param utf16_buffer  the pointer to buffer that can hold conversion result
+   * @return the number of written char16_t; 0 if conversion is not possible
+   */
+  simdutf_warn_unused size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) noexcept;
+
+  /**
+   * Convert Latin1 string into UTF-16BE string.
+   *
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * @param input         the Latin1 string to convert
+   * @param length        the length of the string in bytes
+   * @param utf16_buffer  the pointer to buffer that can hold conversion result
+   * @return the number of written char16_t; 0 if conversion is not possible
+   */
+  simdutf_warn_unused size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept;
+
+  /**
+   * Convert Latin1 string into UTF-32 string.
+   *
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * @param input         the Latin1 string to convert
+   * @param length        the length of the string in bytes
+   * @param utf32_buffer  the pointer to buffer that can hold conversion result
+   * @return the number of written char32_t; 0 if conversion is not possible
+   */
+  simdutf_warn_unused size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept;
+
+ /**
+   * Convert possibly broken UTF-8 string into latin1 string.
+   *
+   * During the conversion also validation of the input string is done.
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * @param input         the UTF-8 string to convert
+   * @param length        the length of the string in bytes
+   * @param latin1_output  the pointer to buffer that can hold conversion result
+   * @return the number of written char; 0 if the input was not valid UTF-8 string
+   */
+  simdutf_warn_unused size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept;
+
 /**
- * Using native endianness; Convert possibly broken UTF-8 string into UTF-16 string.
+ * Using native endianness, convert possibly broken UTF-8 string into a UTF-16 string.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
@@ -1069,6 +1123,17 @@ simdutf_warn_unused result validate_utf32_with_errors(const char32_t *buf, size_
  */
 simdutf_warn_unused size_t convert_utf8_to_utf16(const char * input, size_t length, char16_t* utf16_output) noexcept;
 
+
+/**
+ * Using native endianness, convert a Latin1 string into a UTF-16 string.
+ *
+ * @param input         the UTF-8 string to convert
+ * @param length        the length of the string in bytes
+ * @param utf16_buffer  the pointer to buffer that can hold conversion result
+ * @return the number of written char16_t.
+ */
+simdutf_warn_unused size_t convert_latin1_to_utf16(const char * input, size_t length, char16_t* utf16_output) noexcept;
+
 /**
  * Convert possibly broken UTF-8 string into UTF-16LE string.
  *
@@ -1095,8 +1160,22 @@ simdutf_warn_unused size_t convert_utf8_to_utf16le(const char * input, size_t le
  */
 simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * input, size_t length, char16_t* utf16_output) noexcept;
 
+
+  /**
+   * Convert possibly broken UTF-8 string into latin1 string with errors.
+   *
+   * During the conversion also validation of the input string is done.
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * @param input         the UTF-8 string to convert
+   * @param length        the length of the string in bytes
+   * @param latin1_output  the pointer to buffer that can hold conversion result
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
+   */
+  simdutf_warn_unused result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) noexcept;
+
 /**
- * Using native endianness; Convert possibly broken UTF-8 string into UTF-16
+ * Using native endianness, convert possibly broken UTF-8 string into UTF-16
  * string and stop on error.
  *
  * During the conversion also validation of the input string is done.
@@ -1105,7 +1184,7 @@ simdutf_warn_unused size_t convert_utf8_to_utf16be(const char * input, size_t le
  * @param input         the UTF-8 string to convert
  * @param length        the length of the string in bytes
  * @param utf16_buffer  the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
  */
 simdutf_warn_unused result convert_utf8_to_utf16_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
 
@@ -1118,7 +1197,7 @@ simdutf_warn_unused result convert_utf8_to_utf16_with_errors(const char * input,
  * @param input         the UTF-8 string to convert
  * @param length        the length of the string in bytes
  * @param utf16_buffer  the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
  */
 simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
 
@@ -1131,7 +1210,7 @@ simdutf_warn_unused result convert_utf8_to_utf16le_with_errors(const char * inpu
  * @param input         the UTF-8 string to convert
  * @param length        the length of the string in bytes
  * @param utf16_buffer  the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
  */
 simdutf_warn_unused result convert_utf8_to_utf16be_with_errors(const char * input, size_t length, char16_t* utf16_output) noexcept;
 
@@ -1157,12 +1236,27 @@ simdutf_warn_unused size_t convert_utf8_to_utf32(const char * input, size_t leng
  * @param input         the UTF-8 string to convert
  * @param length        the length of the string in bytes
  * @param utf32_buffer  the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
  */
 simdutf_warn_unused result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) noexcept;
 
+    /**
+   * Convert valid UTF-8 string into latin1 string.
+   *
+   * This function assumes that the input string is valid UTF-8.
+   *
+   * This function is not BOM-aware.
+   *
+   * @param input         the UTF-8 string to convert
+   * @param length        the length of the string in bytes
+   * @param latin1_output  the pointer to buffer that can hold conversion result
+   * @return the number of written char; 0 if the input was not valid UTF-8 string
+   */
+  simdutf_warn_unused size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) noexcept;
+
+
 /**
- * Using native endianness; Convert valid UTF-8 string into UTF-16 string.
+ * Using native endianness, convert valid UTF-8 string into a UTF-16 string.
  *
  * This function assumes that the input string is valid UTF-8.
  *
@@ -1209,8 +1303,31 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf16be(const char * input, siz
  */
 simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) noexcept;
 
+
+/**
+ * Return the number of bytes that this Latin1 string would require in UTF-8 format.
+ *
+ * @param input         the Latin1 string to convert
+ * @param length        the length of the string bytes
+ * @return the number of bytes required to encode the Latin1 string as UTF-8
+ */
+simdutf_warn_unused size_t utf8_length_from_latin1(const char * input, size_t length) noexcept;
+
 /**
- * Compute the number of 2-byte words that this UTF-8 string would require in UTF-16LE format.
+ * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-8 string to convert
+ * @param length        the length of the string in byte
+ * @return the number of bytes required to encode the UTF-8 string as Latin1
+ */
+simdutf_warn_unused size_t latin1_length_from_utf8(const char * input, size_t length) noexcept;
+
+/**
+ * Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
  *
  * This function does not validate the input.
  *
@@ -1218,12 +1335,12 @@ simdutf_warn_unused size_t convert_valid_utf8_to_utf32(const char * input, size_
  *
  * @param input         the UTF-8 string to process
  * @param length        the length of the string in bytes
- * @return the number of char16_t words required to encode the UTF-8 string as UTF-16LE
+ * @return the number of char16_t code units required to encode the UTF-8 string as UTF-16LE
  */
 simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t length) noexcept;
 
 /**
- * Compute the number of 4-byte words that this UTF-8 string would require in UTF-32 format.
+ * Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
  *
  * This function is equivalent to count_utf8
  *
@@ -1233,12 +1350,12 @@ simdutf_warn_unused size_t utf16_length_from_utf8(const char * input, size_t len
  *
  * @param input         the UTF-8 string to process
  * @param length        the length of the string in bytes
- * @return the number of char32_t words required to encode the UTF-8 string as UTF-32
+ * @return the number of char32_t code units required to encode the UTF-8 string as UTF-32
  */
 simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t length) noexcept;
 
 /**
- * Using native endianness; Convert possibly broken UTF-16 string into UTF-8 string.
+ * Using native endianness, convert possibly broken UTF-16 string into UTF-8 string.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
@@ -1246,12 +1363,60 @@ simdutf_warn_unused size_t utf32_length_from_utf8(const char * input, size_t len
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16 string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf8_buffer   the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-16LE string
+ * @return number of written code units; 0 if input is not a valid UTF-16LE string
  */
 simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
 
+
+
+/**
+ * Using native endianness, convert possibly broken UTF-16 string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16 string to convert
+ * @param length        the length of the string in 2-byte code units (char16_t)
+ * @param latin1_buffer   the pointer to buffer that can hold conversion result
+ * @return number of written code units; 0 if input is not a valid UTF-16LE string
+ */
+simdutf_warn_unused size_t convert_utf16_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+/**
+ * Convert possibly broken UTF-16LE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16LE string to convert
+ * @param length        the length of the string in 2-byte code units (char16_t)
+ * @param latin1_buffer   the pointer to buffer that can hold conversion result
+ * @return number of written code units; 0 if input is not a valid UTF-16LE string
+ */
+simdutf_warn_unused size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+/**
+ * Convert possibly broken UTF-16BE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16BE string to convert
+ * @param length        the length of the string in 2-byte code units (char16_t)
+ * @param latin1_buffer   the pointer to buffer that can hold conversion result
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
+ */
+simdutf_warn_unused size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+
 /**
  * Convert possibly broken UTF-16LE string into UTF-8 string.
  *
@@ -1261,9 +1426,9 @@ simdutf_warn_unused size_t convert_utf16_to_utf8(const char16_t * input, size_t
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16LE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf8_buffer   the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-16LE string
+ * @return number of written code units; 0 if input is not a valid UTF-16LE string
  */
 simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
 
@@ -1276,14 +1441,57 @@ simdutf_warn_unused size_t convert_utf16le_to_utf8(const char16_t * input, size_
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16BE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf8_buffer   the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-16LE string
+ * @return number of written code units; 0 if input is not a valid UTF-16LE string
  */
 simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
 
 /**
- * Using native endianness; Convert possibly broken UTF-16 string into UTF-8 string and stop on error.
+ * Using native endianness, convert possibly broken UTF-16 string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16 string to convert
+ * @param length        the length of the string in 2-byte code units (char16_t)
+ * @param latin1_buffer   the pointer to buffer that can hold conversion result
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
+ */
+simdutf_warn_unused result convert_utf16_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+/**
+ * Convert possibly broken UTF-16LE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16LE string to convert
+ * @param length        the length of the string in 2-byte code units (char16_t)
+ * @param latin1_buffer   the pointer to buffer that can hold conversion result
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
+ */
+simdutf_warn_unused result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+/**
+ * Convert possibly broken UTF-16BE string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16BE string to convert
+ * @param length        the length of the string in 2-byte code units (char16_t)
+ * @param latin1_buffer   the pointer to buffer that can hold conversion result
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
+ */
+simdutf_warn_unused result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+
+/**
+ * Using native endianness, convert possibly broken UTF-16 string into UTF-8 string and stop on error.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
@@ -1291,9 +1499,9 @@ simdutf_warn_unused size_t convert_utf16be_to_utf8(const char16_t * input, size_
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16 string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf8_buffer   the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
  */
 simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
 
@@ -1306,9 +1514,9 @@ simdutf_warn_unused result convert_utf16_to_utf8_with_errors(const char16_t * in
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16LE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf8_buffer   the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
  */
 simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
 
@@ -1321,26 +1529,70 @@ simdutf_warn_unused result convert_utf16le_to_utf8_with_errors(const char16_t *
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16BE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf8_buffer   the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
  */
 simdutf_warn_unused result convert_utf16be_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
 
 /**
- * Using native endianness; Convert valid UTF-16 string into UTF-8 string.
+ * Using native endianness, convert valid UTF-16 string into UTF-8 string.
  *
  * This function assumes that the input string is valid UTF-16LE.
  *
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16 string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf8_buffer   the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
+ * @return number of written code units; 0 if conversion is not possible
  */
 simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
 
+
+/**
+ * Using native endianness, convert UTF-16 string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-8.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16 string to convert
+ * @param length        the length of the string in 2-byte code units (char16_t)
+ * @param latin1_buffer   the pointer to buffer that can hold conversion result
+ * @return number of written code units; 0 if conversion is not possible
+ */
+simdutf_warn_unused size_t convert_valid_utf16_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+/**
+ * Convert valid UTF-16LE string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-16LE.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16LE string to convert
+ * @param length        the length of the string in 2-byte code units (char16_t)
+ * @param latin1_buffer   the pointer to buffer that can hold conversion result
+ * @return number of written code units; 0 if conversion is not possible
+ */
+simdutf_warn_unused size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+/**
+ * Convert valid UTF-16BE string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-16BE.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-16BE string to convert
+ * @param length        the length of the string in 2-byte code units (char16_t)
+ * @param latin1_buffer   the pointer to buffer that can hold conversion result
+ * @return number of written code units; 0 if conversion is not possible
+ */
+simdutf_warn_unused size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) noexcept;
+
+
 /**
  * Convert valid UTF-16LE string into UTF-8 string.
  *
@@ -1349,9 +1601,9 @@ simdutf_warn_unused size_t convert_valid_utf16_to_utf8(const char16_t * input, s
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16LE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf8_buffer   the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
+ * @return number of written code units; 0 if conversion is not possible
  */
 simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
 
@@ -1363,14 +1615,14 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_utf8(const char16_t * input,
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16BE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf8_buffer   the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
+ * @return number of written code units; 0 if conversion is not possible
  */
 simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) noexcept;
 
 /**
- * Using native endianness; Convert possibly broken UTF-16 string into UTF-32 string.
+ * Using native endianness, convert possibly broken UTF-16 string into UTF-32 string.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
@@ -1378,9 +1630,9 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_utf8(const char16_t * input,
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16 string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf32_buffer   the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-16LE string
+ * @return number of written code units; 0 if input is not a valid UTF-16LE string
  */
 simdutf_warn_unused size_t convert_utf16_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
@@ -1393,9 +1645,9 @@ simdutf_warn_unused size_t convert_utf16_to_utf32(const char16_t * input, size_t
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16LE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf32_buffer   the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-16LE string
+ * @return number of written code units; 0 if input is not a valid UTF-16LE string
  */
 simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
@@ -1408,14 +1660,14 @@ simdutf_warn_unused size_t convert_utf16le_to_utf32(const char16_t * input, size
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16BE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf32_buffer   the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-16LE string
+ * @return number of written code units; 0 if input is not a valid UTF-16LE string
  */
 simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
 /**
- * Using native endianness; Convert possibly broken UTF-16 string into
+ * Using native endianness, convert possibly broken UTF-16 string into
  * UTF-32 string and stop on error.
  *
  * During the conversion also validation of the input string is done.
@@ -1424,9 +1676,9 @@ simdutf_warn_unused size_t convert_utf16be_to_utf32(const char16_t * input, size
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16 string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf32_buffer   the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
  */
 simdutf_warn_unused result convert_utf16_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
@@ -1439,9 +1691,9 @@ simdutf_warn_unused result convert_utf16_to_utf32_with_errors(const char16_t * i
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16LE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf32_buffer   the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
  */
 simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
@@ -1454,23 +1706,23 @@ simdutf_warn_unused result convert_utf16le_to_utf32_with_errors(const char16_t *
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16BE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf32_buffer   the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
  */
 simdutf_warn_unused result convert_utf16be_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
 /**
- * Using native endianness; Convert valid UTF-16 string into UTF-32 string.
+ * Using native endianness, convert valid UTF-16 string into UTF-32 string.
  *
  * This function assumes that the input string is valid UTF-16 (native endianness).
  *
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16 string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf32_buffer   the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
+ * @return number of written code units; 0 if conversion is not possible
  */
 simdutf_warn_unused size_t convert_valid_utf16_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
@@ -1482,9 +1734,9 @@ simdutf_warn_unused size_t convert_valid_utf16_to_utf32(const char16_t * input,
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16LE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf32_buffer   the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
+ * @return number of written code units; 0 if conversion is not possible
  */
 simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
@@ -1496,12 +1748,26 @@ simdutf_warn_unused size_t convert_valid_utf16le_to_utf32(const char16_t * input
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16BE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param utf32_buffer   the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
+ * @return number of written code units; 0 if conversion is not possible
  */
 simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) noexcept;
 
+
+/*
+ * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
+ *
+ * This function does not validate the input.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param length        the length of the string in 2-byte code units (char16_t)
+ * @return the number of bytes required to encode the UTF-16LE string as Latin1
+ */
+simdutf_warn_unused size_t latin1_length_from_utf16(size_t length) noexcept;
+
+
 /**
  * Using native endianness; Compute the number of bytes that this UTF-16
  * string would require in UTF-8 format.
@@ -1509,7 +1775,7 @@ simdutf_warn_unused size_t convert_valid_utf16be_to_utf32(const char16_t * input
  * This function does not validate the input.
  *
  * @param input         the UTF-16 string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @return the number of bytes required to encode the UTF-16LE string as UTF-8
  */
 simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t * input, size_t length) noexcept;
@@ -1520,7 +1786,7 @@ simdutf_warn_unused size_t utf8_length_from_utf16(const char16_t * input, size_t
  * This function does not validate the input.
  *
  * @param input         the UTF-16LE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @return the number of bytes required to encode the UTF-16LE string as UTF-8
  */
 simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size_t length) noexcept;
@@ -1531,7 +1797,7 @@ simdutf_warn_unused size_t utf8_length_from_utf16le(const char16_t * input, size
  * This function does not validate the input.
  *
  * @param input         the UTF-16BE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @return the number of bytes required to encode the UTF-16BE string as UTF-8
  */
 simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size_t length) noexcept;
@@ -1545,9 +1811,9 @@ simdutf_warn_unused size_t utf8_length_from_utf16be(const char16_t * input, size
  * This function is not BOM-aware.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @param utf8_buffer   the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-32 string
+ * @return number of written code units; 0 if input is not a valid UTF-32 string
  */
 simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
 
@@ -1560,9 +1826,9 @@ simdutf_warn_unused size_t convert_utf32_to_utf8(const char32_t * input, size_t
  * This function is not BOM-aware.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @param utf8_buffer   the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
  */
 simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
 
@@ -1574,14 +1840,14 @@ simdutf_warn_unused result convert_utf32_to_utf8_with_errors(const char32_t * in
  * This function is not BOM-aware.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @param utf8_buffer   the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
+ * @return number of written code units; 0 if conversion is not possible
  */
 simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) noexcept;
 
 /**
- * Using native endianness; Convert possibly broken UTF-32 string into UTF-16 string.
+ * Using native endianness, convert possibly broken UTF-32 string into a UTF-16 string.
  *
  * During the conversion also validation of the input string is done.
  * This function is suitable to work with inputs from untrusted sources.
@@ -1589,9 +1855,9 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf8(const char32_t * input, s
  * This function is not BOM-aware.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-32 string
+ * @return number of written code units; 0 if input is not a valid UTF-32 string
  */
 simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
@@ -1604,12 +1870,57 @@ simdutf_warn_unused size_t convert_utf32_to_utf16(const char32_t * input, size_t
  * This function is not BOM-aware.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-32 string
+ * @return number of written code units; 0 if input is not a valid UTF-32 string
  */
 simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
+/**
+ * Convert possibly broken UTF-32 string into Latin1 string.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-32 string to convert
+ * @param length        the length of the string in 4-byte code units (char32_t)
+ * @param latin1_buffer   the pointer to buffer that can hold conversion result
+ * @return number of written code units; 0 if input is not a valid UTF-32 string
+ */
+simdutf_warn_unused size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
+
+
+/**
+ * Convert possibly broken UTF-32 string into Latin1 string and stop on error.
+ *
+ * During the conversion also validation of the input string is done.
+ * This function is suitable to work with inputs from untrusted sources.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-32 string to convert
+ * @param length        the length of the string in 4-byte code units (char32_t)
+ * @param latin1_buffer   the pointer to buffer that can hold conversion result
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
+ */
+simdutf_warn_unused result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
+
+/**
+ * Convert valid UTF-32 string into Latin1 string.
+ *
+ * This function assumes that the input string is valid UTF-32.
+ *
+ * This function is not BOM-aware.
+ *
+ * @param input         the UTF-32 string to convert
+ * @param length        the length of the string in 4-byte code units (char32_t)
+ * @param latin1_buffer   the pointer to buffer that can hold the conversion result
+ * @return number of written code units; 0 if conversion is not possible
+ */
+simdutf_warn_unused size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) noexcept;
+
 /**
  * Convert possibly broken UTF-32 string into UTF-16BE string.
  *
@@ -1619,14 +1930,14 @@ simdutf_warn_unused size_t convert_utf32_to_utf16le(const char32_t * input, size
  * This function is not BOM-aware.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold conversion result
- * @return number of written words; 0 if input is not a valid UTF-32 string
+ * @return number of written code units; 0 if input is not a valid UTF-32 string
  */
 simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
 /**
- * Using native endianness; Convert possibly broken UTF-32 string into UTF-16
+ * Using native endianness, convert possibly broken UTF-32 string into UTF-16
  * string and stop on error.
  *
  * During the conversion also validation of the input string is done.
@@ -1635,9 +1946,9 @@ simdutf_warn_unused size_t convert_utf32_to_utf16be(const char32_t * input, size
  * This function is not BOM-aware.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
  */
 simdutf_warn_unused result convert_utf32_to_utf16_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
@@ -1650,9 +1961,9 @@ simdutf_warn_unused result convert_utf32_to_utf16_with_errors(const char32_t * i
  * This function is not BOM-aware.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
  */
 simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
@@ -1665,23 +1976,23 @@ simdutf_warn_unused result convert_utf32_to_utf16le_with_errors(const char32_t *
  * This function is not BOM-aware.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold conversion result
- * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
+ * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
  */
 simdutf_warn_unused result convert_utf32_to_utf16be_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
 /**
- * Using native endianness; Convert valid UTF-32 string into UTF-16 string.
+ * Using native endianness, convert valid UTF-32 string into a UTF-16 string.
  *
  * This function assumes that the input string is valid UTF-32.
  *
  * This function is not BOM-aware.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
+ * @return number of written code units; 0 if conversion is not possible
  */
 simdutf_warn_unused size_t convert_valid_utf32_to_utf16(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
@@ -1693,9 +2004,9 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf16(const char32_t * input,
  * This function is not BOM-aware.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
+ * @return number of written code units; 0 if conversion is not possible
  */
 simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
@@ -1707,9 +2018,9 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf16le(const char32_t * input
  * This function is not BOM-aware.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @param utf16_buffer   the pointer to buffer that can hold the conversion result
- * @return number of written words; 0 if conversion is not possible
+ * @return number of written code units; 0 if conversion is not possible
  */
 simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) noexcept;
 
@@ -1722,7 +2033,7 @@ simdutf_warn_unused size_t convert_valid_utf32_to_utf16be(const char32_t * input
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16 string to process
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @param output        the pointer to buffer that can hold the conversion result
  */
 void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) noexcept;
@@ -1733,18 +2044,18 @@ void change_endianness_utf16(const char16_t * input, size_t length, char16_t * o
  * This function does not validate the input.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @return the number of bytes required to encode the UTF-32 string as UTF-8
  */
 simdutf_warn_unused size_t utf8_length_from_utf32(const char32_t * input, size_t length) noexcept;
 
 /**
- * Compute the number of two-byte words that this UTF-32 string would require in UTF-16 format.
+ * Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
  *
  * This function does not validate the input.
  *
  * @param input         the UTF-32 string to convert
- * @param length        the length of the string in 4-byte words (char32_t)
+ * @param length        the length of the string in 4-byte code units (char32_t)
  * @return the number of bytes required to encode the UTF-32 string as UTF-16
  */
 simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_t length) noexcept;
@@ -1760,7 +2071,7 @@ simdutf_warn_unused size_t utf16_length_from_utf32(const char32_t * input, size_
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16 string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @return the number of bytes required to encode the UTF-16LE string as UTF-32
  */
 simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t * input, size_t length) noexcept;
@@ -1775,7 +2086,7 @@ simdutf_warn_unused size_t utf32_length_from_utf16(const char16_t * input, size_
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16LE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @return the number of bytes required to encode the UTF-16LE string as UTF-32
  */
 simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, size_t length) noexcept;
@@ -1790,7 +2101,7 @@ simdutf_warn_unused size_t utf32_length_from_utf16le(const char16_t * input, siz
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16BE string to convert
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @return the number of bytes required to encode the UTF-16BE string as UTF-32
  */
 simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, size_t length) noexcept;
@@ -1804,7 +2115,7 @@ simdutf_warn_unused size_t utf32_length_from_utf16be(const char16_t * input, siz
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16 string to process
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @return number of code points
  */
 simdutf_warn_unused size_t count_utf16(const char16_t * input, size_t length) noexcept;
@@ -1818,7 +2129,7 @@ simdutf_warn_unused size_t count_utf16(const char16_t * input, size_t length) no
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16LE string to process
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @return number of code points
  */
 simdutf_warn_unused size_t count_utf16le(const char16_t * input, size_t length) noexcept;
@@ -1832,7 +2143,7 @@ simdutf_warn_unused size_t count_utf16le(const char16_t * input, size_t length)
  * This function is not BOM-aware.
  *
  * @param input         the UTF-16BE string to process
- * @param length        the length of the string in 2-byte words (char16_t)
+ * @param length        the length of the string in 2-byte code units (char16_t)
  * @return number of code points
  */
 simdutf_warn_unused size_t count_utf16be(const char16_t * input, size_t length) noexcept;
@@ -1849,6 +2160,68 @@ simdutf_warn_unused size_t count_utf16be(const char16_t * input, size_t length)
  */
 simdutf_warn_unused size_t count_utf8(const char * input, size_t length) noexcept;
 
+/**
+ * Given a valid UTF-8 string having a possibly truncated last character,
+ * this function checks the end of string. If the last character is truncated (or partial),
+ * then it returns a shorter length (shorter by 1 to 3 bytes) so that the short UTF-8
+ * strings only contain complete characters. If there is no truncated character,
+ * the original length is returned.
+ *
+ * This function assumes that the input string is valid UTF-8, but possibly truncated.
+ *
+ * @param input         the UTF-8 string to process
+ * @param length        the length of the string in bytes
+ * @return the length of the string in bytes, possibly shorter by 1 to 3 bytes
+ */
+simdutf_warn_unused size_t trim_partial_utf8(const char *input, size_t length);
+
+/**
+ * Given a valid UTF-16BE string having a possibly truncated last character,
+ * this function checks the end of string. If the last character is truncated (or partial),
+ * then it returns a shorter length (shorter by 1 unit) so that the short UTF-16BE
+ * strings only contain complete characters. If there is no truncated character,
+ * the original length is returned.
+ *
+ * This function assumes that the input string is valid UTF-16BE, but possibly truncated.
+ *
+ * @param input         the UTF-16BE string to process
+ * @param length        the length of the string in bytes
+ * @return the length of the string in bytes, possibly shorter by 1 unit
+ */
+simdutf_warn_unused size_t trim_partial_utf16be(const char16_t* input, size_t length);
+
+/**
+ * Given a valid UTF-16LE string having a possibly truncated last character,
+ * this function checks the end of string. If the last character is truncated (or partial),
+ * then it returns a shorter length (shorter by 1 unit) so that the short UTF-16LE
+ * strings only contain complete characters. If there is no truncated character,
+ * the original length is returned.
+ *
+ * This function assumes that the input string is valid UTF-16LE, but possibly truncated.
+ *
+ * @param input         the UTF-16LE string to process
+ * @param length        the length of the string in bytes
+ * @return the length of the string in unit, possibly shorter by 1 unit
+ */
+simdutf_warn_unused size_t trim_partial_utf16le(const char16_t* input, size_t length);
+
+
+/**
+ * Given a valid UTF-16 string having a possibly truncated last character,
+ * this function checks the end of string. If the last character is truncated (or partial),
+ * then it returns a shorter length (shorter by 1 unit) so that the short UTF-16
+ * strings only contain complete characters. If there is no truncated character,
+ * the original length is returned.
+ *
+ * This function assumes that the input string is valid UTF-16, but possibly truncated.
+ * We use the native endianness.
+ *
+ * @param input         the UTF-16 string to process
+ * @param length        the length of the string in bytes
+ * @return the length of the string in unit, possibly shorter by 1 unit
+ */
+simdutf_warn_unused size_t trim_partial_utf16(const char16_t* input, size_t length);
+
 /**
  * An implementation of simdutf for a particular CPU architecture.
  *
@@ -1932,7 +2305,7 @@ class implementation {
    *
    * @param buf the UTF-8 string to validate.
    * @param len the length of the string in bytes.
-   * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
    */
   simdutf_warn_unused virtual result validate_utf8_with_errors(const char *buf, size_t len) const noexcept = 0;
 
@@ -1954,7 +2327,7 @@ class implementation {
    *
    * @param buf the ASCII string to validate.
    * @param len the length of the string in bytes.
-   * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
    */
   simdutf_warn_unused virtual result validate_ascii_with_errors(const char *buf, size_t len) const noexcept = 0;
 
@@ -1968,7 +2341,7 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param buf the UTF-16LE string to validate.
-   * @param len the length of the string in number of 2-byte words (char16_t).
+   * @param len the length of the string in number of 2-byte code units (char16_t).
    * @return true if and only if the string is valid UTF-16LE.
    */
   simdutf_warn_unused virtual bool validate_utf16le(const char16_t *buf, size_t len) const noexcept = 0;
@@ -1983,7 +2356,7 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param buf the UTF-16BE string to validate.
-   * @param len the length of the string in number of 2-byte words (char16_t).
+   * @param len the length of the string in number of 2-byte code units (char16_t).
    * @return true if and only if the string is valid UTF-16BE.
    */
   simdutf_warn_unused virtual bool validate_utf16be(const char16_t *buf, size_t len) const noexcept = 0;
@@ -1997,8 +2370,8 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param buf the UTF-16LE string to validate.
-   * @param len the length of the string in number of 2-byte words (char16_t).
-   * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+   * @param len the length of the string in number of 2-byte code units (char16_t).
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
    */
   simdutf_warn_unused virtual result validate_utf16le_with_errors(const char16_t *buf, size_t len) const noexcept = 0;
 
@@ -2011,8 +2384,8 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param buf the UTF-16BE string to validate.
-   * @param len the length of the string in number of 2-byte words (char16_t).
-   * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+   * @param len the length of the string in number of 2-byte code units (char16_t).
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
    */
   simdutf_warn_unused virtual result validate_utf16be_with_errors(const char16_t *buf, size_t len) const noexcept = 0;
 
@@ -2024,7 +2397,7 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param buf the UTF-32 string to validate.
-   * @param len the length of the string in number of 4-byte words (char32_t).
+   * @param len the length of the string in number of 4-byte code units (char32_t).
    * @return true if and only if the string is valid UTF-32.
    */
   simdutf_warn_unused virtual bool validate_utf32(const char32_t *buf, size_t len) const noexcept = 0;
@@ -2037,11 +2410,101 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param buf the UTF-32 string to validate.
-   * @param len the length of the string in number of 4-byte words (char32_t).
-   * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+   * @param len the length of the string in number of 4-byte code units (char32_t).
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
    */
   simdutf_warn_unused virtual result validate_utf32_with_errors(const char32_t *buf, size_t len) const noexcept = 0;
 
+  /**
+   * Convert Latin1 string into UTF8 string.
+   *
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * @param input         the Latin1 string to convert
+   * @param length        the length of the string in bytes
+   * @param latin1_output  the pointer to buffer that can hold conversion result
+   * @return the number of written char; 0 if conversion is not possible
+   */
+  simdutf_warn_unused virtual size_t convert_latin1_to_utf8(const char * input, size_t length, char* utf8_output) const noexcept = 0;
+
+
+    /**
+   * Convert possibly Latin1 string into UTF-16LE string.
+   *
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * @param input         the Latin1  string to convert
+   * @param length        the length of the string in bytes
+   * @param utf16_buffer  the pointer to buffer that can hold conversion result
+   * @return the number of written char16_t; 0 if conversion is not possible
+   */
+  simdutf_warn_unused virtual size_t convert_latin1_to_utf16le(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
+
+  /**
+   * Convert Latin1 string into UTF-16BE string.
+   *
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * @param input         the Latin1 string to convert
+   * @param length        the length of the string in bytes
+   * @param utf16_buffer  the pointer to buffer that can hold conversion result
+   * @return the number of written char16_t; 0 if conversion is not possible
+   */
+  simdutf_warn_unused virtual size_t convert_latin1_to_utf16be(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
+
+  /**
+   * Convert Latin1 string into UTF-32 string.
+   *
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * @param input         the Latin1 string to convert
+   * @param length        the length of the string in bytes
+   * @param utf32_buffer  the pointer to buffer that can hold conversion result
+   * @return the number of written char32_t; 0 if conversion is not possible
+   */
+  simdutf_warn_unused virtual size_t convert_latin1_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
+
+ /**
+   * Convert possibly broken UTF-8 string into latin1 string.
+   *
+   * During the conversion also validation of the input string is done.
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * @param input         the UTF-8 string to convert
+   * @param length        the length of the string in bytes
+   * @param latin1_output  the pointer to buffer that can hold conversion result
+   * @return the number of written char; 0 if the input was not valid UTF-8 string
+   */
+  simdutf_warn_unused virtual size_t convert_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0;
+
+  /**
+   * Convert possibly broken UTF-8 string into latin1 string with errors
+   *
+   * During the conversion also validation of the input string is done.
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * @param input         the UTF-8 string to convert
+   * @param length        the length of the string in bytes
+   * @param latin1_output  the pointer to buffer that can hold conversion result
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
+   */
+  simdutf_warn_unused virtual result convert_utf8_to_latin1_with_errors(const char * input, size_t length, char* latin1_output) const noexcept = 0;
+
+    /**
+   * Convert valid UTF-8 string into latin1 string.
+   *
+   * This function assumes that the input string is valid UTF-8.
+   *
+   * This function is not BOM-aware.
+   *
+   * @param input         the UTF-8 string to convert
+   * @param length        the length of the string in bytes
+   * @param latin1_output  the pointer to buffer that can hold conversion result
+   * @return the number of written char; 0 if the input was not valid UTF-8 string
+   */
+  simdutf_warn_unused virtual size_t convert_valid_utf8_to_latin1(const char * input, size_t length, char* latin1_output) const noexcept = 0;
+
+
   /**
    * Convert possibly broken UTF-8 string into UTF-16LE string.
    *
@@ -2077,7 +2540,7 @@ class implementation {
    * @param input         the UTF-8 string to convert
    * @param length        the length of the string in bytes
    * @param utf16_buffer  the pointer to buffer that can hold conversion result
-   * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
    */
   simdutf_warn_unused virtual result convert_utf8_to_utf16le_with_errors(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
 
@@ -2090,7 +2553,7 @@ class implementation {
    * @param input         the UTF-8 string to convert
    * @param length        the length of the string in bytes
    * @param utf16_buffer  the pointer to buffer that can hold conversion result
-   * @return a result pair struct with an error code and either the position of the error if any or the number of words validated if successful.
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of code units validated if successful.
    */
   simdutf_warn_unused virtual result convert_utf8_to_utf16be_with_errors(const char * input, size_t length, char16_t* utf16_output) const noexcept = 0;
 
@@ -2116,7 +2579,7 @@ class implementation {
    * @param input         the UTF-8 string to convert
    * @param length        the length of the string in bytes
    * @param utf32_buffer  the pointer to buffer that can hold conversion result
-   * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
    */
   simdutf_warn_unused virtual result convert_utf8_to_utf32_with_errors(const char * input, size_t length, char32_t* utf32_output) const noexcept = 0;
 
@@ -2157,18 +2620,18 @@ class implementation {
   simdutf_warn_unused virtual size_t convert_valid_utf8_to_utf32(const char * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
 
   /**
-   * Compute the number of 2-byte words that this UTF-8 string would require in UTF-16LE format.
+   * Compute the number of 2-byte code units that this UTF-8 string would require in UTF-16LE format.
    *
    * This function does not validate the input.
    *
    * @param input         the UTF-8 string to process
    * @param length        the length of the string in bytes
-   * @return the number of char16_t words required to encode the UTF-8 string as UTF-16LE
+   * @return the number of char16_t code units required to encode the UTF-8 string as UTF-16LE
    */
   simdutf_warn_unused virtual size_t utf16_length_from_utf8(const char * input, size_t length) const noexcept = 0;
 
    /**
-   * Compute the number of 4-byte words that this UTF-8 string would require in UTF-32 format.
+   * Compute the number of 4-byte code units that this UTF-8 string would require in UTF-32 format.
    *
    * This function is equivalent to count_utf8.
    *
@@ -2176,10 +2639,96 @@ class implementation {
    *
    * @param input         the UTF-8 string to process
    * @param length        the length of the string in bytes
-   * @return the number of char32_t words required to encode the UTF-8 string as UTF-32
+   * @return the number of char32_t code units required to encode the UTF-8 string as UTF-32
    */
   simdutf_warn_unused virtual size_t utf32_length_from_utf8(const char * input, size_t length) const noexcept = 0;
 
+  /**
+   * Convert possibly broken UTF-16LE string into Latin1 string.
+   *
+   * During the conversion also validation of the input string is done.
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * This function is not BOM-aware.
+   *
+   * @param input         the UTF-16LE string to convert
+   * @param length        the length of the string in 2-byte code units (char16_t)
+   * @param latin1_buffer   the pointer to buffer that can hold conversion result
+   * @return number of written code units; 0 if input is not a valid UTF-16LE string
+   */
+  simdutf_warn_unused virtual size_t convert_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+  /**
+   * Convert possibly broken UTF-16BE string into Latin1 string.
+   *
+   * During the conversion also validation of the input string is done.
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * This function is not BOM-aware.
+   *
+   * @param input         the UTF-16BE string to convert
+   * @param length        the length of the string in 2-byte code units (char16_t)
+   * @param latin1_buffer   the pointer to buffer that can hold conversion result
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
+   */
+  simdutf_warn_unused virtual size_t convert_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+  /**
+   * Convert possibly broken UTF-16LE string into Latin1 string.
+   *
+   * During the conversion also validation of the input string is done.
+   * This function is suitable to work with inputs from untrusted sources.
+   * This function is not BOM-aware.
+   *
+   * @param input         the UTF-16LE string to convert
+   * @param length        the length of the string in 2-byte code units (char16_t)
+   * @param latin1_buffer   the pointer to buffer that can hold conversion result
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
+   */
+  simdutf_warn_unused virtual result convert_utf16le_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+  /**
+   * Convert possibly broken UTF-16BE string into Latin1 string.
+   *
+   * During the conversion also validation of the input string is done.
+   * This function is suitable to work with inputs from untrusted sources.
+   * This function is not BOM-aware.
+   *
+   * @param input         the UTF-16BE string to convert
+   * @param length        the length of the string in 2-byte code units (char16_t)
+   * @param latin1_buffer   the pointer to buffer that can hold conversion result
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
+   */
+  simdutf_warn_unused virtual result convert_utf16be_to_latin1_with_errors(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+  /**
+   * Convert valid UTF-16LE string into Latin1 string.
+   *
+   * This function assumes that the input string is valid UTF-8.
+
+   * This function is not BOM-aware.
+   *
+   * @param input         the UTF-16LE string to convert
+   * @param length        the length of the string in 2-byte code units (char16_t)
+   * @param latin1_buffer   the pointer to buffer that can hold conversion result
+   * @return number of written code units; 0 if conversion is not possible
+   */
+  simdutf_warn_unused virtual size_t convert_valid_utf16le_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+  /**
+   * Convert valid UTF-16BE string into Latin1 string.
+   *
+   * This function assumes that the input string is valid UTF-8.
+   *
+   * This function is not BOM-aware.
+   *
+   * @param input         the UTF-16BE string to convert
+   * @param length        the length of the string in 2-byte code units (char16_t)
+   * @param latin1_buffer   the pointer to buffer that can hold conversion result
+   * @return number of written code units; 0 if conversion is not possible
+   */
+  simdutf_warn_unused virtual size_t convert_valid_utf16be_to_latin1(const char16_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
   /**
    * Convert possibly broken UTF-16LE string into UTF-8 string.
    *
@@ -2189,9 +2738,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16LE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @param utf8_buffer   the pointer to buffer that can hold conversion result
-   * @return number of written words; 0 if input is not a valid UTF-16LE string
+   * @return number of written code units; 0 if input is not a valid UTF-16LE string
    */
   simdutf_warn_unused virtual size_t convert_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
 
@@ -2204,9 +2753,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16BE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @param utf8_buffer   the pointer to buffer that can hold conversion result
-   * @return number of written words; 0 if input is not a valid UTF-16BE string
+   * @return number of written code units; 0 if input is not a valid UTF-16BE string
    */
   simdutf_warn_unused virtual size_t convert_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
 
@@ -2219,9 +2768,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16LE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @param utf8_buffer   the pointer to buffer that can hold conversion result
-   * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
    */
   simdutf_warn_unused virtual result convert_utf16le_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
 
@@ -2234,9 +2783,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16BE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @param utf8_buffer   the pointer to buffer that can hold conversion result
-   * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
    */
   simdutf_warn_unused virtual result convert_utf16be_to_utf8_with_errors(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
 
@@ -2248,9 +2797,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16LE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @param utf8_buffer   the pointer to buffer that can hold the conversion result
-   * @return number of written words; 0 if conversion is not possible
+   * @return number of written code units; 0 if conversion is not possible
    */
   simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
 
@@ -2262,9 +2811,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16BE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @param utf8_buffer   the pointer to buffer that can hold the conversion result
-   * @return number of written words; 0 if conversion is not possible
+   * @return number of written code units; 0 if conversion is not possible
    */
   simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf8(const char16_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
 
@@ -2277,9 +2826,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16LE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @param utf32_buffer   the pointer to buffer that can hold conversion result
-   * @return number of written words; 0 if input is not a valid UTF-16LE string
+   * @return number of written code units; 0 if input is not a valid UTF-16LE string
    */
   simdutf_warn_unused virtual size_t convert_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
 
@@ -2292,9 +2841,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16BE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @param utf32_buffer   the pointer to buffer that can hold conversion result
-   * @return number of written words; 0 if input is not a valid UTF-16BE string
+   * @return number of written code units; 0 if input is not a valid UTF-16BE string
    */
   simdutf_warn_unused virtual size_t convert_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
 
@@ -2307,9 +2856,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16LE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @param utf32_buffer   the pointer to buffer that can hold conversion result
-   * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
    */
   simdutf_warn_unused virtual result convert_utf16le_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
 
@@ -2322,9 +2871,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16BE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @param utf32_buffer   the pointer to buffer that can hold conversion result
-   * @return a result pair struct with an error code and either the position of the error if any or the number of char32_t written if successful.
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char32_t written if successful.
    */
   simdutf_warn_unused virtual result convert_utf16be_to_utf32_with_errors(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
 
@@ -2336,9 +2885,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16LE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @param utf32_buffer   the pointer to buffer that can hold the conversion result
-   * @return number of written words; 0 if conversion is not possible
+   * @return number of written code units; 0 if conversion is not possible
    */
   simdutf_warn_unused virtual size_t convert_valid_utf16le_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
 
@@ -2350,9 +2899,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16BE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @param utf32_buffer   the pointer to buffer that can hold the conversion result
-   * @return number of written words; 0 if conversion is not possible
+   * @return number of written code units; 0 if conversion is not possible
    */
   simdutf_warn_unused virtual size_t convert_valid_utf16be_to_utf32(const char16_t * input, size_t length, char32_t* utf32_buffer) const noexcept = 0;
 
@@ -2364,7 +2913,7 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16LE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @return the number of bytes required to encode the UTF-16LE string as UTF-8
    */
   simdutf_warn_unused virtual size_t utf8_length_from_utf16le(const char16_t * input, size_t length) const noexcept = 0;
@@ -2377,11 +2926,57 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16BE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @return the number of bytes required to encode the UTF-16BE string as UTF-8
    */
   simdutf_warn_unused virtual size_t utf8_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0;
 
+  /**
+   * Convert possibly broken UTF-32 string into Latin1 string.
+   *
+   * During the conversion also validation of the input string is done.
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * This function is not BOM-aware.
+   *
+   * @param input         the UTF-32 string to convert
+   * @param length        the length of the string in 4-byte code units (char32_t)
+   * @param latin1_buffer   the pointer to buffer that can hold conversion result
+   * @return number of written code units; 0 if input is not a valid UTF-32 string
+   */
+
+  simdutf_warn_unused virtual size_t convert_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+  /**
+   * Convert possibly broken UTF-32 string into Latin1 string and stop on error.
+   *
+   * During the conversion also validation of the input string is done.
+   * This function is suitable to work with inputs from untrusted sources.
+   *
+   * This function is not BOM-aware.
+   *
+   * @param input         the UTF-32 string to convert
+   * @param length        the length of the string in 4-byte code units (char32_t)
+   * @param latin1_buffer   the pointer to buffer that can hold conversion result
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
+   */
+
+  simdutf_warn_unused virtual result convert_utf32_to_latin1_with_errors(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
+  /**
+   * Convert valid UTF-32 string into Latin1 string.
+   *
+   * This function assumes that the input string is valid UTF-32.
+   *
+   * This function is not BOM-aware.
+   *
+   * @param input         the UTF-32 string to convert
+   * @param length        the length of the string in 4-byte code units (char32_t)
+   * @param latin1_buffer   the pointer to buffer that can hold the conversion result
+   * @return number of written code units; 0 if conversion is not possible
+   */
+  simdutf_warn_unused virtual size_t convert_valid_utf32_to_latin1(const char32_t * input, size_t length, char* latin1_buffer) const noexcept = 0;
+
   /**
    * Convert possibly broken UTF-32 string into UTF-8 string.
    *
@@ -2391,9 +2986,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-32 string to convert
-   * @param length        the length of the string in 4-byte words (char32_t)
+   * @param length        the length of the string in 4-byte code units (char32_t)
    * @param utf8_buffer   the pointer to buffer that can hold conversion result
-   * @return number of written words; 0 if input is not a valid UTF-32 string
+   * @return number of written code units; 0 if input is not a valid UTF-32 string
    */
   simdutf_warn_unused virtual size_t convert_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
 
@@ -2406,9 +3001,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-32 string to convert
-   * @param length        the length of the string in 4-byte words (char32_t)
+   * @param length        the length of the string in 4-byte code units (char32_t)
    * @param utf8_buffer   the pointer to buffer that can hold conversion result
-   * @return a result pair struct with an error code and either the position of the error if any or the number of char written if successful.
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char written if successful.
    */
   simdutf_warn_unused virtual result convert_utf32_to_utf8_with_errors(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
 
@@ -2420,12 +3015,23 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-32 string to convert
-   * @param length        the length of the string in 4-byte words (char32_t)
+   * @param length        the length of the string in 4-byte code units (char32_t)
    * @param utf8_buffer   the pointer to buffer that can hold the conversion result
-   * @return number of written words; 0 if conversion is not possible
+   * @return number of written code units; 0 if conversion is not possible
    */
   simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf8(const char32_t * input, size_t length, char* utf8_buffer) const noexcept = 0;
 
+
+    /**
+   * Return the number of bytes that this UTF-16 string would require in Latin1 format.
+   *
+   *
+   * @param input         the UTF-16 string to convert
+   * @param length        the length of the string in 2-byte code units (char16_t)
+   * @return the number of bytes required to encode the UTF-16 string as Latin1
+   */
+    simdutf_warn_unused virtual size_t utf16_length_from_latin1(size_t length) const noexcept = 0;
+
   /**
    * Convert possibly broken UTF-32 string into UTF-16LE string.
    *
@@ -2435,9 +3041,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-32 string to convert
-   * @param length        the length of the string in 4-byte words (char32_t)
+   * @param length        the length of the string in 4-byte code units (char32_t)
    * @param utf16_buffer   the pointer to buffer that can hold conversion result
-   * @return number of written words; 0 if input is not a valid UTF-32 string
+   * @return number of written code units; 0 if input is not a valid UTF-32 string
    */
   simdutf_warn_unused virtual size_t convert_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
 
@@ -2450,9 +3056,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-32 string to convert
-   * @param length        the length of the string in 4-byte words (char32_t)
+   * @param length        the length of the string in 4-byte code units (char32_t)
    * @param utf16_buffer   the pointer to buffer that can hold conversion result
-   * @return number of written words; 0 if input is not a valid UTF-32 string
+   * @return number of written code units; 0 if input is not a valid UTF-32 string
    */
   simdutf_warn_unused virtual size_t convert_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
 
@@ -2465,9 +3071,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-32 string to convert
-   * @param length        the length of the string in 4-byte words (char32_t)
+   * @param length        the length of the string in 4-byte code units (char32_t)
    * @param utf16_buffer   the pointer to buffer that can hold conversion result
-   * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
    */
   simdutf_warn_unused virtual result convert_utf32_to_utf16le_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
 
@@ -2480,9 +3086,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-32 string to convert
-   * @param length        the length of the string in 4-byte words (char32_t)
+   * @param length        the length of the string in 4-byte code units (char32_t)
    * @param utf16_buffer   the pointer to buffer that can hold conversion result
-   * @return a result pair struct with an error code and either the position of the error if any or the number of char16_t written if successful.
+   * @return a result pair struct (of type simdutf::error containing the two fields error and count) with an error code and either position of the error (in the input in code units) if any, or the number of char16_t written if successful.
    */
   simdutf_warn_unused virtual result convert_utf32_to_utf16be_with_errors(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
 
@@ -2494,9 +3100,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-32 string to convert
-   * @param length        the length of the string in 4-byte words (char32_t)
+   * @param length        the length of the string in 4-byte code units (char32_t)
    * @param utf16_buffer   the pointer to buffer that can hold the conversion result
-   * @return number of written words; 0 if conversion is not possible
+   * @return number of written code units; 0 if conversion is not possible
    */
   simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16le(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
 
@@ -2508,9 +3114,9 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-32 string to convert
-   * @param length        the length of the string in 4-byte words (char32_t)
+   * @param length        the length of the string in 4-byte code units (char32_t)
    * @param utf16_buffer   the pointer to buffer that can hold the conversion result
-   * @return number of written words; 0 if conversion is not possible
+   * @return number of written code units; 0 if conversion is not possible
    */
   simdutf_warn_unused virtual size_t convert_valid_utf32_to_utf16be(const char32_t * input, size_t length, char16_t* utf16_buffer) const noexcept = 0;
 
@@ -2523,33 +3129,88 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16 string to process
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @param output        the pointer to buffer that can hold the conversion result
    */
   virtual void change_endianness_utf16(const char16_t * input, size_t length, char16_t * output) const noexcept = 0;
 
+ /**
+   * Return the number of bytes that this Latin1 string would require in UTF-8 format.
+   *
+   * @param input         the Latin1 string to convert
+   * @param length        the length of the string bytes
+   * @return the number of bytes required to encode the Latin1 string as UTF-8
+   */
+    simdutf_warn_unused virtual size_t utf8_length_from_latin1(const char * input, size_t length) const noexcept = 0;
+
   /**
    * Compute the number of bytes that this UTF-32 string would require in UTF-8 format.
    *
    * This function does not validate the input.
    *
    * @param input         the UTF-32 string to convert
-   * @param length        the length of the string in 4-byte words (char32_t)
+   * @param length        the length of the string in 4-byte code units (char32_t)
    * @return the number of bytes required to encode the UTF-32 string as UTF-8
    */
   simdutf_warn_unused virtual size_t utf8_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
 
   /**
-   * Compute the number of two-byte words that this UTF-32 string would require in UTF-16 format.
+   * Compute the number of bytes that this UTF-32 string would require in Latin1 format.
+   *
+   * This function does not validate the input.
+   *
+   * @param length        the length of the string in 4-byte code units (char32_t)
+   * @return the number of bytes required to encode the UTF-32 string as Latin1
+   */
+  simdutf_warn_unused virtual size_t latin1_length_from_utf32(size_t length) const noexcept = 0;
+
+  /**
+   * Compute the number of bytes that this UTF-8 string would require in Latin1 format.
+   *
+   * This function does not validate the input.
+   *
+   * @param input         the UTF-8 string to convert
+   * @param length        the length of the string in byte
+   * @return the number of bytes required to encode the UTF-8 string as Latin1
+   */
+  simdutf_warn_unused virtual size_t latin1_length_from_utf8(const char * input, size_t length) const noexcept = 0;
+
+  /*
+   * Compute the number of bytes that this UTF-16LE/BE string would require in Latin1 format.
+   *
+   * This function does not validate the input.
+   *
+   * This function is not BOM-aware.
+   *
+   * @param input         the UTF-16LE string to convert
+   * @param length        the length of the string in 2-byte code units (char16_t)
+   * @return the number of bytes required to encode the UTF-16LE string as Latin1
+   */
+  simdutf_warn_unused virtual size_t latin1_length_from_utf16(size_t length) const noexcept = 0;
+
+  /**
+   * Compute the number of two-byte code units that this UTF-32 string would require in UTF-16 format.
    *
    * This function does not validate the input.
    *
    * @param input         the UTF-32 string to convert
-   * @param length        the length of the string in 4-byte words (char32_t)
+   * @param length        the length of the string in 4-byte code units (char32_t)
    * @return the number of bytes required to encode the UTF-32 string as UTF-16
    */
   simdutf_warn_unused virtual size_t utf16_length_from_utf32(const char32_t * input, size_t length) const noexcept = 0;
 
+
+    /**
+   * Return the number of bytes that this UTF-32 string would require in Latin1 format.
+   *
+   * This function does not validate the input.
+   *
+   * @param input         the UTF-32 string to convert
+   * @param length        the length of the string in 4-byte code units (char32_t)
+   * @return the number of bytes required to encode the UTF-32 string as Latin1
+   */
+    simdutf_warn_unused virtual size_t utf32_length_from_latin1(size_t length) const noexcept = 0;
+
   /*
    * Compute the number of bytes that this UTF-16LE string would require in UTF-32 format.
    *
@@ -2560,7 +3221,7 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16LE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @return the number of bytes required to encode the UTF-16LE string as UTF-32
    */
   simdutf_warn_unused virtual size_t utf32_length_from_utf16le(const char16_t * input, size_t length) const noexcept = 0;
@@ -2575,7 +3236,7 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16BE string to convert
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @return the number of bytes required to encode the UTF-16BE string as UTF-32
    */
   simdutf_warn_unused virtual size_t utf32_length_from_utf16be(const char16_t * input, size_t length) const noexcept = 0;
@@ -2589,7 +3250,7 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16LE string to process
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @return number of code points
    */
   simdutf_warn_unused virtual size_t count_utf16le(const char16_t * input, size_t length) const noexcept = 0;
@@ -2603,7 +3264,7 @@ class implementation {
    * This function is not BOM-aware.
    *
    * @param input         the UTF-16BE string to process
-   * @param length        the length of the string in 2-byte words (char16_t)
+   * @param length        the length of the string in 2-byte code units (char16_t)
    * @return number of code points
    */
   simdutf_warn_unused virtual size_t count_utf16be(const char16_t * input, size_t length) const noexcept = 0;
diff --git a/doc/contributing/maintaining/maintaining-dependencies.md b/doc/contributing/maintaining/maintaining-dependencies.md
index 8106e7770ca112..008333ae1d7b6c 100644
--- a/doc/contributing/maintaining/maintaining-dependencies.md
+++ b/doc/contributing/maintaining/maintaining-dependencies.md
@@ -27,7 +27,7 @@ This a list of all the dependencies:
 * [npm 9.6.7][]
 * [openssl 3.0.8][]
 * [postject 1.0.0-alpha.6][]
-* [simdutf 3.2.18][]
+* [simdutf 4.0.4][]
 * [undici 5.27.0][]
 * [uvwasi 0.0.19][]
 * [V8 11.3.244.8][]
@@ -286,7 +286,7 @@ See [maintaining-openssl][] for more informations.
 The [postject](https://github.com/nodejs/postject) dependency is used for the
 [Single Executable strategic initiative](https://github.com/nodejs/single-executable).
 
-### simdutf 3.2.18
+### simdutf 4.0.4
 
 The [simdutf](https://github.com/simdutf/simdutf) dependency is
 a C++ library for fast UTF-8 decoding and encoding.
@@ -344,7 +344,7 @@ performance improvements not currently available in standard zlib.
 [npm 9.6.7]: #npm-967
 [openssl 3.0.8]: #openssl-308
 [postject 1.0.0-alpha.6]: #postject-100-alpha6
-[simdutf 3.2.18]: #simdutf-3218
+[simdutf 4.0.4]: #simdutf-404
 [undici 5.27.0]: #undici-5270
 [update-openssl-action]: ../../../.github/workflows/update-openssl.yml
 [uvwasi 0.0.19]: #uvwasi-0019

From 549d4422b7a1be0945dd5c21aad2465ba0e7c504 Mon Sep 17 00:00:00 2001
From: Ryan Zimmerman 
Date: Sat, 18 Nov 2023 13:25:48 -0500
Subject: [PATCH 154/229] doc: fix fs.writeFileSync return value documentation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In https://github.com/nodejs/node/pull/50009, the return value was accidentally made part of `flush` option bullet point.

PR-URL: https://github.com/nodejs/node/pull/50760
Reviewed-By: Luigi Pinca 
Reviewed-By: Vinícius Lourenço Claro Cardoso 
Reviewed-By: Deokjin Kim 
---
 doc/api/fs.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/api/fs.md b/doc/api/fs.md
index ac7c78ce8e084f..5ff5de1f9d73f8 100644
--- a/doc/api/fs.md
+++ b/doc/api/fs.md
@@ -6220,7 +6220,8 @@ changes:
   * `flag` {string} See [support of file system `flags`][]. **Default:** `'w'`.
   * `flush` {boolean} If all data is successfully written to the file, and
     `flush` is `true`, `fs.fsyncSync()` is used to flush the data.
-    Returns `undefined`.
+
+Returns `undefined`.
 
 The `mode` option only affects the newly created file. See [`fs.open()`][]
 for more details.

From 6584dd80f773d479e2e347ea4041afc1c6b724cc Mon Sep 17 00:00:00 2001
From: Jan 
Date: Sat, 18 Nov 2023 20:09:30 +0100
Subject: [PATCH 155/229] test: replace forEach() with for-loop

PR-URL: https://github.com/nodejs/node/pull/50596
Reviewed-By: Marco Ippolito 
Reviewed-By: Rafael Gonzaga 
Reviewed-By: Luigi Pinca 
Reviewed-By: James M Snell 
---
 test/parallel/test-trace-events-vm.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/parallel/test-trace-events-vm.js b/test/parallel/test-trace-events-vm.js
index b52d22368d07d2..d85a2cefd645af 100644
--- a/test/parallel/test-trace-events-vm.js
+++ b/test/parallel/test-trace-events-vm.js
@@ -32,10 +32,10 @@ if (process.argv[2] === 'child') {
     fs.readFile(file, common.mustCall((err, data) => {
       const traces = JSON.parse(data.toString()).traceEvents
         .filter((trace) => trace.cat !== '__metadata');
-      traces.forEach((trace) => {
+      for (const trace of traces) {
         assert.strictEqual(trace.pid, proc.pid);
         assert(names.includes(trace.name));
-      });
+      }
     }));
   }));
 }

From 00f7a5d26f83475aea140f3d0ad90ee2d412d021 Mon Sep 17 00:00:00 2001
From: Lei Shi 
Date: Sun, 19 Nov 2023 14:33:59 +0800
Subject: [PATCH 156/229] benchmark: increase the iteration number to an
 appropriate value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Current iteration number is too small that fwrite occupies large
portion of execution time which made crypo execution time measured
inaccurate. The iteration above 1e5 makes 50% higher and stable
score.

PR-URL: https://github.com/nodejs/node/pull/50766
Reviewed-By: Debadree Chatterjee 
Reviewed-By: Filip Skokan 
Reviewed-By: Vinícius Lourenço Claro Cardoso 
---
 benchmark/crypto/webcrypto-digest.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmark/crypto/webcrypto-digest.js b/benchmark/crypto/webcrypto-digest.js
index e2e6f55c6fdd1f..f5ef8e6bc49385 100644
--- a/benchmark/crypto/webcrypto-digest.js
+++ b/benchmark/crypto/webcrypto-digest.js
@@ -8,7 +8,7 @@ const bench = common.createBenchmark(main, {
   sync: ['createHash', 'subtle'],
   data: [10, 20, 50, 100],
   method: ['SHA-1', 'SHA-256', 'SHA-384', 'SHA-512'],
-  n: [1e3],
+  n: [1e5],
 });
 
 const kMethods = {

From 6620df1c0581fafa53c0c773699edd40f030f06c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20Nie=C3=9Fen?= 
Date: Sun, 19 Nov 2023 14:15:35 +0000
Subject: [PATCH 157/229] src: remove erroneous default argument in RadixTree
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This default argument can never benefit any potential caller because any
call site that occurs after this definition will result in a compiler
error due to RadixTree::Lookup(const std::string_view&) now being an
ambiguous call target.

PR-URL: https://github.com/nodejs/node/pull/50736
Reviewed-By: Rafael Gonzaga 
Reviewed-By: Vinícius Lourenço Claro Cardoso 
Reviewed-By: Luigi Pinca 
---
 src/permission/fs_permission.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/permission/fs_permission.cc b/src/permission/fs_permission.cc
index 042dc0ff297216..9f8ab281618418 100644
--- a/src/permission/fs_permission.cc
+++ b/src/permission/fs_permission.cc
@@ -172,7 +172,7 @@ FSPermission::RadixTree::~RadixTree() {
 }
 
 bool FSPermission::RadixTree::Lookup(const std::string_view& s,
-                                     bool when_empty_return = false) const {
+                                     bool when_empty_return) const {
   FSPermission::RadixTree::Node* current_node = root_node_;
   if (current_node->children.size() == 0) {
     return when_empty_return;

From d08eb382cd594ae3e87790ebb81fd521c52a75a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20Nie=C3=9Fen?= 
Date: Sun, 19 Nov 2023 20:21:35 +0000
Subject: [PATCH 158/229] src: avoid copying strings in FSPermission::Apply

The use of string_view and subsequent copying to a string was supposed
to be a minor optimization in 640a7918, however, since 413c16e4, no
string splitting occurs anymore. Therefore, we can simply pass around
some references instead of using string_view or copying strings.

Refs: https://github.com/nodejs/node/pull/48491
Refs: https://github.com/nodejs/node/pull/49047
PR-URL: https://github.com/nodejs/node/pull/50662
Reviewed-By: Yagiz Nizipli 
Reviewed-By: Rafael Gonzaga 
Reviewed-By: Marco Ippolito 
Reviewed-By: James M Snell 
Reviewed-By: Luigi Pinca 
---
 src/permission/fs_permission.cc | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/permission/fs_permission.cc b/src/permission/fs_permission.cc
index 9f8ab281618418..5780d6cb2607e3 100644
--- a/src/permission/fs_permission.cc
+++ b/src/permission/fs_permission.cc
@@ -119,10 +119,8 @@ namespace permission {
 // allow = '/tmp/,/home/example.js'
 void FSPermission::Apply(const std::vector& allow,
                          PermissionScope scope) {
-  using std::string_view_literals::operator""sv;
-
-  for (const std::string_view res : allow) {
-    if (res == "*"sv) {
+  for (const std::string& res : allow) {
+    if (res == "*") {
       if (scope == PermissionScope::kFileSystemRead) {
         deny_all_in_ = false;
         allow_all_in_ = true;
@@ -132,7 +130,7 @@ void FSPermission::Apply(const std::vector& allow,
       }
       return;
     }
-    GrantAccess(scope, std::string(res.data(), res.size()));
+    GrantAccess(scope, res);
   }
 }
 

From a9d290956e68b47c6a1d9249cf5d3abfdaff284f Mon Sep 17 00:00:00 2001
From: Honza Machala 
Date: Sun, 19 Nov 2023 21:29:56 +0100
Subject: [PATCH 159/229] test: replace forEach in
 whatwg-encoding-custom-interop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-URL: https://github.com/nodejs/node/pull/50607
Reviewed-By: Tobias Nießen 
Reviewed-By: Luigi Pinca 
Reviewed-By: James M Snell 
---
 test/parallel/test-whatwg-encoding-custom-interop.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/parallel/test-whatwg-encoding-custom-interop.js b/test/parallel/test-whatwg-encoding-custom-interop.js
index 592dcc8582816f..80d088f6ce3741 100644
--- a/test/parallel/test-whatwg-encoding-custom-interop.js
+++ b/test/parallel/test-whatwg-encoding-custom-interop.js
@@ -56,9 +56,9 @@ assert(TextEncoder);
   encodingGetter.call(instance);
 
   const invalidThisArgs = [{}, [], true, 1, '', new TextDecoder()];
-  invalidThisArgs.forEach((i) => {
+  for (const i of invalidThisArgs) {
     assert.throws(() => inspectFn.call(i, Infinity, {}), expectedError);
     assert.throws(() => encodeFn.call(i), expectedError);
     assert.throws(() => encodingGetter.call(i), expectedError);
-  });
+  }
 }

From aea7fe54af3f0cfa66d585a37b113e570c907102 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli 
Date: Sun, 19 Nov 2023 17:35:19 -0500
Subject: [PATCH 160/229] inspector: use private fields instead of symbols
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-URL: https://github.com/nodejs/node/pull/50776
Reviewed-By: Vinícius Lourenço Claro Cardoso 
Reviewed-By: James M Snell 
Reviewed-By: Geoffrey Booth 
---
 lib/inspector.js | 57 ++++++++++++++++++++----------------------------
 1 file changed, 24 insertions(+), 33 deletions(-)

diff --git a/lib/inspector.js b/lib/inspector.js
index 70796c83fcffe3..e51bcf2f3cd977 100644
--- a/lib/inspector.js
+++ b/lib/inspector.js
@@ -4,7 +4,6 @@ const {
   JSONParse,
   JSONStringify,
   SafeMap,
-  Symbol,
   SymbolDispose,
 } = primordials;
 
@@ -45,28 +44,19 @@ const {
   console,
 } = internalBinding('inspector');
 
-const connectionSymbol = Symbol('connectionProperty');
-const messageCallbacksSymbol = Symbol('messageCallbacks');
-const nextIdSymbol = Symbol('nextId');
-const onMessageSymbol = Symbol('onMessage');
-
 class Session extends EventEmitter {
-  constructor() {
-    super();
-    this[connectionSymbol] = null;
-    this[nextIdSymbol] = 1;
-    this[messageCallbacksSymbol] = new SafeMap();
-  }
+  #connection = null;
+  #nextId = 1;
+  #messageCallbacks = new SafeMap();
 
   /**
    * Connects the session to the inspector back-end.
    * @returns {void}
    */
   connect() {
-    if (this[connectionSymbol])
+    if (this.#connection)
       throw new ERR_INSPECTOR_ALREADY_CONNECTED('The inspector session');
-    this[connectionSymbol] =
-      new Connection((message) => this[onMessageSymbol](message));
+    this.#connection = new Connection((message) => this.#onMessage(message));
   }
 
   /**
@@ -77,23 +67,24 @@ class Session extends EventEmitter {
   connectToMainThread() {
     if (isMainThread)
       throw new ERR_INSPECTOR_NOT_WORKER();
-    if (this[connectionSymbol])
+    if (this.#connection)
       throw new ERR_INSPECTOR_ALREADY_CONNECTED('The inspector session');
-    this[connectionSymbol] =
+    this.#connection =
       new MainThreadConnection(
-        (message) => queueMicrotask(() => this[onMessageSymbol](message)));
+        (message) => queueMicrotask(() => this.#onMessage(message)));
   }
 
-  [onMessageSymbol](message) {
+  #onMessage(message) {
     const parsed = JSONParse(message);
     try {
       if (parsed.id) {
-        const callback = this[messageCallbacksSymbol].get(parsed.id);
-        this[messageCallbacksSymbol].delete(parsed.id);
+        const callback = this.#messageCallbacks.get(parsed.id);
+        this.#messageCallbacks.delete(parsed.id);
         if (callback) {
           if (parsed.error) {
-            return callback(new ERR_INSPECTOR_COMMAND(parsed.error.code,
-                                                      parsed.error.message));
+            return callback(
+              new ERR_INSPECTOR_COMMAND(parsed.error.code, parsed.error.message),
+            );
           }
 
           callback(null, parsed.result);
@@ -127,18 +118,18 @@ class Session extends EventEmitter {
       validateFunction(callback, 'callback');
     }
 
-    if (!this[connectionSymbol]) {
+    if (!this.#connection) {
       throw new ERR_INSPECTOR_NOT_CONNECTED();
     }
-    const id = this[nextIdSymbol]++;
+    const id = this.#nextId++;
     const message = { id, method };
     if (params) {
       message.params = params;
     }
     if (callback) {
-      this[messageCallbacksSymbol].set(id, callback);
+      this.#messageCallbacks.set(id, callback);
     }
-    this[connectionSymbol].dispatch(JSONStringify(message));
+    this.#connection.dispatch(JSONStringify(message));
   }
 
   /**
@@ -148,16 +139,16 @@ class Session extends EventEmitter {
    * @returns {void}
    */
   disconnect() {
-    if (!this[connectionSymbol])
+    if (!this.#connection)
       return;
-    this[connectionSymbol].disconnect();
-    this[connectionSymbol] = null;
-    const remainingCallbacks = this[messageCallbacksSymbol].values();
+    this.#connection.disconnect();
+    this.#connection = null;
+    const remainingCallbacks = this.#messageCallbacks.values();
     for (const callback of remainingCallbacks) {
       process.nextTick(callback, new ERR_INSPECTOR_CLOSED());
     }
-    this[messageCallbacksSymbol].clear();
-    this[nextIdSymbol] = 1;
+    this.#messageCallbacks.clear();
+    this.#nextId = 1;
   }
 }
 

From 904e645bcdd10f39aa22f1d66fac06127d7c30f1 Mon Sep 17 00:00:00 2001
From: Keyhan Vakil 
Date: Sun, 12 Nov 2023 22:48:53 +0000
Subject: [PATCH 161/229] build: add configuration flag to enable Maglev

This adds a configuration flag to enable V8's Maglev compiler.

Unfortunately compilation fails unless you have clang-14+ or gcc-13+,
but I sent a patch for that upstream. Other than that, it builds and all
tests pass locally on my x86-64 Linux machine.

The gn scraper regexes were broken preventing the compilation from
linking. Fix them. As a drive-by, also add additional conditionals for
compilation on 32-bit arm.

Refs: https://github.com/nodejs/node/issues/50690
PR-URL: https://github.com/nodejs/node/pull/50692
Reviewed-By: Ben Noordhuis 
Reviewed-By: Michael Dawson 
Reviewed-By: James M Snell 
---
 configure.py             |  7 +++++++
 tools/v8_gypfiles/v8.gyp | 14 ++++++++++++--
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/configure.py b/configure.py
index 81e478e5b89c8b..7880f758e948db 100755
--- a/configure.py
+++ b/configure.py
@@ -812,6 +812,12 @@
     help='Enable V8 transparent hugepage support. This feature is only '+
          'available on Linux platform.')
 
+parser.add_argument('--v8-enable-maglev',
+    action='store_true',
+    dest='v8_enable_maglev',
+    default=None,
+    help='Enable V8 Maglev compiler. Not available on all platforms.')
+
 parser.add_argument('--v8-enable-short-builtin-calls',
     action='store_true',
     dest='v8_enable_short_builtin_calls',
@@ -1494,6 +1500,7 @@ def configure_v8(o):
   o['variables']['v8_random_seed'] = 0  # Use a random seed for hash tables.
   o['variables']['v8_promise_internal_field_count'] = 1 # Add internal field to promises for async hooks.
   o['variables']['v8_use_siphash'] = 0 if options.without_siphash else 1
+  o['variables']['v8_enable_maglev'] = 1 if options.v8_enable_maglev else 0
   o['variables']['v8_enable_pointer_compression'] = 1 if options.enable_pointer_compression else 0
   o['variables']['v8_enable_31bit_smis_on_64bit_arch'] = 1 if options.enable_pointer_compression else 0
   o['variables']['v8_enable_shared_ro_heap'] = 0 if options.enable_pointer_compression or options.disable_shared_ro_heap else 1
diff --git a/tools/v8_gypfiles/v8.gyp b/tools/v8_gypfiles/v8.gyp
index 109d0ea1aa2e51..adfc3f6e1095dd 100644
--- a/tools/v8_gypfiles/v8.gyp
+++ b/tools/v8_gypfiles/v8.gyp
@@ -546,6 +546,11 @@
               '
Date: Mon, 20 Nov 2023 13:07:55 +0100
Subject: [PATCH 162/229] test: enable idlharness tests for encoding

TextDecoderStream and TextEncoderStream are now exposed as globals,
so we can run the entire Encoding idlharness test suite.

PR-URL: https://github.com/nodejs/node/pull/50778
Reviewed-By: Filip Skokan 
Reviewed-By: Yagiz Nizipli 
Reviewed-By: Luigi Pinca 
Reviewed-By: James M Snell 
Reviewed-By: Benjamin Gruenbaum 
---
 test/common/wpt.js            | 2 +-
 test/wpt/status/encoding.json | 2 +-
 test/wpt/test-encoding.js     | 2 ++
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/common/wpt.js b/test/common/wpt.js
index 7d0b030614b003..0673d4bc8d721e 100644
--- a/test/common/wpt.js
+++ b/test/common/wpt.js
@@ -610,7 +610,7 @@ class WPTRunner {
       'ReadableStreamBYOBReader', 'ReadableStreamBYOBRequest',
       'ReadableByteStreamController', 'ReadableStreamDefaultController',
       'ByteLengthQueuingStrategy', 'CountQueuingStrategy',
-      'TextEncoderStream', 'TextDecoderStream',
+      'TextEncoder', 'TextDecoder', 'TextEncoderStream', 'TextDecoderStream',
       'CompressionStream', 'DecompressionStream',
     ];
     if (Boolean(process.versions.openssl) && !process.env.NODE_SKIP_CRYPTO) {
diff --git a/test/wpt/status/encoding.json b/test/wpt/status/encoding.json
index 09f8c2b545b247..db481cca6a09df 100644
--- a/test/wpt/status/encoding.json
+++ b/test/wpt/status/encoding.json
@@ -37,7 +37,7 @@
     "skip": "The iso-8859-16 encoding is not supported"
   },
   "idlharness.any.js": {
-    "skip": "No implementation of TextDecoderStream and TextEncoderStream"
+    "requires": ["small-icu"]
   },
   "idlharness-shadowrealm.window.js": {
     "skip": "ShadowRealm support is not enabled"
diff --git a/test/wpt/test-encoding.js b/test/wpt/test-encoding.js
index af20760bd533d3..eb54c70867cb43 100644
--- a/test/wpt/test-encoding.js
+++ b/test/wpt/test-encoding.js
@@ -3,4 +3,6 @@
 const { WPTRunner } = require('../common/wpt');
 const runner = new WPTRunner('encoding');
 
+runner.pretendGlobalThisAs('Window');
+
 runner.runJsTests();

From e22ce9586f60a22232e7dd5dc60d25c8905d8c30 Mon Sep 17 00:00:00 2001
From: Daniel Meechan 
Date: Mon, 20 Nov 2023 15:48:07 +0000
Subject: [PATCH 163/229] doc: update Crypto API doc for x509.keyUsage

PR-URL: https://github.com/nodejs/node/pull/50603
Refs: https://github.com/nodejs/node/issues/48727
Reviewed-By: James M Snell 
Reviewed-By: Marco Ippolito 
---
 doc/api/crypto.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/api/crypto.md b/doc/api/crypto.md
index 194f3cb9bb1024..93f0bd3c1a169f 100644
--- a/doc/api/crypto.md
+++ b/doc/api/crypto.md
@@ -2722,7 +2722,7 @@ added: v15.9.0
 The issuer certificate or `undefined` if the issuer certificate is not
 available.
 
-### `x509.keyUsage`
+### `x509.extKeyUsage`
 
 
+
+Disable specific process warnings by `code` or `type`.
+
+Warnings emitted from [`process.emitWarning()`][emit_warning] may contain a
+`code` and a `type`. This option will not-emit warnings that have a matching
+`code` or `type`.
+
+List of [deprecation warnings][].
+
+The Node.js core warning types are: `DeprecationWarning` and
+`ExperimentalWarning`
+
+For example, the following script will not emit
+[DEP0025 `require('node:sys')`][DEP0025 warning] when executed with
+`node --disable-warning=DEP0025`:
+
+```mjs
+import sys from 'node:sys';
+```
+
+```cjs
+const sys = require('node:sys');
+```
+
+For example, the following script will emit the
+[DEP0025 `require('node:sys')`][DEP0025 warning], but not any Experimental
+Warnings (such as
+[ExperimentalWarning: `vm.measureMemory` is an experimental feature][]
+in <=v21) when executed with `node --disable-warning=ExperimentalWarnings`:
+
+```mjs
+import sys from 'node:sys';
+import vm from 'node:vm';
+
+vm.measureMemory();
+```
+
+```cjs
+const sys = require('node:sys');
+const vm = require('node:vm');
+
+vm.measureMemory();
+```
+
 ### `--disable-proto=mode`
 
 
+
+Type: Documentation-only
+
+The [`util.types.isWebAssemblyCompiledModule`][] API is deprecated. Please use
+`value instanceof WebAssembly.Module` instead.
+
 [NIST SP 800-38D]: https://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38d.pdf
 [RFC 6066]: https://tools.ietf.org/html/rfc6066#section-3
 [RFC 8247 Section 2.4]: https://www.rfc-editor.org/rfc/rfc8247#section-2.4
@@ -3563,6 +3580,7 @@ deprecated. Get them from `fs.constants` or `fs.promises.constants` instead.
 [`util.log()`]: util.md#utillogstring
 [`util.promisify`]: util.md#utilpromisifyoriginal
 [`util.toUSVString()`]: util.md#utiltousvstringstring
+[`util.types.isWebAssemblyCompiledModule`]: util.md#utiltypesiswebassemblycompiledmodulevalue
 [`util.types`]: util.md#utiltypes
 [`util`]: util.md
 [`worker.exitedAfterDisconnect`]: cluster.md#workerexitedafterdisconnect

From 076dc7540bb9c4e00eeed865d3b5ecb420791e96 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20Nie=C3=9Fen?= 
Date: Wed, 22 Nov 2023 18:30:36 +0100
Subject: [PATCH 183/229] permission: do not create symlinks if target is
 relative

The permission model's security guarantees fall apart in the presence of
relative symbolic links. When an application attempts to create a
relative symlink, the permission model currently resolves the relative
path into an absolute path based on the process's current working
directory, checks whether the process has the relevant permissions, and
then creates the symlink using the absolute target path. This behavior
is plainly incorrect for two reasons:

1. The target path should never be resolved relative to the current
   working directory. If anything, it should be resolved relative to the
   symlink's location. (Of course, there is one insane exception to this
   rule: on Windows, each process has a current working directory per
   drive, and symlinks can be created with a target path relative to the
   current working directory of a specific drive. In that case, the
   relative path will be resolved relative to the current working
   directory for the respective drive, and the symlink will be created
   on disk with the resulting absolute path. Other relative symlinks
   will be stored as-is.)
2. Silently creating an absolute symlink when the user requested a
   relative symlink is wrong. The user may (or may not) rely on the
   symlink being relative. For example, npm heavily relies on relative
   symbolic links such that node_modules directories can be moved around
   without breaking.

Because we don't know the user's intentions, we don't know if creating
an absolute symlink instead of a relative symlink is acceptable. This
patch prevents the faulty behavior by not (incorrectly) resolving
relative symlink targets when the permission model is enabled, and by
instead simply refusing the create any relative symlinks.

The fs APIs accept Uint8Array objects for paths to be able to handle
arbitrary file name charsets, however, checking whether such an object
represents a relative part in a reliable and portable manner is tricky.
Other parts of the permission model incorrectly convert such objects to
strings and then back to an Uint8Array (see 1f64147eb607f82060e08884f),
however, for now, this bug fix will simply throw on non-string symlink
targets when the permission model is enabled. (The permission model
already breaks existing applications in various ways, so this shouldn't
be too dramatic.)

PR-URL: https://github.com/nodejs/node/pull/49156
Reviewed-By: Rafael Gonzaga 
Reviewed-By: Michael Dawson 
Reviewed-By: James M Snell 
---
 lib/fs.js                                     | 22 +++++++++++++++
 lib/internal/fs/promises.js                   | 14 ++++++++++
 .../test-permission-fs-symlink-relative.js    | 27 +++++++++++++++++++
 3 files changed, 63 insertions(+)
 create mode 100644 test/parallel/test-permission-fs-symlink-relative.js

diff --git a/lib/fs.js b/lib/fs.js
index 7edf3232ad15dc..bce9d38026169c 100644
--- a/lib/fs.js
+++ b/lib/fs.js
@@ -58,6 +58,7 @@ const {
 } = constants;
 
 const pathModule = require('path');
+const { isAbsolute } = pathModule;
 const { isArrayBufferView } = require('internal/util/types');
 
 const binding = internalBinding('fs');
@@ -68,6 +69,7 @@ const { Buffer } = require('buffer');
 const {
   aggregateTwoErrors,
   codes: {
+    ERR_ACCESS_DENIED,
     ERR_FS_FILE_TOO_LARGE,
     ERR_INVALID_ARG_VALUE,
   },
@@ -143,6 +145,8 @@ const {
   kValidateObjectAllowNullable,
 } = require('internal/validators');
 
+const permission = require('internal/process/permission');
+
 let truncateWarn = true;
 let fs;
 
@@ -1740,6 +1744,15 @@ function symlink(target, path, type_, callback_) {
   const type = (typeof type_ === 'string' ? type_ : null);
   const callback = makeCallback(arguments[arguments.length - 1]);
 
+  if (permission.isEnabled()) {
+    // The permission model's security guarantees fall apart in the presence of
+    // relative symbolic links. Thus, we have to prevent their creation.
+    if (typeof target !== 'string' || !isAbsolute(toPathIfFileURL(target))) {
+      callback(new ERR_ACCESS_DENIED('relative symbolic link target'));
+      return;
+    }
+  }
+
   target = getValidatedPath(target, 'target');
   path = getValidatedPath(path);
 
@@ -1796,6 +1809,15 @@ function symlinkSync(target, path, type) {
       type = 'dir';
     }
   }
+
+  if (permission.isEnabled()) {
+    // The permission model's security guarantees fall apart in the presence of
+    // relative symbolic links. Thus, we have to prevent their creation.
+    if (typeof target !== 'string' || !isAbsolute(toPathIfFileURL(target))) {
+      throw new ERR_ACCESS_DENIED('relative symbolic link target');
+    }
+  }
+
   target = getValidatedPath(target, 'target');
   path = getValidatedPath(path);
 
diff --git a/lib/internal/fs/promises.js b/lib/internal/fs/promises.js
index 11d230077604bf..11f2b282dd010d 100644
--- a/lib/internal/fs/promises.js
+++ b/lib/internal/fs/promises.js
@@ -33,6 +33,7 @@ const { Buffer } = require('buffer');
 
 const {
   codes: {
+    ERR_ACCESS_DENIED,
     ERR_FS_FILE_TOO_LARGE,
     ERR_INVALID_ARG_VALUE,
     ERR_INVALID_STATE,
@@ -85,6 +86,8 @@ const {
   kValidateObjectAllowNullable,
 } = require('internal/validators');
 const pathModule = require('path');
+const { isAbsolute } = pathModule;
+const { toPathIfFileURL } = require('internal/url');
 const {
   kEmptyObject,
   lazyDOMException,
@@ -97,6 +100,8 @@ const nonNativeWatcher = require('internal/fs/recursive_watch');
 const { isIterable } = require('internal/streams/utils');
 const assert = require('internal/assert');
 
+const permission = require('internal/process/permission');
+
 const kHandle = Symbol('kHandle');
 const kFd = Symbol('kFd');
 const kRefs = Symbol('kRefs');
@@ -883,6 +888,15 @@ async function symlink(target, path, type_) {
       type = 'file';
     }
   }
+
+  if (permission.isEnabled()) {
+    // The permission model's security guarantees fall apart in the presence of
+    // relative symbolic links. Thus, we have to prevent their creation.
+    if (typeof target !== 'string' || !isAbsolute(toPathIfFileURL(target))) {
+      throw new ERR_ACCESS_DENIED('relative symbolic link target');
+    }
+  }
+
   target = getValidatedPath(target, 'target');
   path = getValidatedPath(path);
   return binding.symlink(preprocessSymlinkDestination(target, type, path),
diff --git a/test/parallel/test-permission-fs-symlink-relative.js b/test/parallel/test-permission-fs-symlink-relative.js
new file mode 100644
index 00000000000000..e7cd99ad5f227a
--- /dev/null
+++ b/test/parallel/test-permission-fs-symlink-relative.js
@@ -0,0 +1,27 @@
+// Flags: --experimental-permission --allow-fs-read=* --allow-fs-write=*
+'use strict';
+
+const common = require('../common');
+common.skipIfWorker();
+
+const assert = require('assert');
+const { symlinkSync, symlink, promises: { symlink: symlinkAsync } } = require('fs');
+
+const error = {
+  code: 'ERR_ACCESS_DENIED',
+  message: /relative symbolic link target/,
+};
+
+for (const targetString of ['a', './b/c', '../d', 'e/../f', 'C:drive-relative', 'ntfs:alternate']) {
+  for (const target of [targetString, Buffer.from(targetString)]) {
+    for (const path of [__filename, __dirname, process.execPath]) {
+      assert.throws(() => symlinkSync(target, path), error);
+      symlink(target, path, common.mustCall((err) => {
+        assert(err);
+        assert.strictEqual(err.code, error.code);
+        assert.match(err.message, error.message);
+      }));
+      assert.rejects(() => symlinkAsync(target, path), error).then(common.mustCall());
+    }
+  }
+}

From b3d015de71525006a768f933a0ac26b559e98302 Mon Sep 17 00:00:00 2001
From: Antoine du Hamel 
Date: Wed, 22 Nov 2023 20:03:33 +0100
Subject: [PATCH 184/229] doc: get rid of unnecessary `eslint-skip` comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-URL: https://github.com/nodejs/node/pull/50829
Reviewed-By: Luigi Pinca 
Reviewed-By: Ethan Arrowood 
Reviewed-By: Yagiz Nizipli 
Reviewed-By: Paolo Insogna 
Reviewed-By: Tobias Nießen 
Reviewed-By: Marco Ippolito 
---
 doc/api/crypto.md      |  4 +++-
 doc/api/errors.md      |  4 +---
 doc/api/http.md        | 14 ++++++--------
 doc/api/https.md       |  2 --
 doc/api/querystring.md |  8 +++-----
 doc/api/tls.md         |  2 --
 doc/api/util.md        |  4 +---
 7 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/doc/api/crypto.md b/doc/api/crypto.md
index 93f0bd3c1a169f..3175d391032ae4 100644
--- a/doc/api/crypto.md
+++ b/doc/api/crypto.md
@@ -42,7 +42,7 @@ calling `require('node:crypto')` will result in an error being thrown.
 
 When using CommonJS, the error thrown can be caught using try/catch:
 
-
+
 
 ```cjs
 let crypto;
@@ -53,6 +53,8 @@ try {
 }
 ```
 
+
+
 When using the lexical ESM `import` keyword, the error can only be
 caught if a handler for `process.on('uncaughtException')` is registered
 _before_ any attempt to load the module is made (using, for instance,
diff --git a/doc/api/errors.md b/doc/api/errors.md
index e57960e7893c4a..ba6cfc412fb42a 100644
--- a/doc/api/errors.md
+++ b/doc/api/errors.md
@@ -2975,9 +2975,7 @@ signal (such as [`subprocess.kill()`][]).
 [self-reference a package using its name][] and [define a custom subpath][] in
 the [`"exports"`][] field of the [`package.json`][] file.
 
-
-
-```js
+```mjs
 import './'; // unsupported
 import './index.js'; // supported
 import 'package-name'; // supported
diff --git a/doc/api/http.md b/doc/api/http.md
index 42c8a9fb1d9980..507a850f932aa0 100644
--- a/doc/api/http.md
+++ b/doc/api/http.md
@@ -16,14 +16,12 @@ user is able to stream data.
 
 HTTP message headers are represented by an object like this:
 
-
-
-```js
-{ 'content-length': '123',
-  'content-type': 'text/plain',
-  'connection': 'keep-alive',
-  'host': 'example.com',
-  'accept': '*/*' }
+```json
+{ "content-length": "123",
+  "content-type": "text/plain",
+  "connection": "keep-alive",
+  "host": "example.com",
+  "accept": "*/*" }
 ```
 
 Keys are lowercased. Values are not modified.
diff --git a/doc/api/https.md b/doc/api/https.md
index 20f427d3b7dfd9..8ae80d28b61654 100644
--- a/doc/api/https.md
+++ b/doc/api/https.md
@@ -17,8 +17,6 @@ calling `require('node:https')` will result in an error being thrown.
 
 When using CommonJS, the error thrown can be caught using try/catch:
 
-
-
 ```cjs
 let https;
 try {
diff --git a/doc/api/querystring.md b/doc/api/querystring.md
index 752d4b9d584242..f7f45d57d927ce 100644
--- a/doc/api/querystring.md
+++ b/doc/api/querystring.md
@@ -87,12 +87,10 @@ collection of key and value pairs.
 
 For example, the query string `'foo=bar&abc=xyz&abc=123'` is parsed into:
 
-
-
-```js
+```json
 {
-  foo: 'bar',
-  abc: ['xyz', '123']
+  "foo": "bar",
+  "abc": ["xyz", "123"]
 }
 ```
 
diff --git a/doc/api/tls.md b/doc/api/tls.md
index 92ca0619e691ee..4f5553f7cb94e7 100644
--- a/doc/api/tls.md
+++ b/doc/api/tls.md
@@ -22,8 +22,6 @@ calling `require('node:tls')` will result in an error being thrown.
 
 When using CommonJS, the error thrown can be caught using try/catch:
 
-
-
 ```cjs
 let tls;
 try {
diff --git a/doc/api/util.md b/doc/api/util.md
index 24d07ace984a29..d8cc15835f3e6f 100644
--- a/doc/api/util.md
+++ b/doc/api/util.md
@@ -2511,9 +2511,7 @@ added: v10.0.0
 
 Returns `true` if the value is an instance of a [Module Namespace Object][].
 
-
-
-```js
+```mjs
 import * as ns from './a.js';
 
 util.types.isModuleNamespaceObject(ns);  // Returns true

From e14f661950b6763694c56aa0ffe7ad18bebf65a4 Mon Sep 17 00:00:00 2001
From: Pulkit Gupta 
Date: Thu, 23 Nov 2023 19:03:29 +0530
Subject: [PATCH 185/229] doc: shard not supported with watch mode

PR-URL: https://github.com/nodejs/node/pull/50640
Reviewed-By: Raz Luvaton 
Reviewed-By: Moshe Atlow 
---
 doc/api/test.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/api/test.md b/doc/api/test.md
index 1f53888e0bf96f..0d3dd96482ae72 100644
--- a/doc/api/test.md
+++ b/doc/api/test.md
@@ -1161,6 +1161,11 @@ changes:
       of shards to split the test files to. This option is _required_.
 * Returns: {TestsStream}
 
+**Note:** `shard` is used to horizontally parallelize test running across
+machines or processes, ideal for large-scale executions across varied
+environments. It's incompatible with `watch` mode, tailored for rapid
+code iteration by automatically rerunning tests on file changes.
+
 ```mjs
 import { tap } from 'node:test/reporters';
 import { run } from 'node:test';

From 107b5e63c5bd1394b19e84121b915d5a6d7e05e2 Mon Sep 17 00:00:00 2001
From: Shikha Mehta 
Date: Thu, 23 Nov 2023 08:34:46 -0800
Subject: [PATCH 186/229] test: replace foreach with for in
 test-https-simple.js
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes: https://github.com/nodejs/node/issues/50818
PR-URL: https://github.com/nodejs/node/pull/49793
Reviewed-By: Tobias Nießen 
Reviewed-By: Jithil P Ponnan 
---
 test/parallel/test-https-simple.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/parallel/test-https-simple.js b/test/parallel/test-https-simple.js
index a65883162f60a2..b0562a1cd98174 100644
--- a/test/parallel/test-https-simple.js
+++ b/test/parallel/test-https-simple.js
@@ -45,13 +45,13 @@ const serverCallback = common.mustCall(function(req, res) {
 });
 
 const invalid_options = [ 'foo', 42, true, [] ];
-invalid_options.forEach((option) => {
+for (const option of invalid_options) {
   assert.throws(() => {
     new https.Server(option);
   }, {
     code: 'ERR_INVALID_ARG_TYPE',
   });
-});
+}
 
 const server = https.createServer(options, serverCallback);
 

From c8d6dd58e725fabd179e912bde18510725965b7c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ulises=20Gasc=C3=B3n?= 
Date: Tue, 21 Nov 2023 08:30:52 +0100
Subject: [PATCH 187/229] tools: add macOS notarization verification step

PR-URL: https://github.com/nodejs/node/pull/50833
Reviewed-By: Luigi Pinca 
Reviewed-By: Michael Dawson 
---
 tools/osx-notarize.sh | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/osx-notarize.sh b/tools/osx-notarize.sh
index 292ce5af1d7dc2..d860074bf7d64e 100755
--- a/tools/osx-notarize.sh
+++ b/tools/osx-notarize.sh
@@ -48,5 +48,12 @@ else
   exit 1
 fi
 
+if ! xcrun spctl --assess --type install --context context:primary-signature --ignore-cache --verbose=2 "node-$pkgid.pkg"; then
+  echo "error: Signature will not be accepted by Gatekeeper!" 1>&2
+  exit 1
+else
+  echo "Verification was successful."
+fi
+
 xcrun stapler staple "node-$pkgid.pkg"
 echo "Stapler was successful."
\ No newline at end of file

From ba40b2e33eae9e33f5a47d9463849bf782776059 Mon Sep 17 00:00:00 2001
From: CanadaHonk 
Date: Thu, 23 Nov 2023 22:47:36 +0000
Subject: [PATCH 188/229] fs: replace deprecated `path._makeLong` in copyFile

PR-URL: https://github.com/nodejs/node/pull/50844
Reviewed-By: Yagiz Nizipli 
Reviewed-By: Luigi Pinca 
---
 lib/fs.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/fs.js b/lib/fs.js
index bce9d38026169c..59724e884e6dbe 100644
--- a/lib/fs.js
+++ b/lib/fs.js
@@ -3001,8 +3001,8 @@ function copyFile(src, dest, mode, callback) {
   src = getValidatedPath(src, 'src');
   dest = getValidatedPath(dest, 'dest');
 
-  src = pathModule._makeLong(src);
-  dest = pathModule._makeLong(dest);
+  src = pathModule.toNamespacedPath(src);
+  dest = pathModule.toNamespacedPath(dest);
   mode = getValidMode(mode, 'copyFile');
   callback = makeCallback(callback);
 

From 96143a329305fb2e17ec31b51cc66e22f6add37b Mon Sep 17 00:00:00 2001
From: Alessandro Di Nisio <95277049+StiffWriter00@users.noreply.github.com>
Date: Fri, 24 Nov 2023 14:52:32 +0100
Subject: [PATCH 189/229] test: replace forEach to for at
 test-webcrypto-sign-verify-ecdsa.js

PR-URL: https://github.com/nodejs/node/pull/50795
Reviewed-By: James M Snell 
Reviewed-By: Marco Ippolito 
Reviewed-By: Luigi Pinca 
---
 test/parallel/test-webcrypto-sign-verify-ecdsa.js | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/test/parallel/test-webcrypto-sign-verify-ecdsa.js b/test/parallel/test-webcrypto-sign-verify-ecdsa.js
index 072485cca59f7f..8fbf572ef5c64e 100644
--- a/test/parallel/test-webcrypto-sign-verify-ecdsa.js
+++ b/test/parallel/test-webcrypto-sign-verify-ecdsa.js
@@ -227,10 +227,11 @@ async function testSign({ name,
 (async function() {
   const variations = [];
 
-  vectors.forEach((vector) => {
+  for (let i = 0; i < vectors.length; ++i) {
+    const vector = vectors[i];
     variations.push(testVerify(vector));
     variations.push(testSign(vector));
-  });
+  }
 
   await Promise.all(variations);
 })().then(common.mustCall());

From 4544593d31a2ea98b5816de6125717245eb1dac0 Mon Sep 17 00:00:00 2001
From: Conor Watson 
Date: Fri, 24 Nov 2023 14:11:34 +0000
Subject: [PATCH 190/229] test: replace forEach with for of

PR-URL: https://github.com/nodejs/node/pull/50594
Reviewed-By: Rafael Gonzaga 
Reviewed-By: Marco Ippolito 
Reviewed-By: Luigi Pinca 
Reviewed-By: James M Snell 
---
 test/parallel/test-webcrypto-sign-verify-hmac.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/parallel/test-webcrypto-sign-verify-hmac.js b/test/parallel/test-webcrypto-sign-verify-hmac.js
index 00b742dbfea5d1..5c2d8e6cd770ff 100644
--- a/test/parallel/test-webcrypto-sign-verify-hmac.js
+++ b/test/parallel/test-webcrypto-sign-verify-hmac.js
@@ -172,10 +172,10 @@ async function testSign({ hash,
 (async function() {
   const variations = [];
 
-  vectors.forEach((vector) => {
+  for (const vector of vectors) {
     variations.push(testVerify(vector));
     variations.push(testSign(vector));
-  });
+  }
 
   await Promise.all(variations);
 })().then(common.mustCall());

From 7c28a4ca8f2db5aa20e560ef1bbc6e0b732b6c73 Mon Sep 17 00:00:00 2001
From: Brad House 
Date: Wed, 15 Nov 2023 09:33:47 -0500
Subject: [PATCH 191/229] test: fix dns test case failures after c-ares update
 to 1.21.0+

c-ares has made intentional changes to the behavior of TXT records
to comply with RFC 7208, which concatenates multiple strings for
the same TXT record into a single string.  Multiple TXT records
are not concatenated.

Also, response handling has changed, such that a response which is
completely invalid in formatting is thrown away as a malicious
forged/spoofed packet rather than returning EBADRESP.  This is one
step toward RFC 9018 (EDNS COOKIES) which will require the message
to at least be structurally valid to validate against spoofed
records.

Fix By: Brad House (@bradh352)

PR-URL: https://github.com/nodejs/node/pull/50743
Reviewed-By: Luigi Pinca 
Reviewed-By: James M Snell 
Fixes: https://github.com/nodejs/node/issues/50741
Refs: https://github.com/nodejs/node/issues/50444
---
 test/parallel/test-dns-resolveany-bad-ancount.js | 5 +++--
 test/parallel/test-dns-resolveany.js             | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/test/parallel/test-dns-resolveany-bad-ancount.js b/test/parallel/test-dns-resolveany-bad-ancount.js
index 71fcbe03cd58f1..5cea388ad6a9f0 100644
--- a/test/parallel/test-dns-resolveany-bad-ancount.js
+++ b/test/parallel/test-dns-resolveany-bad-ancount.js
@@ -30,13 +30,14 @@ server.bind(0, common.mustCall(async () => {
   dnsPromises.resolveAny('example.org')
     .then(common.mustNotCall())
     .catch(common.expectsError({
-      code: 'EBADRESP',
+      // May return EBADRESP or ETIMEOUT
+      code: /^(?:EBADRESP|ETIMEOUT)$/,
       syscall: 'queryAny',
       hostname: 'example.org'
     }));
 
   dns.resolveAny('example.org', common.mustCall((err) => {
-    assert.strictEqual(err.code, 'EBADRESP');
+    assert.notStrictEqual(err.code, 'SUCCESS');
     assert.strictEqual(err.syscall, 'queryAny');
     assert.strictEqual(err.hostname, 'example.org');
     const descriptor = Object.getOwnPropertyDescriptor(err, 'message');
diff --git a/test/parallel/test-dns-resolveany.js b/test/parallel/test-dns-resolveany.js
index 0bbfe8f9f18432..f64dbfc93e2da8 100644
--- a/test/parallel/test-dns-resolveany.js
+++ b/test/parallel/test-dns-resolveany.js
@@ -11,7 +11,7 @@ const answers = [
   { type: 'AAAA', address: '::42', ttl: 123 },
   { type: 'MX', priority: 42, exchange: 'foobar.com', ttl: 124 },
   { type: 'NS', value: 'foobar.org', ttl: 457 },
-  { type: 'TXT', entries: [ 'v=spf1 ~all', 'xyz\0foo' ] },
+  { type: 'TXT', entries: [ 'v=spf1 ~all xyz\0foo' ] },
   { type: 'PTR', value: 'baz.org', ttl: 987 },
   {
     type: 'SOA',

From 2ddeead4368e052e05513c67defd5917b6d20f65 Mon Sep 17 00:00:00 2001
From: Colin Ihrig 
Date: Fri, 24 Nov 2023 10:06:29 -0500
Subject: [PATCH 192/229] meta: move cjihrig to TSC regular member
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-URL: https://github.com/nodejs/node/pull/50816
Reviewed-By: James M Snell 
Reviewed-By: Antoine du Hamel 
Reviewed-By: Benjamin Gruenbaum 
Reviewed-By: Richard Lau 
Reviewed-By: Rich Trott 
Reviewed-By: Matteo Collina 
Reviewed-By: Darshan Sen 
Reviewed-By: Moshe Atlow 
Reviewed-By: Tobias Nießen 
Reviewed-By: Michael Dawson 
---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 836b644586ab39..22f9d634ccadd3 100644
--- a/README.md
+++ b/README.md
@@ -172,8 +172,6 @@ For information about the governance of the Node.js project, see
   **Benjamin Gruenbaum** <>
 * [BridgeAR](https://github.com/BridgeAR) -
   **Ruben Bridgewater** <> (he/him)
-* [cjihrig](https://github.com/cjihrig) -
-  **Colin Ihrig** <> (he/him)
 * [GeoffreyBooth](https://github.com/geoffreybooth) -
   **Geoffrey Booth** <> (he/him)
 * [gireeshpunathil](https://github.com/gireeshpunathil) -
@@ -213,6 +211,8 @@ For information about the governance of the Node.js project, see
   **Ben Noordhuis** <>
 * [ChALkeR](https://github.com/ChALkeR) -
   **Сковорода Никита Андреевич** <> (he/him)
+* [cjihrig](https://github.com/cjihrig) -
+  **Colin Ihrig** <> (he/him)
 * [codebytere](https://github.com/codebytere) -
   **Shelley Vohr** <> (she/her)
 * [danbev](https://github.com/danbev) -

From 158db2d61e842d89523348955566d7bb864e56cc Mon Sep 17 00:00:00 2001
From: Michael Dawson 
Date: Fri, 24 Nov 2023 12:39:39 -0500
Subject: [PATCH 193/229] src: fix coverity warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- reduce copying by using std::move

Signed-off-by: Michael Dawson 
PR-URL: https://github.com/nodejs/node/pull/50846
Reviewed-By: Tobias Nießen 
Reviewed-By: Marco Ippolito 
Reviewed-By: James M Snell 
Reviewed-By: Luigi Pinca 
---
 src/node_contextify.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/node_contextify.cc b/src/node_contextify.cc
index 9542dfcc7cf9fc..a68f6bb686768e 100644
--- a/src/node_contextify.cc
+++ b/src/node_contextify.cc
@@ -1473,7 +1473,7 @@ void ContextifyContext::ContainsModuleSyntax(
   ContextifyContext::CompileFunctionAndCacheResult(env,
                                                    context,
                                                    &source,
-                                                   params,
+                                                   std::move(params),
                                                    std::vector>(),
                                                    options,
                                                    true,

From cada22e2a4c8042ef31762783d140a69849bdaa3 Mon Sep 17 00:00:00 2001
From: Jungku Lee 
Date: Sat, 25 Nov 2023 02:47:59 +0900
Subject: [PATCH 194/229] fs: fix to not return for void function

PR-URL: https://github.com/nodejs/node/pull/50769
Reviewed-By: Matthew Aitken 
Reviewed-By: Yagiz Nizipli 
Reviewed-By: Luigi Pinca 
Reviewed-By: James M Snell 
Reviewed-By: Qingyu Deng 
---
 lib/fs.js | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/lib/fs.js b/lib/fs.js
index 59724e884e6dbe..5e1165453367f7 100644
--- a/lib/fs.js
+++ b/lib/fs.js
@@ -534,7 +534,7 @@ function close(fd, callback = defaultCloseCallback) {
 function closeSync(fd) {
   fd = getValidatedFd(fd);
 
-  return binding.close(fd);
+  binding.close(fd);
 }
 
 /**
@@ -784,7 +784,7 @@ function readv(fd, buffers, position, callback) {
   if (typeof position !== 'number')
     position = null;
 
-  return binding.readBuffers(fd, buffers, position, req);
+  binding.readBuffers(fd, buffers, position, req);
 }
 
 ObjectDefineProperty(readv, kCustomPromisifyArgsSymbol,
@@ -860,7 +860,8 @@ function write(fd, buffer, offsetOrOptions, length, position, callback) {
 
     const req = new FSReqCallback();
     req.oncomplete = wrapper;
-    return binding.writeBuffer(fd, buffer, offset, length, position, req);
+    binding.writeBuffer(fd, buffer, offset, length, position, req);
+    return;
   }
 
   validateStringAfterArrayBufferView(buffer, 'buffer');
@@ -881,7 +882,7 @@ function write(fd, buffer, offsetOrOptions, length, position, callback) {
 
   const req = new FSReqCallback();
   req.oncomplete = wrapper;
-  return binding.writeString(fd, str, offset, length, req);
+  binding.writeString(fd, str, offset, length, req);
 }
 
 ObjectDefineProperty(write, kCustomPromisifyArgsSymbol,
@@ -971,7 +972,7 @@ function writev(fd, buffers, position, callback) {
   if (typeof position !== 'number')
     position = null;
 
-  return binding.writeBuffers(fd, buffers, position, req);
+  binding.writeBuffers(fd, buffers, position, req);
 }
 
 ObjectDefineProperty(writev, kCustomPromisifyArgsSymbol, {
@@ -1177,7 +1178,8 @@ function rmdir(path, options, callback) {
         if (err === false) {
           const req = new FSReqCallback();
           req.oncomplete = callback;
-          return binding.rmdir(path, req);
+          binding.rmdir(path, req);
+          return;
         }
         if (err) {
           return callback(err);
@@ -1190,7 +1192,7 @@ function rmdir(path, options, callback) {
     validateRmdirOptions(options);
     const req = new FSReqCallback();
     req.oncomplete = callback;
-    return binding.rmdir(path, req);
+    binding.rmdir(path, req);
   }
 }
 
@@ -1296,7 +1298,7 @@ function fdatasync(fd, callback) {
  */
 function fdatasyncSync(fd) {
   fd = getValidatedFd(fd);
-  return binding.fdatasync(fd);
+  binding.fdatasync(fd);
 }
 
 /**
@@ -1321,7 +1323,7 @@ function fsync(fd, callback) {
  */
 function fsyncSync(fd) {
   fd = getValidatedFd(fd);
-  return binding.fsync(fd);
+  binding.fsync(fd);
 }
 
 /**
@@ -1888,7 +1890,7 @@ function unlink(path, callback) {
  */
 function unlinkSync(path) {
   path = pathModule.toNamespacedPath(getValidatedPath(path));
-  return binding.unlink(path);
+  binding.unlink(path);
 }
 
 /**
@@ -2929,7 +2931,7 @@ realpath.native = (path, options, callback) => {
   path = getValidatedPath(path);
   const req = new FSReqCallback();
   req.oncomplete = callback;
-  return binding.realpath(pathModule.toNamespacedPath(path), options.encoding, req);
+  binding.realpath(pathModule.toNamespacedPath(path), options.encoding, req);
 };
 
 /**

From fe315055a7dba26782e58f5c2bde3b13b250c277 Mon Sep 17 00:00:00 2001
From: Yagiz Nizipli 
Date: Fri, 24 Nov 2023 14:13:14 -0500
Subject: [PATCH 195/229] doc: update email to reflect affiliation

PR-URL: https://github.com/nodejs/node/pull/50856
Reviewed-By: Matteo Collina 
Reviewed-By: Luigi Pinca 
Reviewed-By: James M Snell 
Reviewed-By: Ruy Adorno 
---
 .mailmap  | 1 +
 README.md | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.mailmap b/.mailmap
index 24ff039703b9c1..9535707681f48a 100644
--- a/.mailmap
+++ b/.mailmap
@@ -583,6 +583,7 @@ xiaoyu <306766053@qq.com>
 Xu Meng  
 Xuguang Mei  
 Yael Hermon  
+Yagiz Nizipli  
 Yang Guo 
 Yash Ladha  <18033231+yashLadha@users.noreply.github.com>
 Yash Ladha  
diff --git a/README.md b/README.md
index 22f9d634ccadd3..f09e1044227a10 100644
--- a/README.md
+++ b/README.md
@@ -165,7 +165,7 @@ For information about the governance of the Node.js project, see
 * [aduh95](https://github.com/aduh95) -
   **Antoine du Hamel** <> (he/him)
 * [anonrig](https://github.com/anonrig) -
-  **Yagiz Nizipli** <> (he/him)
+  **Yagiz Nizipli** <> (he/him)
 * [apapirovski](https://github.com/apapirovski) -
   **Anatoli Papirovski** <> (he/him)
 * [benjamingr](https://github.com/benjamingr) -
@@ -288,7 +288,7 @@ For information about the governance of the Node.js project, see
 * [aduh95](https://github.com/aduh95) -
   **Antoine du Hamel** <> (he/him)
 * [anonrig](https://github.com/anonrig) -
-  **Yagiz Nizipli** <> (he/him)
+  **Yagiz Nizipli** <> (he/him)
 * [antsmartian](https://github.com/antsmartian) -
   **Anto Aravinth** <> (he/him)
 * [apapirovski](https://github.com/apapirovski) -

From d53842683f71b7f563b72d65e81a4412041c7109 Mon Sep 17 00:00:00 2001
From: Antoine du Hamel 
Date: Sat, 25 Nov 2023 10:15:58 +0100
Subject: [PATCH 196/229] doc: add a section regarding `instanceof` in
 `primordials.md`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-URL: https://github.com/nodejs/node/pull/50874
Reviewed-By: Matteo Collina 
Reviewed-By: Chengzhong Wu 
Reviewed-By: Michaël Zasso 
---
 doc/contributing/primordials.md | 46 +++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/doc/contributing/primordials.md b/doc/contributing/primordials.md
index b763c88880f94c..4ca383c41cbd25 100644
--- a/doc/contributing/primordials.md
+++ b/doc/contributing/primordials.md
@@ -775,3 +775,49 @@ const proxyWithNullPrototypeObject = new Proxy(objectToProxy, {
 });
 console.log(proxyWithNullPrototypeObject.someProperty); // genuine value
 ```
+
+### Checking if an object is an instance of a class
+
+#### Using `instanceof` looks up the `@@hasInstance` property of the class
+
+```js
+// User-land
+Object.defineProperty(Array, Symbol.hasInstance, {
+  __proto__: null,
+  value: () => true,
+});
+Object.defineProperty(Date, Symbol.hasInstance, {
+  __proto__: null,
+  value: () => false,
+});
+
+// Core
+const {
+  FunctionPrototypeSymbolHasInstance,
+} = primordials;
+
+console.log(new Date() instanceof Array); // true
+console.log(new Date() instanceof Date); // false
+
+console.log(FunctionPrototypeSymbolHasInstance(Array, new Date())); // false
+console.log(FunctionPrototypeSymbolHasInstance(Date, new Date())); // true
+```
+
+Even without user mutations, the result of `instanceof` can be deceiving when
+dealing with values from different realms:
+
+```js
+const vm = require('node:vm');
+
+console.log(vm.runInNewContext('[]') instanceof Array); // false
+console.log(vm.runInNewContext('[]') instanceof vm.runInNewContext('Array')); // false
+console.log([] instanceof vm.runInNewContext('Array')); // false
+
+console.log(Array.isArray(vm.runInNewContext('[]'))); // true
+console.log(vm.runInNewContext('Array').isArray(vm.runInNewContext('[]'))); // true
+console.log(vm.runInNewContext('Array').isArray([])); // true
+```
+
+In general, using `instanceof` (or `FunctionPrototypeSymbolHasInstance`) checks
+is not recommended, consider checking for the presence of properties or methods
+for more reliable results.

From f4070dd8d42bf12d912d1aa91ed9ae0740f9f8ff Mon Sep 17 00:00:00 2001
From: Antoine du Hamel 
Date: Sat, 25 Nov 2023 17:32:15 +0100
Subject: [PATCH 197/229] meta: clarify recommendation for bug reproductions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sometimes reporters link to a repo for their repro, which
not only likely takes them more time to setup, but also is less
convenient for maintainers. Setting up a repo goes against the idea of a
minimal repro, as if it was actually minimal, they would not have
bothered creating a repo.

PR-URL: https://github.com/nodejs/node/pull/50882
Reviewed-By: Moshe Atlow 
Reviewed-By: Luigi Pinca 
Reviewed-By: Robert Nagy 
Reviewed-By: Yagiz Nizipli 
Reviewed-By: Darshan Sen 
Reviewed-By: Gireesh Punathil 
Reviewed-By: Michaël Zasso 
Reviewed-By: Ruy Adorno 
---
 .github/ISSUE_TEMPLATE/1-bug-report.yml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.github/ISSUE_TEMPLATE/1-bug-report.yml b/.github/ISSUE_TEMPLATE/1-bug-report.yml
index 496d06d3ad70b8..72292b65430d7a 100644
--- a/.github/ISSUE_TEMPLATE/1-bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/1-bug-report.yml
@@ -27,7 +27,11 @@ body:
   - type: textarea
     attributes:
       label: What steps will reproduce the bug?
-      description: Enter details about your bug, preferably a simple code snippet that can be run using `node` directly without installing third-party dependencies.
+      description: >
+        Enter details about your bug, preferably a simple code snippet that can
+        be run using `node` directly without installing third-party dependencies
+        or downloading code from the internet (i.e. no ZIP archive, no GitHub
+        repository, etc.).
   - type: textarea
     attributes:
       label: How often does it reproduce? Is there a required condition?

From 07a4e94e8488979c942769874b6325542388a7fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20Nie=C3=9Fen?= 
Date: Sat, 25 Nov 2023 20:57:53 +0100
Subject: [PATCH 198/229] src: assert return value of BN_bn2binpad

Every other invocation of BN_bn2binpad checks the return value. For
safety and consistency, do so in RandomPrimeTraits::EncodeOutput()
as well.

PR-URL: https://github.com/nodejs/node/pull/50860
Reviewed-By: Filip Skokan 
Reviewed-By: Luigi Pinca 
---
 src/crypto/crypto_random.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/crypto/crypto_random.cc b/src/crypto/crypto_random.cc
index 245f3529186964..48154df7dc91ed 100644
--- a/src/crypto/crypto_random.cc
+++ b/src/crypto/crypto_random.cc
@@ -75,10 +75,10 @@ Maybe RandomPrimeTraits::EncodeOutput(
   size_t size = BN_num_bytes(params.prime.get());
   std::shared_ptr store =
       ArrayBuffer::NewBackingStore(env->isolate(), size);
-  BN_bn2binpad(
-      params.prime.get(),
-      reinterpret_cast(store->Data()),
-      size);
+  CHECK_EQ(static_cast(size),
+           BN_bn2binpad(params.prime.get(),
+                        reinterpret_cast(store->Data()),
+                        size));
   *result = ArrayBuffer::New(env->isolate(), store);
   return Just(true);
 }

From e40a559ab1c3a5e5a95610fb808064c6bef386d7 Mon Sep 17 00:00:00 2001
From: Liu Jia <109258120+Septa2112@users.noreply.github.com>
Date: Sun, 26 Nov 2023 21:42:55 +0800
Subject: [PATCH 199/229] benchmark: update iterations in
 benchmark/util/splice-one.js
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Increase the number of iterations from 1e5 to 5e6
to avoid the test performance gap caused by inactive
V8 optimization caused by insufficient number of iterations

Refs: https://github.com/nodejs/node/issues/50571
PR-URL: https://github.com/nodejs/node/pull/50698
Reviewed-By: Vinícius Lourenço Claro Cardoso 
---
 benchmark/util/splice-one.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/benchmark/util/splice-one.js b/benchmark/util/splice-one.js
index 6a5a634640add4..211e4fad571835 100644
--- a/benchmark/util/splice-one.js
+++ b/benchmark/util/splice-one.js
@@ -3,7 +3,7 @@
 const common = require('../common');
 
 const bench = common.createBenchmark(main, {
-  n: [1e5],
+  n: [5e6],
   pos: ['start', 'middle', 'end'],
   size: [10, 100, 500],
 }, { flags: ['--expose-internals'] });

From 0c85cebdf2d552eccc36064a5c338b3327dcf539 Mon Sep 17 00:00:00 2001
From: Matteo Collina 
Date: Sun, 26 Nov 2023 18:14:19 +0100
Subject: [PATCH 200/229] meta: clarify nomination process according to Node.js
 charter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Matteo Collina 
PR-URL: https://github.com/nodejs/node/pull/50834
Reviewed-By: Gireesh Punathil 
Reviewed-By: Rafael Gonzaga 
Reviewed-By: Robert Nagy 
Reviewed-By: Yagiz Nizipli 
Reviewed-By: Antoine du Hamel 
Reviewed-By: Michael Dawson 
Reviewed-By: Joyee Cheung 
Reviewed-By: Benjamin Gruenbaum 
Reviewed-By: Luigi Pinca 
Reviewed-By: Darshan Sen 
Reviewed-By: Michaël Zasso 
Reviewed-By: Chengzhong Wu 
Reviewed-By: James M Snell 
Reviewed-By: Ruy Adorno 
---
 GOVERNANCE.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/GOVERNANCE.md b/GOVERNANCE.md
index 6ba1ef2daf2c3d..d29012745ea87f 100644
--- a/GOVERNANCE.md
+++ b/GOVERNANCE.md
@@ -151,8 +151,9 @@ Provide a summary of the nominee's contributions. For example:
 Mention @nodejs/collaborators in the issue to notify other collaborators about
 the nomination.
 
-The nomination passes if no collaborators oppose it after one week. Otherwise,
-the nomination fails.
+The nomination passes if no collaborators oppose it after one week. In the case
+of an objection, the TSC is responsible for working with the individuals
+involved and finding a resolution.
 
 There are steps a nominator can take in advance to make a nomination as
 frictionless as possible. To request feedback from other collaborators in

From f94a24cb4b24121428ccdfdcc1b341a344cce9e6 Mon Sep 17 00:00:00 2001
From: CanadaHonk 
Date: Sun, 26 Nov 2023 19:23:41 +0000
Subject: [PATCH 201/229] fs: improve error performance for `rmdirSync`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-URL: https://github.com/nodejs/node/pull/49846
Refs: https://github.com/nodejs/performance/issues/106
Reviewed-By: Yagiz Nizipli 
Reviewed-By: Joyee Cheung 
Reviewed-By: Vinícius Lourenço Claro Cardoso 
---
 benchmark/fs/bench-rmdirSync.js | 42 +++++++++++++++++++++++++++++++++
 lib/fs.js                       |  4 +---
 src/node_file.cc                | 14 +++++------
 typings/internalBinding/fs.d.ts |  2 +-
 4 files changed, 50 insertions(+), 12 deletions(-)
 create mode 100644 benchmark/fs/bench-rmdirSync.js

diff --git a/benchmark/fs/bench-rmdirSync.js b/benchmark/fs/bench-rmdirSync.js
new file mode 100644
index 00000000000000..48103439274371
--- /dev/null
+++ b/benchmark/fs/bench-rmdirSync.js
@@ -0,0 +1,42 @@
+'use strict';
+
+const common = require('../common');
+const fs = require('fs');
+const tmpdir = require('../../test/common/tmpdir');
+tmpdir.refresh();
+
+const bench = common.createBenchmark(main, {
+  type: ['existing', 'non-existing'],
+  n: [1e4],
+});
+
+function main({ n, type }) {
+  switch (type) {
+    case 'existing': {
+      for (let i = 0; i < n; i++) {
+        fs.mkdirSync(tmpdir.resolve(`rmdirsync-bench-dir-${i}`));
+      }
+
+      bench.start();
+      for (let i = 0; i < n; i++) {
+        fs.rmdirSync(tmpdir.resolve(`rmdirsync-bench-dir-${i}`));
+      }
+      bench.end(n);
+      break;
+    }
+    case 'non-existing': {
+      bench.start();
+      for (let i = 0; i < n; i++) {
+        try {
+          fs.rmdirSync(tmpdir.resolve(`.non-existent-folder-${i}`));
+        } catch {
+          // do nothing
+        }
+      }
+      bench.end(n);
+      break;
+    }
+    default:
+      new Error('Invalid type');
+  }
+}
diff --git a/lib/fs.js b/lib/fs.js
index 5e1165453367f7..d8974e67128fb0 100644
--- a/lib/fs.js
+++ b/lib/fs.js
@@ -1220,9 +1220,7 @@ function rmdirSync(path, options) {
     validateRmdirOptions(options);
   }
 
-  const ctx = { path };
-  binding.rmdir(pathModule.toNamespacedPath(path), undefined, ctx);
-  return handleErrorFromBinding(ctx);
+  binding.rmdir(pathModule.toNamespacedPath(path));
 }
 
 /**
diff --git a/src/node_file.cc b/src/node_file.cc
index df0bf442bd58db..b3c52e04fac8f0 100644
--- a/src/node_file.cc
+++ b/src/node_file.cc
@@ -1588,25 +1588,23 @@ static void RMDir(const FunctionCallbackInfo& args) {
   Environment* env = Environment::GetCurrent(args);
 
   const int argc = args.Length();
-  CHECK_GE(argc, 2);
+  CHECK_GE(argc, 1);
 
   BufferValue path(env->isolate(), args[0]);
   CHECK_NOT_NULL(*path);
   THROW_IF_INSUFFICIENT_PERMISSIONS(
       env, permission::PermissionScope::kFileSystemWrite, path.ToStringView());
 
-  FSReqBase* req_wrap_async = GetReqWrap(args, 1);  // rmdir(path, req)
-  if (req_wrap_async != nullptr) {
+  if (argc > 1) {
+    FSReqBase* req_wrap_async = GetReqWrap(args, 1);  // rmdir(path, req)
     FS_ASYNC_TRACE_BEGIN1(
         UV_FS_RMDIR, req_wrap_async, "path", TRACE_STR_COPY(*path))
     AsyncCall(env, req_wrap_async, args, "rmdir", UTF8, AfterNoArgs,
               uv_fs_rmdir, *path);
-  } else {  // rmdir(path, undefined, ctx)
-    CHECK_EQ(argc, 3);
-    FSReqWrapSync req_wrap_sync;
+  } else {  // rmdir(path)
+    FSReqWrapSync req_wrap_sync("rmdir", *path);
     FS_SYNC_TRACE_BEGIN(rmdir);
-    SyncCall(env, args[2], &req_wrap_sync, "rmdir",
-             uv_fs_rmdir, *path);
+    SyncCallAndThrowOnError(env, &req_wrap_sync, uv_fs_rmdir, *path);
     FS_SYNC_TRACE_END(rmdir);
   }
 }
diff --git a/typings/internalBinding/fs.d.ts b/typings/internalBinding/fs.d.ts
index 77f20e9550e30a..67c14c73acf7f1 100644
--- a/typings/internalBinding/fs.d.ts
+++ b/typings/internalBinding/fs.d.ts
@@ -192,7 +192,7 @@ declare namespace InternalFSBinding {
   function rename(oldPath: string, newPath: string): void;
 
   function rmdir(path: string, req: FSReqCallback): void;
-  function rmdir(path: string, req: undefined, ctx: FSSyncContext): void;
+  function rmdir(path: string): void;
   function rmdir(path: string, usePromises: typeof kUsePromises): Promise;
 
   function stat(path: StringOrBuffer, useBigint: boolean, req: FSReqCallback): void;

From 032535e2706224cc1472a45957e440ab254722fb Mon Sep 17 00:00:00 2001
From: Dima Demakov 
Date: Mon, 27 Nov 2023 10:03:27 +0100
Subject: [PATCH 202/229] doc: make theme consistent across api and other docs

Since website based on 2 different repos, there was an inconsistency
in theme selection, so we had 2 independant theme props.
Now only one stored in local storage is a single source of truth

PR-URL: https://github.com/nodejs/node/pull/50877
Reviewed-By: Claudio Wunder 
Reviewed-By: Yagiz Nizipli 
---
 BUILDING.md                            |  6 ++++++
 doc/api_assets/api.js                  | 14 +++++++-------
 doc/contributing/collaborator-guide.md |  7 ++++---
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/BUILDING.md b/BUILDING.md
index 45d062f285ec99..e169d7cfe04557 100644
--- a/BUILDING.md
+++ b/BUILDING.md
@@ -462,6 +462,12 @@ make docopen
 This will open a file URL to a one-page version of all the browsable HTML
 documents using the default browser.
 
+```bash
+make docclean
+```
+
+This will clean previously built doc.
+
 To test if Node.js was built correctly:
 
 ```bash
diff --git a/doc/api_assets/api.js b/doc/api_assets/api.js
index c7568206e29f19..8b40042f85cbae 100644
--- a/doc/api_assets/api.js
+++ b/doc/api_assets/api.js
@@ -2,11 +2,11 @@
 
 {
   function setupTheme() {
-    const kCustomPreference = 'customDarkTheme';
-    const userSettings = sessionStorage.getItem(kCustomPreference);
+    const storedTheme = localStorage.getItem('theme');
     const themeToggleButton = document.getElementById('theme-toggle-btn');
 
-    if (userSettings === null && window.matchMedia) {
+    // Follow operating system theme preference
+    if (storedTheme === null && window.matchMedia) {
       const mq = window.matchMedia('(prefers-color-scheme: dark)');
 
       if ('onchange' in mq) {
@@ -28,16 +28,16 @@
       if (mq.matches) {
         document.documentElement.classList.add('dark-mode');
       }
-    } else if (userSettings === 'true') {
+    } else if (storedTheme === 'dark') {
       document.documentElement.classList.add('dark-mode');
     }
 
     if (themeToggleButton) {
       themeToggleButton.hidden = false;
       themeToggleButton.addEventListener('click', function() {
-        sessionStorage.setItem(
-          kCustomPreference,
-          document.documentElement.classList.toggle('dark-mode'),
+        localStorage.setItem(
+          'theme',
+          document.documentElement.classList.toggle('dark-mode') ? 'dark' : 'light',
         );
       });
     }
diff --git a/doc/contributing/collaborator-guide.md b/doc/contributing/collaborator-guide.md
index 2ecf14a082dc3b..2bf751b7076139 100644
--- a/doc/contributing/collaborator-guide.md
+++ b/doc/contributing/collaborator-guide.md
@@ -532,9 +532,10 @@ The TSC serves as the final arbiter where required.
    [build](https://github.com/nodejs/build/issues) repositories, open new
    issues. Run a new CI any time someone pushes new code to the pull request.
 4. Check that the commit message adheres to [commit message guidelines][].
-5. Add all necessary [metadata](#metadata) to commit messages before landing. If
-   you are unsure exactly how to format the commit messages, use the commit log
-   as a reference. See [this commit][commit-example] as an example.
+5. Add all necessary [metadata][git-node-metadata] to commit messages before
+   landing. If you are unsure exactly how to format the commit messages, use
+   the commit log as a reference. See [this commit][commit-example] as an
+   example.
 
 For pull requests from first-time contributors, be
 [welcoming](#welcoming-first-time-contributors). Also, verify that their git

From 1862235a26a655dcd21277d8f920481b19e1991d Mon Sep 17 00:00:00 2001
From: Jithil P Ponnan 
Date: Wed, 15 Nov 2023 00:45:34 +1100
Subject: [PATCH 203/229] test: fix message v8 not normalising alphanumeric
 paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-URL: https://github.com/nodejs/node/pull/50730
Fixes: https://github.com/nodejs/node/issues/50724
Reviewed-By: Moshe Atlow 
Reviewed-By: Vinícius Lourenço Claro Cardoso 
Reviewed-By: Rafael Gonzaga 
---
 test/parallel/test-node-output-v8-warning.mjs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/parallel/test-node-output-v8-warning.mjs b/test/parallel/test-node-output-v8-warning.mjs
index 7278a03ce178e7..8e497739d21c70 100644
--- a/test/parallel/test-node-output-v8-warning.mjs
+++ b/test/parallel/test-node-output-v8-warning.mjs
@@ -13,6 +13,7 @@ describe('v8 output', { concurrency: true }, () => {
     .replaceAll(/:\d+/g, ':*')
     .replaceAll('/', '*')
     .replaceAll('*test*', '*')
+    .replaceAll(/.*?\*fixtures\*v8\*/g, '(node:*) V8: *') // Replace entire path before fixtures/v8
     .replaceAll('*fixtures*v8*', '*')
     .replaceAll('node --', '* --');
   }

From 8d5469c84bda957d4fc7cbdbcfd93b289cfd1a6f Mon Sep 17 00:00:00 2001
From: Francesco Trotta 
Date: Wed, 29 Nov 2023 10:21:54 +0100
Subject: [PATCH 204/229] esm: do not call `getSource` when format is
 `commonjs`

Ensure that `defaultLoad` does not uselessly access the file system to
get the source of modules that are known to be in CommonJS format.

This allows CommonJS imports to resolve in the current phase of the
event loop.

Refs: https://github.com/eslint/eslint/pull/17683
PR-URL: https://github.com/nodejs/node/pull/50465
Reviewed-By: James M Snell 
Reviewed-By: Antoine du Hamel 
---
 lib/internal/modules/esm/load.js              | 19 +++++++++--------
 .../test-esm-dynamic-import-commonjs.js       | 19 +++++++++++++++++
 .../test-esm-dynamic-import-commonjs.mjs      |  9 ++++++++
 .../es-module/test-esm-loader-with-source.mjs | 10 +++++++++
 test/fixtures/empty.cjs                       |  0
 .../es-module-loaders/preset-cjs-source.mjs   | 21 +++++++++++++++++++
 6 files changed, 69 insertions(+), 9 deletions(-)
 create mode 100644 test/es-module/test-esm-dynamic-import-commonjs.js
 create mode 100644 test/es-module/test-esm-dynamic-import-commonjs.mjs
 create mode 100644 test/es-module/test-esm-loader-with-source.mjs
 create mode 100644 test/fixtures/empty.cjs
 create mode 100644 test/fixtures/es-module-loaders/preset-cjs-source.mjs

diff --git a/lib/internal/modules/esm/load.js b/lib/internal/modules/esm/load.js
index bcebc283828ad5..70d1b7fe83fa35 100644
--- a/lib/internal/modules/esm/load.js
+++ b/lib/internal/modules/esm/load.js
@@ -132,20 +132,21 @@ async function defaultLoad(url, context = kEmptyObject) {
   if (urlInstance.protocol === 'node:') {
     source = null;
     format ??= 'builtin';
-  } else {
-    let contextToPass = context;
+  } else if (format !== 'commonjs' || defaultType === 'module') {
     if (source == null) {
       ({ responseURL, source } = await getSource(urlInstance, context));
-      contextToPass = { __proto__: context, source };
+      context = { __proto__: context, source };
     }
 
-    // Now that we have the source for the module, run `defaultGetFormat` again in case we detect ESM syntax.
-    format ??= await defaultGetFormat(urlInstance, contextToPass);
+    if (format == null) {
+      // Now that we have the source for the module, run `defaultGetFormat` to detect its format.
+      format = await defaultGetFormat(urlInstance, context);
 
-    if (format === 'commonjs' && contextToPass !== context && defaultType !== 'module') {
-      // For backward compatibility reasons, we need to discard the source in
-      // order for the CJS loader to re-fetch it.
-      source = null;
+      if (format === 'commonjs') {
+        // For backward compatibility reasons, we need to discard the source in
+        // order for the CJS loader to re-fetch it.
+        source = null;
+      }
     }
   }
 
diff --git a/test/es-module/test-esm-dynamic-import-commonjs.js b/test/es-module/test-esm-dynamic-import-commonjs.js
new file mode 100644
index 00000000000000..9022b83f1fcffa
--- /dev/null
+++ b/test/es-module/test-esm-dynamic-import-commonjs.js
@@ -0,0 +1,19 @@
+'use strict';
+
+const common = require('../common');
+const fixtures = require('../common/fixtures');
+const assert = require('node:assert');
+
+(async () => {
+
+  // Make sure that the CommonJS module lexer has been initialized.
+  // See https://github.com/nodejs/node/blob/v21.1.0/lib/internal/modules/esm/translators.js#L61-L81.
+  await import(fixtures.fileURL('empty.js'));
+
+  let tickDuringCJSImport = false;
+  process.nextTick(() => { tickDuringCJSImport = true; });
+  await import(fixtures.fileURL('empty.cjs'));
+
+  assert(!tickDuringCJSImport);
+
+})().then(common.mustCall());
diff --git a/test/es-module/test-esm-dynamic-import-commonjs.mjs b/test/es-module/test-esm-dynamic-import-commonjs.mjs
new file mode 100644
index 00000000000000..6a8f2506c0eb05
--- /dev/null
+++ b/test/es-module/test-esm-dynamic-import-commonjs.mjs
@@ -0,0 +1,9 @@
+import '../common/index.mjs';
+import fixtures from '../common/fixtures.js';
+import assert from 'node:assert';
+
+let tickDuringCJSImport = false;
+process.nextTick(() => { tickDuringCJSImport = true; });
+await import(fixtures.fileURL('empty.cjs'));
+
+assert(!tickDuringCJSImport);
diff --git a/test/es-module/test-esm-loader-with-source.mjs b/test/es-module/test-esm-loader-with-source.mjs
new file mode 100644
index 00000000000000..41dca12dfc6bab
--- /dev/null
+++ b/test/es-module/test-esm-loader-with-source.mjs
@@ -0,0 +1,10 @@
+// Flags: --experimental-loader ./test/fixtures/es-module-loaders/preset-cjs-source.mjs
+import '../common/index.mjs';
+import * as fixtures from '../common/fixtures.mjs';
+import assert from 'assert';
+
+const { default: existingFileSource } = await import(fixtures.fileURL('es-modules', 'cjs-file.cjs'));
+const { default: noSuchFileSource } = await import(new URL('./no-such-file.cjs', import.meta.url));
+
+assert.strictEqual(existingFileSource, 'no .cjs file was read to get this source');
+assert.strictEqual(noSuchFileSource, 'no .cjs file was read to get this source');
diff --git a/test/fixtures/empty.cjs b/test/fixtures/empty.cjs
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/test/fixtures/es-module-loaders/preset-cjs-source.mjs b/test/fixtures/es-module-loaders/preset-cjs-source.mjs
new file mode 100644
index 00000000000000..2cfa851a23f889
--- /dev/null
+++ b/test/fixtures/es-module-loaders/preset-cjs-source.mjs
@@ -0,0 +1,21 @@
+export function resolve(specifier, context, next) {
+  if (specifier.endsWith('/no-such-file.cjs')) {
+    // Shortcut to avoid ERR_MODULE_NOT_FOUND for non-existing file, but keep the url for the load hook.
+    return {
+      shortCircuit: true,
+      url: specifier,
+    };
+  }
+  return next(specifier);
+}
+
+export function load(href, context, next) {
+  if (href.endsWith('.cjs')) {
+    return {
+      format: 'commonjs',
+      shortCircuit: true,
+      source: 'module.exports = "no .cjs file was read to get this source";',
+    };
+  }
+  return next(href);
+}

From 843d5f84ca0c34d4cc59194fc1284a2c631ead97 Mon Sep 17 00:00:00 2001
From: Antoine du Hamel 
Date: Wed, 29 Nov 2023 10:12:05 +0100
Subject: [PATCH 205/229] esm: fallback to `getSource` when `load` returns
 nullish `source`

When using the Modules Customization Hooks API to load CommonJS modules,
we want to support the returned value of `defaultLoad` which must be
nullish to preserve backward compatibility. This can be achieved by
fetching the source from the translator.

PR-URL: https://github.com/nodejs/node/pull/50825
Fixes: https://github.com/nodejs/node/issues/50435
Reviewed-By: Geoffrey Booth 
Reviewed-By: Jacob Smith 
---
 lib/internal/modules/esm/load.js         |  1 +
 lib/internal/modules/esm/translators.js  | 35 ++++++++++++++++--------
 test/es-module/test-esm-loader-hooks.mjs | 35 ++++++++++++++++++++++++
 3 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/lib/internal/modules/esm/load.js b/lib/internal/modules/esm/load.js
index 70d1b7fe83fa35..5239bc8ed883a5 100644
--- a/lib/internal/modules/esm/load.js
+++ b/lib/internal/modules/esm/load.js
@@ -262,5 +262,6 @@ function throwUnknownModuleFormat(url, format) {
 module.exports = {
   defaultLoad,
   defaultLoadSync,
+  getSourceSync,
   throwUnknownModuleFormat,
 };
diff --git a/lib/internal/modules/esm/translators.js b/lib/internal/modules/esm/translators.js
index 7a62615cfe4210..9976b819f266c6 100644
--- a/lib/internal/modules/esm/translators.js
+++ b/lib/internal/modules/esm/translators.js
@@ -31,6 +31,7 @@ function lazyTypes() {
 }
 
 const { containsModuleSyntax } = internalBinding('contextify');
+const { BuiltinModule } = require('internal/bootstrap/realm');
 const assert = require('internal/assert');
 const { readFileSync } = require('fs');
 const { dirname, extname, isAbsolute } = require('path');
@@ -58,6 +59,17 @@ const asyncESM = require('internal/process/esm_loader');
 const { emitWarningSync } = require('internal/process/warning');
 const { internalCompileFunction } = require('internal/vm');
 
+// Lazy-loading to avoid circular dependencies.
+let getSourceSync;
+/**
+ * @param {Parameters[0]} url
+ * @returns {ReturnType}
+ */
+function getSource(url) {
+  getSourceSync ??= require('internal/modules/esm/load').getSourceSync;
+  return getSourceSync(url);
+}
+
 /** @type {import('deps/cjs-module-lexer/lexer.js').parse} */
 let cjsParse;
 /**
@@ -225,21 +237,19 @@ function loadCJSModule(module, source, url, filename) {
   // eslint-disable-next-line func-name-matching,func-style
   const requireFn = function require(specifier) {
     let importAttributes = kEmptyObject;
-    if (!StringPrototypeStartsWith(specifier, 'node:')) {
+    if (!StringPrototypeStartsWith(specifier, 'node:') && !BuiltinModule.normalizeRequirableId(specifier)) {
       // TODO: do not depend on the monkey-patchable CJS loader here.
       const path = CJSModule._resolveFilename(specifier, module);
-      if (specifier !== path) {
-        switch (extname(path)) {
-          case '.json':
-            importAttributes = { __proto__: null, type: 'json' };
-            break;
-          case '.node':
-            return CJSModule._load(specifier, module);
-          default:
+      switch (extname(path)) {
+        case '.json':
+          importAttributes = { __proto__: null, type: 'json' };
+          break;
+        case '.node':
+          return CJSModule._load(specifier, module);
+        default:
             // fall through
-        }
-        specifier = `${pathToFileURL(path)}`;
       }
+      specifier = `${pathToFileURL(path)}`;
     }
     const job = asyncESM.esmLoader.getModuleJobSync(specifier, url, importAttributes);
     job.runSync();
@@ -276,7 +286,8 @@ function createCJSModuleWrap(url, source, isMain, loadCJS = loadCJSModule) {
   debug(`Translating CJSModule ${url}`);
 
   const filename = StringPrototypeStartsWith(url, 'file://') ? fileURLToPath(url) : url;
-  source = stringify(source);
+  // In case the source was not provided by the `load` step, we need fetch it now.
+  source = stringify(source ?? getSource(new URL(url)).source);
 
   const { exportNames, module } = cjsPreparseModuleExports(filename, source);
   cjsCache.set(url, module);
diff --git a/test/es-module/test-esm-loader-hooks.mjs b/test/es-module/test-esm-loader-hooks.mjs
index 6544c05ec93f8e..d5fee004392b44 100644
--- a/test/es-module/test-esm-loader-hooks.mjs
+++ b/test/es-module/test-esm-loader-hooks.mjs
@@ -736,4 +736,39 @@ describe('Loader hooks', { concurrency: true }, () => {
     assert.strictEqual(code, 0);
     assert.strictEqual(signal, null);
   });
+
+  it('should handle mixed of opt-in modules and non-opt-in ones', async () => {
+    const { code, signal, stdout, stderr } = await spawnPromisified(execPath, [
+      '--no-warnings',
+      '--experimental-loader',
+      `data:text/javascript,const fixtures=${JSON.stringify(fixtures.path('empty.js'))};export ${
+        encodeURIComponent(function resolve(s, c, n) {
+          if (s.endsWith('entry-point')) {
+            return {
+              shortCircuit: true,
+              url: 'file:///c:/virtual-entry-point',
+              format: 'commonjs',
+            };
+          }
+          return n(s, c);
+        })
+      }export ${
+        encodeURIComponent(async function load(u, c, n) {
+          if (u === 'file:///c:/virtual-entry-point') {
+            return {
+              shortCircuit: true,
+              source: `"use strict";require(${JSON.stringify(fixtures)});console.log("Hello");`,
+              format: 'commonjs',
+            };
+          }
+          return n(u, c);
+        })}`,
+      'entry-point',
+    ]);
+
+    assert.strictEqual(stderr, '');
+    assert.strictEqual(stdout, 'Hello\n');
+    assert.strictEqual(code, 0);
+    assert.strictEqual(signal, null);
+  });
 });

From 402e2575205a2cfc4de7f889551cd3085c5d3b7f Mon Sep 17 00:00:00 2001
From: Joyee Cheung 
Date: Thu, 26 Oct 2023 14:11:43 +0200
Subject: [PATCH 206/229] doc: update notable changes in v21.1.0

PR-URL: https://github.com/nodejs/node/pull/50388
Refs: https://github.com/nodejs/nodejs.org/pull/6045
Reviewed-By: Tierney Cyren 
Reviewed-By: Luigi Pinca 
---
 doc/changelogs/CHANGELOG_V21.md | 423 ++++++++++++++++++++++++++++++++
 1 file changed, 423 insertions(+)
 create mode 100644 doc/changelogs/CHANGELOG_V21.md

diff --git a/doc/changelogs/CHANGELOG_V21.md b/doc/changelogs/CHANGELOG_V21.md
new file mode 100644
index 00000000000000..4804d815c5fe0f
--- /dev/null
+++ b/doc/changelogs/CHANGELOG_V21.md
@@ -0,0 +1,423 @@
+# Node.js 21 ChangeLog
+
+
+
+
+
+
+
+
+
+
+
Current
+21.1.0
+21.0.0
+
+ +* Other Versions + * [20.x](CHANGELOG_V20.md) + * [19.x](CHANGELOG_V19.md) + * [18.x](CHANGELOG_V18.md) + * [17.x](CHANGELOG_V17.md) + * [16.x](CHANGELOG_V16.md) + * [15.x](CHANGELOG_V15.md) + * [14.x](CHANGELOG_V14.md) + * [13.x](CHANGELOG_V13.md) + * [12.x](CHANGELOG_V12.md) + * [11.x](CHANGELOG_V11.md) + * [10.x](CHANGELOG_V10.md) + * [9.x](CHANGELOG_V9.md) + * [8.x](CHANGELOG_V8.md) + * [7.x](CHANGELOG_V7.md) + * [6.x](CHANGELOG_V6.md) + * [5.x](CHANGELOG_V5.md) + * [4.x](CHANGELOG_V4.md) + * [0.12.x](CHANGELOG_V012.md) + * [0.10.x](CHANGELOG_V010.md) + * [io.js](CHANGELOG_IOJS.md) + * [Archive](CHANGELOG_ARCHIVE.md) + + + +## 2023-10-24, Version 21.1.0 (Current), @targos + +### Notable Changes + +#### Automatically detect and run ESM syntax + +The new flag `--experimental-detect-module` can be used to automatically run +ES modules when their syntax can be detected. For “ambiguous” files, which are +`.js` or extensionless files with no `package.json` with a `type` field, Node.js +will parse the file to detect ES module syntax; if found, it will run the file +as an ES module, otherwise it will run the file as a CommonJS module. +The same applies to string input via `--eval` or `STDIN`. + +We hope to make detection enabled by default in a future version of Node.js. +Detection increases startup time, so we encourage everyone — especially package +authors — to add a `type` field to `package.json`, even for the default +`"type": "commonjs"`. The presence of a `type` field, or explicit extensions +such as `.mjs` or `.cjs`, will opt out of detection. + +Contributed by Geoffrey Booth in [#50096](https://github.com/nodejs/node/pull/50096). + +### vm: fix V8 compilation cache support for vm.Script + +Previously repeated compilation of the same source code using `vm.Script` +stopped hitting the V8 compilation cache after v16.x when support for +`importModuleDynamically` was added to `vm.Script`, resulting in a performance +regression that blocked users (in particular Jest users) from upgrading from +v16.x. + +The recent fixes landed in v21.1.0 allow the compilation cache to be hit again +for `vm.Script` when `--experimental-vm-modules` is not used even in the +presence of the `importModuleDynamically` option, so that users affected by the +performance regression can now upgrade. Ongoing work is also being done to +enable compilation cache support for `vm.CompileFunction`. + +Contributed by Joyee Cheung in [#50137](https://github.com/nodejs/node/pull/50137). + +#### Other Notable Changes + +* \[[`3729e33358`](https://github.com/nodejs/node/commit/3729e33358)] - **doc**: add H4ad to collaborators (Vinícius Lourenço) [#50217](https://github.com/nodejs/node/pull/50217) +* \[[`18862e4d5d`](https://github.com/nodejs/node/commit/18862e4d5d)] - **(SEMVER-MINOR)** **fs**: add `flush` option to `appendFile()` functions (Colin Ihrig) [#50095](https://github.com/nodejs/node/pull/50095) +* \[[`5a52c518ef`](https://github.com/nodejs/node/commit/5a52c518ef)] - **(SEMVER-MINOR)** **lib**: add `navigator.userAgent` (Yagiz Nizipli) [#50200](https://github.com/nodejs/node/pull/50200) +* \[[`789372a072`](https://github.com/nodejs/node/commit/789372a072)] - **(SEMVER-MINOR)** **stream**: allow pass stream class to `stream.compose` (Alex Yang) [#50187](https://github.com/nodejs/node/pull/50187) +* \[[`f3a9ea0bc4`](https://github.com/nodejs/node/commit/f3a9ea0bc4)] - **stream**: improve performance of readable stream reads (Raz Luvaton) [#50173](https://github.com/nodejs/node/pull/50173) + +### Commits + +* \[[`9cd68b9083`](https://github.com/nodejs/node/commit/9cd68b9083)] - **buffer**: remove unnecessary assignment in fromString (Tobias Nießen) [#50199](https://github.com/nodejs/node/pull/50199) +* \[[`a362c276ec`](https://github.com/nodejs/node/commit/a362c276ec)] - **crypto**: ensure valid point on elliptic curve in SubtleCrypto.importKey (Filip Skokan) [#50234](https://github.com/nodejs/node/pull/50234) +* \[[`f4da308f8d`](https://github.com/nodejs/node/commit/f4da308f8d)] - **deps**: V8: cherry-pick f7d000a7ae7b (Luke Albao) [#50302](https://github.com/nodejs/node/pull/50302) +* \[[`269e268c38`](https://github.com/nodejs/node/commit/269e268c38)] - **deps**: update ada to 2.7.2 (Node.js GitHub Bot) [#50338](https://github.com/nodejs/node/pull/50338) +* \[[`03a31ce41e`](https://github.com/nodejs/node/commit/03a31ce41e)] - **deps**: update corepack to 0.22.0 (Node.js GitHub Bot) [#50325](https://github.com/nodejs/node/pull/50325) +* \[[`000531781b`](https://github.com/nodejs/node/commit/000531781b)] - **deps**: update undici to 5.26.4 (Node.js GitHub Bot) [#50274](https://github.com/nodejs/node/pull/50274) +* \[[`f050668c14`](https://github.com/nodejs/node/commit/f050668c14)] - **deps**: update c-ares to 1.20.1 (Node.js GitHub Bot) [#50082](https://github.com/nodejs/node/pull/50082) +* \[[`ba258b682b`](https://github.com/nodejs/node/commit/ba258b682b)] - **deps**: update c-ares to 1.20.0 (Node.js GitHub Bot) [#50082](https://github.com/nodejs/node/pull/50082) +* \[[`571f7ef1fa`](https://github.com/nodejs/node/commit/571f7ef1fa)] - **deps**: patch V8 to 11.8.172.15 (Michaël Zasso) [#50114](https://github.com/nodejs/node/pull/50114) +* \[[`943047e800`](https://github.com/nodejs/node/commit/943047e800)] - **deps**: V8: cherry-pick 25902244ad1a (Joyee Cheung) [#50156](https://github.com/nodejs/node/pull/50156) +* \[[`db2a1cf1cb`](https://github.com/nodejs/node/commit/db2a1cf1cb)] - **doc**: fix `navigator.hardwareConcurrency` example (Tobias Nießen) [#50278](https://github.com/nodejs/node/pull/50278) +* \[[`6e537aeb44`](https://github.com/nodejs/node/commit/6e537aeb44)] - **doc**: explain how to disable navigator (Geoffrey Booth) [#50310](https://github.com/nodejs/node/pull/50310) +* \[[`c40de82d62`](https://github.com/nodejs/node/commit/c40de82d62)] - **doc**: add loong64 info into platform list (Shi Pujin) [#50086](https://github.com/nodejs/node/pull/50086) +* \[[`1c21a1880b`](https://github.com/nodejs/node/commit/1c21a1880b)] - **doc**: update release process LTS step (Richard Lau) [#50299](https://github.com/nodejs/node/pull/50299) +* \[[`2473aa3672`](https://github.com/nodejs/node/commit/2473aa3672)] - **doc**: fix release process table of contents (Richard Lau) [#50216](https://github.com/nodejs/node/pull/50216) +* \[[`ce9d84eae3`](https://github.com/nodejs/node/commit/ce9d84eae3)] - **doc**: update api `stream.compose` (Alex Yang) [#50206](https://github.com/nodejs/node/pull/50206) +* \[[`dacee4d9b5`](https://github.com/nodejs/node/commit/dacee4d9b5)] - **doc**: add ReflectConstruct to known perf issues (Vinicius Lourenço) [#50111](https://github.com/nodejs/node/pull/50111) +* \[[`82363be2ac`](https://github.com/nodejs/node/commit/82363be2ac)] - **doc**: fix typo in dgram docs (Peter Johnson) [#50211](https://github.com/nodejs/node/pull/50211) +* \[[`8c1a46c751`](https://github.com/nodejs/node/commit/8c1a46c751)] - **doc**: fix H4ad collaborator sort (Vinicius Lourenço) [#50218](https://github.com/nodejs/node/pull/50218) +* \[[`3729e33358`](https://github.com/nodejs/node/commit/3729e33358)] - **doc**: add H4ad to collaborators (Vinícius Lourenço) [#50217](https://github.com/nodejs/node/pull/50217) +* \[[`bac872cbd0`](https://github.com/nodejs/node/commit/bac872cbd0)] - **doc**: update release-stewards with last sec-release (Rafael Gonzaga) [#50179](https://github.com/nodejs/node/pull/50179) +* \[[`06b7724f14`](https://github.com/nodejs/node/commit/06b7724f14)] - **doc**: add command to keep major branch sync (Rafael Gonzaga) [#50102](https://github.com/nodejs/node/pull/50102) +* \[[`47633ab086`](https://github.com/nodejs/node/commit/47633ab086)] - **doc**: add loong64 to list of architectures (Shi Pujin) [#50172](https://github.com/nodejs/node/pull/50172) +* \[[`1f40ca1b91`](https://github.com/nodejs/node/commit/1f40ca1b91)] - **doc**: update security release process (Michael Dawson) [#50166](https://github.com/nodejs/node/pull/50166) +* \[[`998feda118`](https://github.com/nodejs/node/commit/998feda118)] - **esm**: do not give wrong hints when detecting file format (Antoine du Hamel) [#50314](https://github.com/nodejs/node/pull/50314) +* \[[`e375063e01`](https://github.com/nodejs/node/commit/e375063e01)] - **(SEMVER-MINOR)** **esm**: detect ESM syntax in ambiguous JavaScript (Geoffrey Booth) [#50096](https://github.com/nodejs/node/pull/50096) +* \[[`c76eb27971`](https://github.com/nodejs/node/commit/c76eb27971)] - **esm**: improve check for ESM syntax (Geoffrey Booth) [#50127](https://github.com/nodejs/node/pull/50127) +* \[[`7740bf820c`](https://github.com/nodejs/node/commit/7740bf820c)] - **esm**: rename error code related to import attributes (Antoine du Hamel) [#50181](https://github.com/nodejs/node/pull/50181) +* \[[`0cc176ef25`](https://github.com/nodejs/node/commit/0cc176ef25)] - **fs**: improve error performance for `readSync` (Jungku Lee) [#50033](https://github.com/nodejs/node/pull/50033) +* \[[`5942edb774`](https://github.com/nodejs/node/commit/5942edb774)] - **fs**: improve error performance for `fsyncSync` (Jungku Lee) [#49880](https://github.com/nodejs/node/pull/49880) +* \[[`6ec5abadc0`](https://github.com/nodejs/node/commit/6ec5abadc0)] - **fs**: improve error performance for `mkdirSync` (CanadaHonk) [#49847](https://github.com/nodejs/node/pull/49847) +* \[[`c5ff000cb1`](https://github.com/nodejs/node/commit/c5ff000cb1)] - **fs**: improve error performance of `realpathSync` (Yagiz Nizipli) [#49962](https://github.com/nodejs/node/pull/49962) +* \[[`6eeaa02f5c`](https://github.com/nodejs/node/commit/6eeaa02f5c)] - **fs**: improve error performance of `lchownSync` (Yagiz Nizipli) [#49962](https://github.com/nodejs/node/pull/49962) +* \[[`dc9ac8d41c`](https://github.com/nodejs/node/commit/dc9ac8d41c)] - **fs**: improve error performance of `symlinkSync` (Yagiz Nizipli) [#49962](https://github.com/nodejs/node/pull/49962) +* \[[`bc6f279261`](https://github.com/nodejs/node/commit/bc6f279261)] - **fs**: improve error performance of `readlinkSync` (Yagiz Nizipli) [#49962](https://github.com/nodejs/node/pull/49962) +* \[[`275987841e`](https://github.com/nodejs/node/commit/275987841e)] - **fs**: improve error performance of `mkdtempSync` (Yagiz Nizipli) [#49962](https://github.com/nodejs/node/pull/49962) +* \[[`81f15274e2`](https://github.com/nodejs/node/commit/81f15274e2)] - **fs**: improve error performance of `linkSync` (Yagiz Nizipli) [#49962](https://github.com/nodejs/node/pull/49962) +* \[[`f766c04856`](https://github.com/nodejs/node/commit/f766c04856)] - **fs**: improve error performance of `chownSync` (Yagiz Nizipli) [#49962](https://github.com/nodejs/node/pull/49962) +* \[[`610036c67d`](https://github.com/nodejs/node/commit/610036c67d)] - **fs**: improve error performance of `renameSync` (Yagiz Nizipli) [#49962](https://github.com/nodejs/node/pull/49962) +* \[[`18862e4d5d`](https://github.com/nodejs/node/commit/18862e4d5d)] - **(SEMVER-MINOR)** **fs**: add flush option to appendFile() functions (Colin Ihrig) [#50095](https://github.com/nodejs/node/pull/50095) +* \[[`3f8cbb15cb`](https://github.com/nodejs/node/commit/3f8cbb15cb)] - **http2**: allow streams to complete gracefully after goaway (Michael Lumish) [#50202](https://github.com/nodejs/node/pull/50202) +* \[[`1464eba1a0`](https://github.com/nodejs/node/commit/1464eba1a0)] - **lib**: improve performance of validateStringArray and validateBooleanArray (Aras Abbasi) [#49756](https://github.com/nodejs/node/pull/49756) +* \[[`5a52c518ef`](https://github.com/nodejs/node/commit/5a52c518ef)] - **(SEMVER-MINOR)** **lib**: add `navigator.userAgent` (Yagiz Nizipli) [#50200](https://github.com/nodejs/node/pull/50200) +* \[[`b6021ab8f6`](https://github.com/nodejs/node/commit/b6021ab8f6)] - **lib**: reduce overhead of blob clone (Vinicius Lourenço) [#50110](https://github.com/nodejs/node/pull/50110) +* \[[`be19d9baa1`](https://github.com/nodejs/node/commit/be19d9baa1)] - **meta**: move Trott to TSC regular member (Rich Trott) [#50297](https://github.com/nodejs/node/pull/50297) +* \[[`91e373f8e9`](https://github.com/nodejs/node/commit/91e373f8e9)] - **node-api**: return napi\_exception\_pending on proxy handlers (Chengzhong Wu) [#48607](https://github.com/nodejs/node/pull/48607) +* \[[`531a3ae4b5`](https://github.com/nodejs/node/commit/531a3ae4b5)] - **stream**: simplify prefinish (Robert Nagy) [#50204](https://github.com/nodejs/node/pull/50204) +* \[[`514ac86579`](https://github.com/nodejs/node/commit/514ac86579)] - **stream**: reduce scope of readable bitmap details (Robert Nagy) [#49963](https://github.com/nodejs/node/pull/49963) +* \[[`789372a072`](https://github.com/nodejs/node/commit/789372a072)] - **(SEMVER-MINOR)** **stream**: allow pass stream class to `stream.compose` (Alex Yang) [#50187](https://github.com/nodejs/node/pull/50187) +* \[[`f3a9ea0bc4`](https://github.com/nodejs/node/commit/f3a9ea0bc4)] - **stream**: call helper function from push and unshift (Raz Luvaton) [#50173](https://github.com/nodejs/node/pull/50173) +* \[[`a9ca7b32e7`](https://github.com/nodejs/node/commit/a9ca7b32e7)] - **test**: improve watch mode test (Moshe Atlow) [#50319](https://github.com/nodejs/node/pull/50319) +* \[[`63b7059efd`](https://github.com/nodejs/node/commit/63b7059efd)] - **test**: set `test-watch-mode-inspect` as flaky (Yagiz Nizipli) [#50259](https://github.com/nodejs/node/pull/50259) +* \[[`7f87084b05`](https://github.com/nodejs/node/commit/7f87084b05)] - _**Revert**_ "**test**: set `test-esm-loader-resolve-type` as flaky" (Antoine du Hamel) [#50315](https://github.com/nodejs/node/pull/50315) +* \[[`4d390e2de4`](https://github.com/nodejs/node/commit/4d390e2de4)] - **test**: replace forEach with for..of in test-http-perf\_hooks.js (Niya Shiyas) [#49818](https://github.com/nodejs/node/pull/49818) +* \[[`67c599ec39`](https://github.com/nodejs/node/commit/67c599ec39)] - **test**: replace forEach with for..of in test-net-isipv4.js (Niya Shiyas) [#49822](https://github.com/nodejs/node/pull/49822) +* \[[`19d3ce2494`](https://github.com/nodejs/node/commit/19d3ce2494)] - **test**: deflake `test-esm-loader-resolve-type` (Antoine du Hamel) [#50273](https://github.com/nodejs/node/pull/50273) +* \[[`2d8d6c5701`](https://github.com/nodejs/node/commit/2d8d6c5701)] - **test**: replace forEach with for..of in test-http2-server (Niya Shiyas) [#49819](https://github.com/nodejs/node/pull/49819) +* \[[`af31d51e5a`](https://github.com/nodejs/node/commit/af31d51e5a)] - **test**: replace forEach with for..of in test-http2-client-destroy.js (Niya Shiyas) [#49820](https://github.com/nodejs/node/pull/49820) +* \[[`465ad2a5ce`](https://github.com/nodejs/node/commit/465ad2a5ce)] - **test**: update `url` web platform tests (Yagiz Nizipli) [#50264](https://github.com/nodejs/node/pull/50264) +* \[[`3b80a6894c`](https://github.com/nodejs/node/commit/3b80a6894c)] - **test**: set `test-emit-after-on-destroyed` as flaky (Yagiz Nizipli) [#50246](https://github.com/nodejs/node/pull/50246) +* \[[`57adbdd156`](https://github.com/nodejs/node/commit/57adbdd156)] - **test**: set inspector async stack test as flaky (Yagiz Nizipli) [#50244](https://github.com/nodejs/node/pull/50244) +* \[[`6507f66404`](https://github.com/nodejs/node/commit/6507f66404)] - **test**: set test-worker-nearheaplimit-deadlock flaky (StefanStojanovic) [#50277](https://github.com/nodejs/node/pull/50277) +* \[[`21a6ba548d`](https://github.com/nodejs/node/commit/21a6ba548d)] - **test**: set `test-cli-node-options` as flaky (Yagiz Nizipli) [#50296](https://github.com/nodejs/node/pull/50296) +* \[[`c55f8f30cb`](https://github.com/nodejs/node/commit/c55f8f30cb)] - **test**: reduce the number of requests and parsers (Luigi Pinca) [#50240](https://github.com/nodejs/node/pull/50240) +* \[[`5129bedfa2`](https://github.com/nodejs/node/commit/5129bedfa2)] - **test**: set crypto-timing test as flaky (Yagiz Nizipli) [#50232](https://github.com/nodejs/node/pull/50232) +* \[[`9bc5ab5e07`](https://github.com/nodejs/node/commit/9bc5ab5e07)] - **test**: set `test-structuredclone-*` as flaky (Yagiz Nizipli) [#50261](https://github.com/nodejs/node/pull/50261) +* \[[`317e447ddc`](https://github.com/nodejs/node/commit/317e447ddc)] - **test**: deflake `test-loaders-workers-spawned` (Antoine du Hamel) [#50251](https://github.com/nodejs/node/pull/50251) +* \[[`0c710daae2`](https://github.com/nodejs/node/commit/0c710daae2)] - **test**: improve code coverage of diagnostics\_channel (Jithil P Ponnan) [#50053](https://github.com/nodejs/node/pull/50053) +* \[[`7c6e4d7ec3`](https://github.com/nodejs/node/commit/7c6e4d7ec3)] - **test**: set `test-esm-loader-resolve-type` as flaky (Yagiz Nizipli) [#50226](https://github.com/nodejs/node/pull/50226) +* \[[`c8744909b0`](https://github.com/nodejs/node/commit/c8744909b0)] - **test**: set inspector async hook test as flaky (Yagiz Nizipli) [#50252](https://github.com/nodejs/node/pull/50252) +* \[[`3e38001739`](https://github.com/nodejs/node/commit/3e38001739)] - **test**: skip test-benchmark-os.js on IBM i (Abdirahim Musse) [#50208](https://github.com/nodejs/node/pull/50208) +* \[[`dd66fdfb7b`](https://github.com/nodejs/node/commit/dd66fdfb7b)] - **test**: set parallel http server test as flaky (Yagiz Nizipli) [#50227](https://github.com/nodejs/node/pull/50227) +* \[[`a38d1311bf`](https://github.com/nodejs/node/commit/a38d1311bf)] - **test**: set test-worker-nearheaplimit-deadlock flaky (Stefan Stojanovic) [#50238](https://github.com/nodejs/node/pull/50238) +* \[[`8efb75fd80`](https://github.com/nodejs/node/commit/8efb75fd80)] - **test**: set `test-runner-watch-mode` as flaky (Yagiz Nizipli) [#50221](https://github.com/nodejs/node/pull/50221) +* \[[`143ddded74`](https://github.com/nodejs/node/commit/143ddded74)] - **test**: set sea snapshot tests as flaky (Yagiz Nizipli) [#50223](https://github.com/nodejs/node/pull/50223) +* \[[`ae905a8f35`](https://github.com/nodejs/node/commit/ae905a8f35)] - **test**: fix defect path traversal tests (Tobias Nießen) [#50124](https://github.com/nodejs/node/pull/50124) +* \[[`ce27ee701b`](https://github.com/nodejs/node/commit/ce27ee701b)] - **tls**: reduce TLS 'close' event listener warnings (Tim Perry) [#50136](https://github.com/nodejs/node/pull/50136) +* \[[`ab4bae8e1f`](https://github.com/nodejs/node/commit/ab4bae8e1f)] - **tools**: drop support for osx notarization with gon (Ulises Gascón) [#50291](https://github.com/nodejs/node/pull/50291) +* \[[`5df3d5abcc`](https://github.com/nodejs/node/commit/5df3d5abcc)] - **tools**: update comment in `update-uncidi.sh` and `acorn_version.h` (Jungku Lee) [#50175](https://github.com/nodejs/node/pull/50175) +* \[[`bf7b94f0b3`](https://github.com/nodejs/node/commit/bf7b94f0b3)] - **tools**: refactor checkimports.py (Mohammed Keyvanzadeh) [#50011](https://github.com/nodejs/node/pull/50011) +* \[[`5dc454a837`](https://github.com/nodejs/node/commit/5dc454a837)] - **util**: remove internal mime fns from benchmarks (Aras Abbasi) [#50201](https://github.com/nodejs/node/pull/50201) +* \[[`8f7eb15603`](https://github.com/nodejs/node/commit/8f7eb15603)] - **vm**: use import attributes instead of import assertions (Antoine du Hamel) [#50141](https://github.com/nodejs/node/pull/50141) +* \[[`dda33c2bf1`](https://github.com/nodejs/node/commit/dda33c2bf1)] - **vm**: reject in importModuleDynamically without --experimental-vm-modules (Joyee Cheung) [#50137](https://github.com/nodejs/node/pull/50137) +* \[[`3999362c59`](https://github.com/nodejs/node/commit/3999362c59)] - **vm**: use internal versions of compileFunction and Script (Joyee Cheung) [#50137](https://github.com/nodejs/node/pull/50137) +* \[[`a54179f0e0`](https://github.com/nodejs/node/commit/a54179f0e0)] - **vm**: unify host-defined option generation in vm.compileFunction (Joyee Cheung) [#50137](https://github.com/nodejs/node/pull/50137) +* \[[`87be790fa9`](https://github.com/nodejs/node/commit/87be790fa9)] - **worker**: handle detached `MessagePort` from a different context (Juan José) [#49150](https://github.com/nodejs/node/pull/49150) + + + +## 2023-10-17, Version 21.0.0 (Current), @RafaelGSS and @targos + +We're excited to announce the release of Node.js 21! Highlights include updates of the V8 JavaScript engine to 11.8, +stable `fetch` and `WebStreams`, a new experimental flag to change the interpretation of ambiguous code +from CommonJS to ES modules (`--experimental-default-type`), many updates to our test runner, and more! + +Node.js 21 will replace Node.js 20 as our ‘Current’ release line when Node.js 20 enters long-term support (LTS) later this month. +As per the release schedule, Node.js 21 will be ‘Current' release for the next 6 months, until April 2024. + +### Other Notable Changes + +* \[[`740ca5423a`](https://github.com/nodejs/node/commit/740ca5423a)] - **doc**: promote fetch/webstreams from experimental to stable (Steven) [#45684](https://github.com/nodejs/node/pull/45684) +* \[[`85301803e1`](https://github.com/nodejs/node/commit/85301803e1)] - **esm**: --experimental-default-type flag to flip module defaults (Geoffrey Booth) [#49869](https://github.com/nodejs/node/pull/49869) +* \[[`705e623ac4`](https://github.com/nodejs/node/commit/705e623ac4)] - **esm**: remove `globalPreload` hook (superseded by `initialize`) (Jacob Smith) [#49144](https://github.com/nodejs/node/pull/49144) +* \[[`e01c1d700d`](https://github.com/nodejs/node/commit/e01c1d700d)] - **fs**: add flush option to writeFile() functions (Colin Ihrig) [#50009](https://github.com/nodejs/node/pull/50009) +* \[[`1948dce707`](https://github.com/nodejs/node/commit/1948dce707)] - **(SEMVER-MAJOR)** **fs**: add globSync implementation (Moshe Atlow) [#47653](https://github.com/nodejs/node/pull/47653) +* \[[`e28dbe1c2b`](https://github.com/nodejs/node/commit/e28dbe1c2b)] - **(SEMVER-MINOR)** **lib**: add WebSocket client (Matthew Aitken) [#49830](https://github.com/nodejs/node/pull/49830) +* \[[`95b8f5dcab`](https://github.com/nodejs/node/commit/95b8f5dcab)] - **stream**: optimize Writable (Robert Nagy) [#50012](https://github.com/nodejs/node/pull/50012) +* \[[`7cd4e70948`](https://github.com/nodejs/node/commit/7cd4e70948)] - **(SEMVER-MAJOR)** **test\_runner**: support passing globs (Moshe Atlow) [#47653](https://github.com/nodejs/node/pull/47653) +* \[[`1d220b55ac`](https://github.com/nodejs/node/commit/1d220b55ac)] - **vm**: use default HDO when importModuleDynamically is not set (Joyee Cheung) [#49950](https://github.com/nodejs/node/pull/49950) + +### Semver-Major Commits + +* \[[`ac2a68c76b`](https://github.com/nodejs/node/commit/ac2a68c76b)] - **(SEMVER-MAJOR)** **build**: drop support for Visual Studio 2019 (Michaël Zasso) [#49051](https://github.com/nodejs/node/pull/49051) +* \[[`4e3983031a`](https://github.com/nodejs/node/commit/4e3983031a)] - **(SEMVER-MAJOR)** **build**: bump supported macOS and Xcode versions (Michaël Zasso) [#49164](https://github.com/nodejs/node/pull/49164) +* \[[`5a0777776d`](https://github.com/nodejs/node/commit/5a0777776d)] - **(SEMVER-MAJOR)** **crypto**: do not overwrite \_writableState.defaultEncoding (Tobias Nießen) [#49140](https://github.com/nodejs/node/pull/49140) +* \[[`162a0652ab`](https://github.com/nodejs/node/commit/162a0652ab)] - **(SEMVER-MAJOR)** **deps**: bump minimum ICU version to 73 (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`17a74ddd3d`](https://github.com/nodejs/node/commit/17a74ddd3d)] - **(SEMVER-MAJOR)** **deps**: update V8 to 11.8.172.13 (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`e9ff81016d`](https://github.com/nodejs/node/commit/e9ff81016d)] - **(SEMVER-MAJOR)** **deps**: update llhttp to 9.1.2 (Paolo Insogna) [#48981](https://github.com/nodejs/node/pull/48981) +* \[[`7ace5aba75`](https://github.com/nodejs/node/commit/7ace5aba75)] - **(SEMVER-MAJOR)** **events**: validate options of `on` and `once` (Deokjin Kim) [#46018](https://github.com/nodejs/node/pull/46018) +* \[[`b3ec13d449`](https://github.com/nodejs/node/commit/b3ec13d449)] - **(SEMVER-MAJOR)** **fs**: adjust `position` validation in reading methods (Livia Medeiros) [#42835](https://github.com/nodejs/node/pull/42835) +* \[[`1948dce707`](https://github.com/nodejs/node/commit/1948dce707)] - **(SEMVER-MAJOR)** **fs**: add globSync implementation (Moshe Atlow) [#47653](https://github.com/nodejs/node/pull/47653) +* \[[`d68d0eacaa`](https://github.com/nodejs/node/commit/d68d0eacaa)] - **(SEMVER-MAJOR)** **http**: reduce parts in chunked response when corking (Robert Nagy) [#50167](https://github.com/nodejs/node/pull/50167) +* \[[`c5b0b894ed`](https://github.com/nodejs/node/commit/c5b0b894ed)] - **(SEMVER-MAJOR)** **lib**: mark URL/URLSearchParams as uncloneable and untransferable (Chengzhong Wu) [#47497](https://github.com/nodejs/node/pull/47497) +* \[[`3205b1936a`](https://github.com/nodejs/node/commit/3205b1936a)] - **(SEMVER-MAJOR)** **lib**: remove aix directory case for package reader (Yagiz Nizipli) [#48605](https://github.com/nodejs/node/pull/48605) +* \[[`b40f0c3074`](https://github.com/nodejs/node/commit/b40f0c3074)] - **(SEMVER-MAJOR)** **lib**: add `navigator.hardwareConcurrency` (Yagiz Nizipli) [#47769](https://github.com/nodejs/node/pull/47769) +* \[[`4b08c4c047`](https://github.com/nodejs/node/commit/4b08c4c047)] - **(SEMVER-MAJOR)** **lib**: runtime deprecate punycode (Yagiz Nizipli) [#47202](https://github.com/nodejs/node/pull/47202) +* \[[`3ce51ae9c0`](https://github.com/nodejs/node/commit/3ce51ae9c0)] - **(SEMVER-MAJOR)** **module**: harmonize error code between ESM and CJS (Antoine du Hamel) [#48606](https://github.com/nodejs/node/pull/48606) +* \[[`7202859402`](https://github.com/nodejs/node/commit/7202859402)] - **(SEMVER-MAJOR)** **net**: do not treat `server.maxConnections=0` as `Infinity` (ignoramous) [#48276](https://github.com/nodejs/node/pull/48276) +* \[[`c15bafdaf4`](https://github.com/nodejs/node/commit/c15bafdaf4)] - **(SEMVER-MAJOR)** **net**: only defer \_final call when connecting (Jason Zhang) [#47385](https://github.com/nodejs/node/pull/47385) +* \[[`6ffacbf0f9`](https://github.com/nodejs/node/commit/6ffacbf0f9)] - **(SEMVER-MAJOR)** **node-api**: rename internal NAPI\_VERSION definition (Chengzhong Wu) [#48501](https://github.com/nodejs/node/pull/48501) +* \[[`11af089b14`](https://github.com/nodejs/node/commit/11af089b14)] - **(SEMVER-MAJOR)** **src**: update NODE\_MODULE\_VERSION to 120 (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`d920b7c94b`](https://github.com/nodejs/node/commit/d920b7c94b)] - **(SEMVER-MAJOR)** **src**: throw DOMException on cloning non-serializable objects (Chengzhong Wu) [#47839](https://github.com/nodejs/node/pull/47839) +* \[[`64549731b6`](https://github.com/nodejs/node/commit/64549731b6)] - **(SEMVER-MAJOR)** **src**: throw DataCloneError on transfering untransferable objects (Chengzhong Wu) [#47604](https://github.com/nodejs/node/pull/47604) +* \[[`dac8de689b`](https://github.com/nodejs/node/commit/dac8de689b)] - **(SEMVER-MAJOR)** **stream**: use private properties for strategies (Yagiz Nizipli) [#47218](https://github.com/nodejs/node/pull/47218) +* \[[`1fa084ecdf`](https://github.com/nodejs/node/commit/1fa084ecdf)] - **(SEMVER-MAJOR)** **stream**: use private properties for encoding (Yagiz Nizipli) [#47218](https://github.com/nodejs/node/pull/47218) +* \[[`4e93247079`](https://github.com/nodejs/node/commit/4e93247079)] - **(SEMVER-MAJOR)** **stream**: use private properties for compression (Yagiz Nizipli) [#47218](https://github.com/nodejs/node/pull/47218) +* \[[`527589b755`](https://github.com/nodejs/node/commit/527589b755)] - **(SEMVER-MAJOR)** **test\_runner**: disallow array in `run` options (Raz Luvaton) [#49935](https://github.com/nodejs/node/pull/49935) +* \[[`7cd4e70948`](https://github.com/nodejs/node/commit/7cd4e70948)] - **(SEMVER-MAJOR)** **test\_runner**: support passing globs (Moshe Atlow) [#47653](https://github.com/nodejs/node/pull/47653) +* \[[`2ef170254b`](https://github.com/nodejs/node/commit/2ef170254b)] - **(SEMVER-MAJOR)** **tls**: use `validateNumber` for `options.minDHSize` (Deokjin Kim) [#49973](https://github.com/nodejs/node/pull/49973) +* \[[`092fb9f541`](https://github.com/nodejs/node/commit/092fb9f541)] - **(SEMVER-MAJOR)** **tls**: use validateFunction for `options.checkServerIdentity` (Deokjin Kim) [#49896](https://github.com/nodejs/node/pull/49896) +* \[[`ccca547e28`](https://github.com/nodejs/node/commit/ccca547e28)] - **(SEMVER-MAJOR)** **util**: runtime deprecate `promisify`-ing a function returning a `Promise` (Antoine du Hamel) [#49609](https://github.com/nodejs/node/pull/49609) +* \[[`4038cf0513`](https://github.com/nodejs/node/commit/4038cf0513)] - **(SEMVER-MAJOR)** **vm**: freeze `dependencySpecifiers` array (Antoine du Hamel) [#49720](https://github.com/nodejs/node/pull/49720) + +### Semver-Minor Commits + +* \[[`3227d7327c`](https://github.com/nodejs/node/commit/3227d7327c)] - **(SEMVER-MINOR)** **deps**: update uvwasi to 0.0.19 (Node.js GitHub Bot) [#49908](https://github.com/nodejs/node/pull/49908) +* \[[`e28dbe1c2b`](https://github.com/nodejs/node/commit/e28dbe1c2b)] - **(SEMVER-MINOR)** **lib**: add WebSocket client (Matthew Aitken) [#49830](https://github.com/nodejs/node/pull/49830) +* \[[`9f9c58212e`](https://github.com/nodejs/node/commit/9f9c58212e)] - **(SEMVER-MINOR)** **test\_runner, cli**: add --test-concurrency flag (Colin Ihrig) [#49996](https://github.com/nodejs/node/pull/49996) +* \[[`d37b0d267f`](https://github.com/nodejs/node/commit/d37b0d267f)] - **(SEMVER-MINOR)** **wasi**: updates required for latest uvwasi version (Michael Dawson) [#49908](https://github.com/nodejs/node/pull/49908) + +### Semver-Patch Commits + +* \[[`33c87ec096`](https://github.com/nodejs/node/commit/33c87ec096)] - **benchmark**: fix race condition on fs benchs (Vinicius Lourenço) [#50035](https://github.com/nodejs/node/pull/50035) +* \[[`3c0ec61c4b`](https://github.com/nodejs/node/commit/3c0ec61c4b)] - **benchmark**: add warmup to accessSync bench (Rafael Gonzaga) [#50073](https://github.com/nodejs/node/pull/50073) +* \[[`1a839f388e`](https://github.com/nodejs/node/commit/1a839f388e)] - **benchmark**: improved config for blob,file benchmark (Vinícius Lourenço) [#49730](https://github.com/nodejs/node/pull/49730) +* \[[`86fe5a80f3`](https://github.com/nodejs/node/commit/86fe5a80f3)] - **benchmark**: added new benchmarks for blob (Vinícius Lourenço) [#49730](https://github.com/nodejs/node/pull/49730) +* \[[`6322d4f587`](https://github.com/nodejs/node/commit/6322d4f587)] - **build**: fix IBM i build with Python 3.9 (Richard Lau) [#48056](https://github.com/nodejs/node/pull/48056) +* \[[`17c55d176b`](https://github.com/nodejs/node/commit/17c55d176b)] - **build**: reset embedder string to "-node.0" (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`f10928f926`](https://github.com/nodejs/node/commit/f10928f926)] - **crypto**: use X509\_ALGOR accessors instead of reaching into X509\_ALGOR (David Benjamin) [#50057](https://github.com/nodejs/node/pull/50057) +* \[[`136a96722a`](https://github.com/nodejs/node/commit/136a96722a)] - **crypto**: account for disabled SharedArrayBuffer (Shelley Vohr) [#50034](https://github.com/nodejs/node/pull/50034) +* \[[`17b9925393`](https://github.com/nodejs/node/commit/17b9925393)] - **crypto**: return clear errors when loading invalid PFX data (Tim Perry) [#49566](https://github.com/nodejs/node/pull/49566) +* \[[`ca25d564c6`](https://github.com/nodejs/node/commit/ca25d564c6)] - **deps**: upgrade npm to 10.2.0 (npm team) [#50027](https://github.com/nodejs/node/pull/50027) +* \[[`f23a9353ae`](https://github.com/nodejs/node/commit/f23a9353ae)] - **deps**: update corepack to 0.21.0 (Node.js GitHub Bot) [#50088](https://github.com/nodejs/node/pull/50088) +* \[[`ceedb3a509`](https://github.com/nodejs/node/commit/ceedb3a509)] - **deps**: update simdutf to 3.2.18 (Node.js GitHub Bot) [#50091](https://github.com/nodejs/node/pull/50091) +* \[[`0522ac086c`](https://github.com/nodejs/node/commit/0522ac086c)] - **deps**: update zlib to 1.2.13.1-motley-fef5869 (Node.js GitHub Bot) [#50085](https://github.com/nodejs/node/pull/50085) +* \[[`4f8c5829da`](https://github.com/nodejs/node/commit/4f8c5829da)] - **deps**: update googletest to 2dd1c13 (Node.js GitHub Bot) [#50081](https://github.com/nodejs/node/pull/50081) +* \[[`588784ea30`](https://github.com/nodejs/node/commit/588784ea30)] - **deps**: update undici to 5.25.4 (Node.js GitHub Bot) [#50025](https://github.com/nodejs/node/pull/50025) +* \[[`c9eef0c3c4`](https://github.com/nodejs/node/commit/c9eef0c3c4)] - **deps**: update googletest to e47544a (Node.js GitHub Bot) [#49982](https://github.com/nodejs/node/pull/49982) +* \[[`23cb478398`](https://github.com/nodejs/node/commit/23cb478398)] - **deps**: update ada to 2.6.10 (Node.js GitHub Bot) [#49984](https://github.com/nodejs/node/pull/49984) +* \[[`61411bb323`](https://github.com/nodejs/node/commit/61411bb323)] - **deps**: fix call to undeclared functions 'ntohl' and 'htons' (MatteoBax) [#49979](https://github.com/nodejs/node/pull/49979) +* \[[`49cf182e30`](https://github.com/nodejs/node/commit/49cf182e30)] - **deps**: update ada to 2.6.9 (Node.js GitHub Bot) [#49340](https://github.com/nodejs/node/pull/49340) +* \[[`ceb6df0f22`](https://github.com/nodejs/node/commit/ceb6df0f22)] - **deps**: update ada to 2.6.8 (Node.js GitHub Bot) [#49340](https://github.com/nodejs/node/pull/49340) +* \[[`b73e18b5dc`](https://github.com/nodejs/node/commit/b73e18b5dc)] - **deps**: update ada to 2.6.7 (Node.js GitHub Bot) [#49340](https://github.com/nodejs/node/pull/49340) +* \[[`baf2256617`](https://github.com/nodejs/node/commit/baf2256617)] - **deps**: update ada to 2.6.5 (Node.js GitHub Bot) [#49340](https://github.com/nodejs/node/pull/49340) +* \[[`a20a328a9b`](https://github.com/nodejs/node/commit/a20a328a9b)] - **deps**: update ada to 2.6.3 (Node.js GitHub Bot) [#49340](https://github.com/nodejs/node/pull/49340) +* \[[`3838b579e4`](https://github.com/nodejs/node/commit/3838b579e4)] - **deps**: V8: cherry-pick 8ec2651fbdd8 (Abdirahim Musse) [#49862](https://github.com/nodejs/node/pull/49862) +* \[[`668437ccad`](https://github.com/nodejs/node/commit/668437ccad)] - **deps**: V8: cherry-pick b60a03df4ceb (Joyee Cheung) [#49491](https://github.com/nodejs/node/pull/49491) +* \[[`f970087147`](https://github.com/nodejs/node/commit/f970087147)] - **deps**: V8: backport 93b1a74cbc9b (Joyee Cheung) [#49419](https://github.com/nodejs/node/pull/49419) +* \[[`4531c154e5`](https://github.com/nodejs/node/commit/4531c154e5)] - **deps**: V8: cherry-pick 8ec2651fbdd8 (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`9ad0e2cacc`](https://github.com/nodejs/node/commit/9ad0e2cacc)] - **deps**: V8: cherry-pick 89b3702c92b0 (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`dfc9c86868`](https://github.com/nodejs/node/commit/dfc9c86868)] - **deps**: V8: cherry-pick de9a5de2274f (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`186b36efba`](https://github.com/nodejs/node/commit/186b36efba)] - **deps**: V8: cherry-pick b5b5d6c31bb0 (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`867586ce95`](https://github.com/nodejs/node/commit/867586ce95)] - **deps**: V8: cherry-pick 93b1a74cbc9b (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`4ad3479ba7`](https://github.com/nodejs/node/commit/4ad3479ba7)] - **deps**: V8: cherry-pick 1a3ecc2483b2 (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`660f902f16`](https://github.com/nodejs/node/commit/660f902f16)] - **deps**: patch V8 to avoid duplicated zlib symbol (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`f7c1d410ad`](https://github.com/nodejs/node/commit/f7c1d410ad)] - **deps**: remove usage of a C++20 feature from V8 (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`9c4030bfb9`](https://github.com/nodejs/node/commit/9c4030bfb9)] - **deps**: avoid compilation error with ASan (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`5f05cc15e6`](https://github.com/nodejs/node/commit/5f05cc15e6)] - **deps**: disable V8 concurrent sparkplug compilation (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`42cd952dbd`](https://github.com/nodejs/node/commit/42cd952dbd)] - **deps**: silence irrelevant V8 warning (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`88cf90f9c4`](https://github.com/nodejs/node/commit/88cf90f9c4)] - **deps**: always define V8\_EXPORT\_PRIVATE as no-op (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`8609915951`](https://github.com/nodejs/node/commit/8609915951)] - **doc**: improve ccache explanation (Chengzhong Wu) [#50133](https://github.com/nodejs/node/pull/50133) +* \[[`91d21324a9`](https://github.com/nodejs/node/commit/91d21324a9)] - **doc**: move danielleadams to TSC non-voting member (Danielle Adams) [#50142](https://github.com/nodejs/node/pull/50142) +* \[[`34fa7043a2`](https://github.com/nodejs/node/commit/34fa7043a2)] - **doc**: fix description of `fs.readdir` `recursive` option (RamdohokarAngha) [#48902](https://github.com/nodejs/node/pull/48902) +* \[[`81e4d2ec2f`](https://github.com/nodejs/node/commit/81e4d2ec2f)] - **doc**: mention files read before env setup (Rafael Gonzaga) [#50072](https://github.com/nodejs/node/pull/50072) +* \[[`0ce37ed8e9`](https://github.com/nodejs/node/commit/0ce37ed8e9)] - **doc**: move permission model to Active Development (Rafael Gonzaga) [#50068](https://github.com/nodejs/node/pull/50068) +* \[[`3c430212c3`](https://github.com/nodejs/node/commit/3c430212c3)] - **doc**: add command to get patch minors and majors (Rafael Gonzaga) [#50067](https://github.com/nodejs/node/pull/50067) +* \[[`e43bf4c31d`](https://github.com/nodejs/node/commit/e43bf4c31d)] - **doc**: use precise promise terminology in fs (Benjamin Gruenbaum) [#50029](https://github.com/nodejs/node/pull/50029) +* \[[`d3a5f1fb5f`](https://github.com/nodejs/node/commit/d3a5f1fb5f)] - **doc**: use precise terminology in test runner (Benjamin Gruenbaum) [#50028](https://github.com/nodejs/node/pull/50028) +* \[[`24dea2348d`](https://github.com/nodejs/node/commit/24dea2348d)] - **doc**: clarify explaination text on how to run the example (Anshul Sinha) [#39020](https://github.com/nodejs/node/pull/39020) +* \[[`f3ed57bd8b`](https://github.com/nodejs/node/commit/f3ed57bd8b)] - **doc**: reserve 119 for Electron 28 (David Sanders) [#50020](https://github.com/nodejs/node/pull/50020) +* \[[`85c09f178c`](https://github.com/nodejs/node/commit/85c09f178c)] - **doc**: update Collaborator pronouns (Tierney Cyren) [#50005](https://github.com/nodejs/node/pull/50005) +* \[[`099e2f7bce`](https://github.com/nodejs/node/commit/099e2f7bce)] - **doc**: update link to Abstract Modules Records spec (Rich Trott) [#49961](https://github.com/nodejs/node/pull/49961) +* \[[`47b2883673`](https://github.com/nodejs/node/commit/47b2883673)] - **doc**: updated building docs for windows (Claudio W) [#49767](https://github.com/nodejs/node/pull/49767) +* \[[`7b624c30b2`](https://github.com/nodejs/node/commit/7b624c30b2)] - **doc**: update CHANGELOG\_V20 about vm fixes (Joyee Cheung) [#49951](https://github.com/nodejs/node/pull/49951) +* \[[`1dc0667aa6`](https://github.com/nodejs/node/commit/1dc0667aa6)] - **doc**: document dangerous symlink behavior (Tobias Nießen) [#49154](https://github.com/nodejs/node/pull/49154) +* \[[`bc056c2426`](https://github.com/nodejs/node/commit/bc056c2426)] - **doc**: add main ARIA landmark to API docs (Rich Trott) [#49882](https://github.com/nodejs/node/pull/49882) +* \[[`f416a0f555`](https://github.com/nodejs/node/commit/f416a0f555)] - **doc**: add navigation ARIA landmark to doc ToC (Rich Trott) [#49882](https://github.com/nodejs/node/pull/49882) +* \[[`740ca5423a`](https://github.com/nodejs/node/commit/740ca5423a)] - **doc**: promote fetch/webstreams from experimental to stable (Steven) [#45684](https://github.com/nodejs/node/pull/45684) +* \[[`f802aa0645`](https://github.com/nodejs/node/commit/f802aa0645)] - **doc**: fix 'partial' typo (Colin Ihrig) [#48657](https://github.com/nodejs/node/pull/48657) +* \[[`6fda81d4f5`](https://github.com/nodejs/node/commit/6fda81d4f5)] - **doc**: mention `Navigator` is a partial implementation (Moshe Atlow) [#48656](https://github.com/nodejs/node/pull/48656) +* \[[`6aa2aeedcb`](https://github.com/nodejs/node/commit/6aa2aeedcb)] - **doc**: mark Node.js 19 as End-of-Life (Richard Lau) [#48283](https://github.com/nodejs/node/pull/48283) +* \[[`0ee9c83ffc`](https://github.com/nodejs/node/commit/0ee9c83ffc)] - **errors**: improve performance of determine-specific-type (Aras Abbasi) [#49696](https://github.com/nodejs/node/pull/49696) +* \[[`4f84a3d200`](https://github.com/nodejs/node/commit/4f84a3d200)] - **errors**: improve formatList in errors.js (Aras Abbasi) [#49642](https://github.com/nodejs/node/pull/49642) +* \[[`cc725a653a`](https://github.com/nodejs/node/commit/cc725a653a)] - **errors**: improve performance of instantiation (Aras Abbasi) [#49654](https://github.com/nodejs/node/pull/49654) +* \[[`d1ef6aa2db`](https://github.com/nodejs/node/commit/d1ef6aa2db)] - **esm**: use import attributes instead of import assertions (Antoine du Hamel) [#50140](https://github.com/nodejs/node/pull/50140) +* \[[`19b470f866`](https://github.com/nodejs/node/commit/19b470f866)] - **esm**: bypass CommonJS loader under --default-type (Geoffrey Booth) [#49986](https://github.com/nodejs/node/pull/49986) +* \[[`9c683204db`](https://github.com/nodejs/node/commit/9c683204db)] - **esm**: unflag extensionless javascript and wasm in module scope (Geoffrey Booth) [#49974](https://github.com/nodejs/node/pull/49974) +* \[[`05be31d5de`](https://github.com/nodejs/node/commit/05be31d5de)] - **esm**: improve `getFormatOfExtensionlessFile` speed (Yagiz Nizipli) [#49965](https://github.com/nodejs/node/pull/49965) +* \[[`aadfea4979`](https://github.com/nodejs/node/commit/aadfea4979)] - **esm**: improve JSDoc annotation of internal functions (Antoine du Hamel) [#49959](https://github.com/nodejs/node/pull/49959) +* \[[`7f0e36af52`](https://github.com/nodejs/node/commit/7f0e36af52)] - **esm**: fix cache collision on JSON files using file: URL (Antoine du Hamel) [#49887](https://github.com/nodejs/node/pull/49887) +* \[[`85301803e1`](https://github.com/nodejs/node/commit/85301803e1)] - **esm**: --experimental-default-type flag to flip module defaults (Geoffrey Booth) [#49869](https://github.com/nodejs/node/pull/49869) +* \[[`f42a103991`](https://github.com/nodejs/node/commit/f42a103991)] - **esm**: require braces for modules code (Geoffrey Booth) [#49657](https://github.com/nodejs/node/pull/49657) +* \[[`705e623ac4`](https://github.com/nodejs/node/commit/705e623ac4)] - **esm**: remove `globalPreload` hook (superseded by `initialize`) (Jacob Smith) [#49144](https://github.com/nodejs/node/pull/49144) +* \[[`18a818744f`](https://github.com/nodejs/node/commit/18a818744f)] - **fs**: improve error performance of `readdirSync` (Yagiz Nizipli) [#50131](https://github.com/nodejs/node/pull/50131) +* \[[`d3985296a9`](https://github.com/nodejs/node/commit/d3985296a9)] - **fs**: fix `unlinkSync` typings (Yagiz Nizipli) [#49859](https://github.com/nodejs/node/pull/49859) +* \[[`6bc7fa7906`](https://github.com/nodejs/node/commit/6bc7fa7906)] - **fs**: improve error perf of sync `chmod`+`fchmod` (CanadaHonk) [#49859](https://github.com/nodejs/node/pull/49859) +* \[[`6bd77db41f`](https://github.com/nodejs/node/commit/6bd77db41f)] - **fs**: improve error perf of sync `*times` (CanadaHonk) [#49864](https://github.com/nodejs/node/pull/49864) +* \[[`bf0f0789da`](https://github.com/nodejs/node/commit/bf0f0789da)] - **fs**: improve error performance of writevSync (IlyasShabi) [#50038](https://github.com/nodejs/node/pull/50038) +* \[[`8a49735bae`](https://github.com/nodejs/node/commit/8a49735bae)] - **fs**: add flush option to createWriteStream() (Colin Ihrig) [#50093](https://github.com/nodejs/node/pull/50093) +* \[[`ed49722a8a`](https://github.com/nodejs/node/commit/ed49722a8a)] - **fs**: improve error performance for `ftruncateSync` (André Alves) [#50032](https://github.com/nodejs/node/pull/50032) +* \[[`e01c1d700d`](https://github.com/nodejs/node/commit/e01c1d700d)] - **fs**: add flush option to writeFile() functions (Colin Ihrig) [#50009](https://github.com/nodejs/node/pull/50009) +* \[[`f7a160d5b4`](https://github.com/nodejs/node/commit/f7a160d5b4)] - **fs**: improve error performance for `fdatasyncSync` (Jungku Lee) [#49898](https://github.com/nodejs/node/pull/49898) +* \[[`813713f211`](https://github.com/nodejs/node/commit/813713f211)] - **fs**: throw errors from sync branches instead of separate implementations (Joyee Cheung) [#49913](https://github.com/nodejs/node/pull/49913) +* \[[`b866e38192`](https://github.com/nodejs/node/commit/b866e38192)] - **http**: refactor to make servername option normalization testable (Rongjian Zhang) [#38733](https://github.com/nodejs/node/pull/38733) +* \[[`2990390359`](https://github.com/nodejs/node/commit/2990390359)] - **inspector**: simplify dispatchProtocolMessage (Daniel Lemire) [#49780](https://github.com/nodejs/node/pull/49780) +* \[[`d4c5fe488e`](https://github.com/nodejs/node/commit/d4c5fe488e)] - **lib**: fix compileFunction throws range error for negative numbers (Jithil P Ponnan) [#49855](https://github.com/nodejs/node/pull/49855) +* \[[`589ac5004c`](https://github.com/nodejs/node/commit/589ac5004c)] - **lib**: faster internal createBlob (Vinícius Lourenço) [#49730](https://github.com/nodejs/node/pull/49730) +* \[[`952cf0d17a`](https://github.com/nodejs/node/commit/952cf0d17a)] - **lib**: reduce overhead of validateObject (Vinicius Lourenço) [#49928](https://github.com/nodejs/node/pull/49928) +* \[[`fa250fdec1`](https://github.com/nodejs/node/commit/fa250fdec1)] - **lib**: make fetch sync and return a Promise (Matthew Aitken) [#49936](https://github.com/nodejs/node/pull/49936) +* \[[`1b96975f27`](https://github.com/nodejs/node/commit/1b96975f27)] - **lib**: fix `primordials` typings (Sam Verschueren) [#49895](https://github.com/nodejs/node/pull/49895) +* \[[`6aa7101960`](https://github.com/nodejs/node/commit/6aa7101960)] - **lib**: update params in jsdoc for `HTTPRequestOptions` (Jungku Lee) [#49872](https://github.com/nodejs/node/pull/49872) +* \[[`a4fdb1abe0`](https://github.com/nodejs/node/commit/a4fdb1abe0)] - **lib,test**: do not hardcode Buffer.kMaxLength (Michaël Zasso) [#49876](https://github.com/nodejs/node/pull/49876) +* \[[`fd21429ef5`](https://github.com/nodejs/node/commit/fd21429ef5)] - **lib**: update usage of always on Atomics API (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`bac85be22d`](https://github.com/nodejs/node/commit/bac85be22d)] - **meta**: ping TSC for offboarding (Tobias Nießen) [#50147](https://github.com/nodejs/node/pull/50147) +* \[[`609b13e6c2`](https://github.com/nodejs/node/commit/609b13e6c2)] - **meta**: bump actions/upload-artifact from 3.1.2 to 3.1.3 (dependabot\[bot]) [#50000](https://github.com/nodejs/node/pull/50000) +* \[[`3825464ef4`](https://github.com/nodejs/node/commit/3825464ef4)] - **meta**: bump actions/cache from 3.3.1 to 3.3.2 (dependabot\[bot]) [#50003](https://github.com/nodejs/node/pull/50003) +* \[[`49f0f9ca11`](https://github.com/nodejs/node/commit/49f0f9ca11)] - **meta**: bump github/codeql-action from 2.21.5 to 2.21.9 (dependabot\[bot]) [#50002](https://github.com/nodejs/node/pull/50002) +* \[[`f156427244`](https://github.com/nodejs/node/commit/f156427244)] - **meta**: bump actions/checkout from 3.6.0 to 4.1.0 (dependabot\[bot]) [#50001](https://github.com/nodejs/node/pull/50001) +* \[[`0fe673c7e6`](https://github.com/nodejs/node/commit/0fe673c7e6)] - **meta**: update website team with new name (Rich Trott) [#49883](https://github.com/nodejs/node/pull/49883) +* \[[`51f4ff2450`](https://github.com/nodejs/node/commit/51f4ff2450)] - **module**: move helpers out of cjs loader (Geoffrey Booth) [#49912](https://github.com/nodejs/node/pull/49912) +* \[[`7517c9f95b`](https://github.com/nodejs/node/commit/7517c9f95b)] - **module, esm**: jsdoc for modules files (Geoffrey Booth) [#49523](https://github.com/nodejs/node/pull/49523) +* \[[`b55adfb4f1`](https://github.com/nodejs/node/commit/b55adfb4f1)] - **node-api**: update headers for better wasm support (Toyo Li) [#49037](https://github.com/nodejs/node/pull/49037) +* \[[`b38e312486`](https://github.com/nodejs/node/commit/b38e312486)] - **node-api**: run finalizers directly from GC (Vladimir Morozov) [#42651](https://github.com/nodejs/node/pull/42651) +* \[[`0f0dd1a493`](https://github.com/nodejs/node/commit/0f0dd1a493)] - **os**: cache homedir, remove getCheckedFunction (Aras Abbasi) [#50037](https://github.com/nodejs/node/pull/50037) +* \[[`0e507d30ac`](https://github.com/nodejs/node/commit/0e507d30ac)] - **perf\_hooks**: reduce overhead of new user timings (Vinicius Lourenço) [#49914](https://github.com/nodejs/node/pull/49914) +* \[[`328bdac7f0`](https://github.com/nodejs/node/commit/328bdac7f0)] - **perf\_hooks**: reducing overhead of performance observer entry list (Vinicius Lourenço) [#50008](https://github.com/nodejs/node/pull/50008) +* \[[`e6e320ecc7`](https://github.com/nodejs/node/commit/e6e320ecc7)] - **perf\_hooks**: reduce overhead of new resource timings (Vinicius Lourenço) [#49837](https://github.com/nodejs/node/pull/49837) +* \[[`971af4b211`](https://github.com/nodejs/node/commit/971af4b211)] - **quic**: fix up coverity warning in quic/session.cc (Michael Dawson) [#49865](https://github.com/nodejs/node/pull/49865) +* \[[`546797f2b1`](https://github.com/nodejs/node/commit/546797f2b1)] - **quic**: prevent copying ngtcp2\_cid (Tobias Nießen) [#48561](https://github.com/nodejs/node/pull/48561) +* \[[`ac6f594c97`](https://github.com/nodejs/node/commit/ac6f594c97)] - **quic**: address new coverity warning (Michael Dawson) [#48384](https://github.com/nodejs/node/pull/48384) +* \[[`4ee8ef269b`](https://github.com/nodejs/node/commit/4ee8ef269b)] - **quic**: prevent copying ngtcp2\_cid\_token (Tobias Nießen) [#48370](https://github.com/nodejs/node/pull/48370) +* \[[`6d2811fbf2`](https://github.com/nodejs/node/commit/6d2811fbf2)] - **quic**: add additional implementation (James M Snell) [#47927](https://github.com/nodejs/node/pull/47927) +* \[[`0b3fcfcf35`](https://github.com/nodejs/node/commit/0b3fcfcf35)] - **quic**: fix typo in endpoint.h (Tobias Nießen) [#47911](https://github.com/nodejs/node/pull/47911) +* \[[`76044c4e2b`](https://github.com/nodejs/node/commit/76044c4e2b)] - **quic**: add additional QUIC implementation (James M Snell) [#47603](https://github.com/nodejs/node/pull/47603) +* \[[`78a15702dd`](https://github.com/nodejs/node/commit/78a15702dd)] - **src**: avoid making JSTransferable wrapper object weak (Chengzhong Wu) [#50026](https://github.com/nodejs/node/pull/50026) +* \[[`387e2929fe`](https://github.com/nodejs/node/commit/387e2929fe)] - **src**: generate default snapshot with --predictable (Joyee Cheung) [#48749](https://github.com/nodejs/node/pull/48749) +* \[[`1643adf771`](https://github.com/nodejs/node/commit/1643adf771)] - **src**: fix TLSWrap lifetime bug in ALPN callback (Ben Noordhuis) [#49635](https://github.com/nodejs/node/pull/49635) +* \[[`66776d8665`](https://github.com/nodejs/node/commit/66776d8665)] - **src**: set port in node\_options to uint16\_t (Yagiz Nizipli) [#49151](https://github.com/nodejs/node/pull/49151) +* \[[`55ff64001a`](https://github.com/nodejs/node/commit/55ff64001a)] - **src**: name scoped lock (Mohammed Keyvanzadeh) [#50010](https://github.com/nodejs/node/pull/50010) +* \[[`b903a710f4`](https://github.com/nodejs/node/commit/b903a710f4)] - **src**: use exact return value for `uv_os_getenv` (Yagiz Nizipli) [#49149](https://github.com/nodejs/node/pull/49149) +* \[[`43500fa646`](https://github.com/nodejs/node/commit/43500fa646)] - **src**: move const variable in `node_file.h` to `node_file.cc` (Jungku Lee) [#49688](https://github.com/nodejs/node/pull/49688) +* \[[`36ab510da7`](https://github.com/nodejs/node/commit/36ab510da7)] - **src**: remove unused variable (Michaël Zasso) [#49665](https://github.com/nodejs/node/pull/49665) +* \[[`23d65e7281`](https://github.com/nodejs/node/commit/23d65e7281)] - **src**: revert `IS_RELEASE` to 0 (Rafael Gonzaga) [#49084](https://github.com/nodejs/node/pull/49084) +* \[[`38dee8a1c0`](https://github.com/nodejs/node/commit/38dee8a1c0)] - **src**: distinguish HTML transferable and cloneable (Chengzhong Wu) [#47956](https://github.com/nodejs/node/pull/47956) +* \[[`586fcff061`](https://github.com/nodejs/node/commit/586fcff061)] - **src**: fix logically dead code reported by Coverity (Mohammed Keyvanzadeh) [#48589](https://github.com/nodejs/node/pull/48589) +* \[[`7f2c810814`](https://github.com/nodejs/node/commit/7f2c810814)] - **src,tools**: initialize cppgc (Daryl Haresign) [#45704](https://github.com/nodejs/node/pull/45704) +* \[[`aad8002b88`](https://github.com/nodejs/node/commit/aad8002b88)] - **stream**: use private symbol for bitmap state (Robert Nagy) [#49993](https://github.com/nodejs/node/pull/49993) +* \[[`a85e4186e5`](https://github.com/nodejs/node/commit/a85e4186e5)] - **stream**: reduce overhead of transfer (Vinicius Lourenço) [#50107](https://github.com/nodejs/node/pull/50107) +* \[[`e9bda11761`](https://github.com/nodejs/node/commit/e9bda11761)] - **stream**: lazy allocate back pressure buffer (Robert Nagy) [#50013](https://github.com/nodejs/node/pull/50013) +* \[[`557044af40`](https://github.com/nodejs/node/commit/557044af40)] - **stream**: avoid unnecessary drain for sync stream (Robert Nagy) [#50014](https://github.com/nodejs/node/pull/50014) +* \[[`95b8f5dcab`](https://github.com/nodejs/node/commit/95b8f5dcab)] - **stream**: optimize Writable (Robert Nagy) [#50012](https://github.com/nodejs/node/pull/50012) +* \[[`5de25deeb9`](https://github.com/nodejs/node/commit/5de25deeb9)] - **stream**: avoid tick in writable hot path (Robert Nagy) [#49966](https://github.com/nodejs/node/pull/49966) +* \[[`53b5545672`](https://github.com/nodejs/node/commit/53b5545672)] - **stream**: writable state bitmap (Robert Nagy) [#49899](https://github.com/nodejs/node/pull/49899) +* \[[`d4e99b1a66`](https://github.com/nodejs/node/commit/d4e99b1a66)] - **stream**: remove asIndexedPairs (Chemi Atlow) [#48150](https://github.com/nodejs/node/pull/48150) +* \[[`41e4174945`](https://github.com/nodejs/node/commit/41e4174945)] - **test**: replace forEach with for..of in test-net-isipv6.js (Niya Shiyas) [#49823](https://github.com/nodejs/node/pull/49823) +* \[[`f0e720a7fa`](https://github.com/nodejs/node/commit/f0e720a7fa)] - **test**: add EOVERFLOW as an allowed error (Abdirahim Musse) [#50128](https://github.com/nodejs/node/pull/50128) +* \[[`224f3ae974`](https://github.com/nodejs/node/commit/224f3ae974)] - **test**: reduce number of repetition in test-heapdump-shadowrealm.js (Chengzhong Wu) [#50104](https://github.com/nodejs/node/pull/50104) +* \[[`76004f3e56`](https://github.com/nodejs/node/commit/76004f3e56)] - **test**: replace forEach with for..of in test-parse-args.mjs (Niya Shiyas) [#49824](https://github.com/nodejs/node/pull/49824) +* \[[`fce8fbadcd`](https://github.com/nodejs/node/commit/fce8fbadcd)] - **test**: replace forEach with for..of in test-process-env (Niya Shiyas) [#49825](https://github.com/nodejs/node/pull/49825) +* \[[`24492476a7`](https://github.com/nodejs/node/commit/24492476a7)] - **test**: replace forEach with for..of in test-http-url (Niya Shiyas) [#49840](https://github.com/nodejs/node/pull/49840) +* \[[`2fe511ba23`](https://github.com/nodejs/node/commit/2fe511ba23)] - **test**: replace forEach() in test-net-perf\_hooks with for of (Narcisa Codreanu) [#49831](https://github.com/nodejs/node/pull/49831) +* \[[`42c37f28e6`](https://github.com/nodejs/node/commit/42c37f28e6)] - **test**: change forEach to for...of (Tiffany Lastimosa) [#49799](https://github.com/nodejs/node/pull/49799) +* \[[`6c9625dca4`](https://github.com/nodejs/node/commit/6c9625dca4)] - **test**: update skip for moved `test-wasm-web-api` (Richard Lau) [#49958](https://github.com/nodejs/node/pull/49958) +* \[[`f05d6d090c`](https://github.com/nodejs/node/commit/f05d6d090c)] - _**Revert**_ "**test**: mark test-runner-output as flaky" (Luigi Pinca) [#49905](https://github.com/nodejs/node/pull/49905) +* \[[`035e06317a`](https://github.com/nodejs/node/commit/035e06317a)] - **test**: disambiguate AIX and IBM i (Richard Lau) [#48056](https://github.com/nodejs/node/pull/48056) +* \[[`4d0aeed4a6`](https://github.com/nodejs/node/commit/4d0aeed4a6)] - **test**: deflake test-perf-hooks.js (Joyee Cheung) [#49892](https://github.com/nodejs/node/pull/49892) +* \[[`853f57239c`](https://github.com/nodejs/node/commit/853f57239c)] - **test**: migrate message error tests from Python to JS (Yiyun Lei) [#49721](https://github.com/nodejs/node/pull/49721) +* \[[`a71e3a65bb`](https://github.com/nodejs/node/commit/a71e3a65bb)] - **test**: fix edge snapshot stack traces (Geoffrey Booth) [#49659](https://github.com/nodejs/node/pull/49659) +* \[[`6b76b7782c`](https://github.com/nodejs/node/commit/6b76b7782c)] - **test**: skip v8-updates/test-linux-perf (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`c13c98dd38`](https://github.com/nodejs/node/commit/c13c98dd38)] - **test**: skip test-tick-processor-arguments on SmartOS (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`738aa304b3`](https://github.com/nodejs/node/commit/738aa304b3)] - **test**: adapt REPL test to V8 changes (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`de5c009252`](https://github.com/nodejs/node/commit/de5c009252)] - **test**: adapt test-fs-write to V8 internal changes (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`8c36168b42`](https://github.com/nodejs/node/commit/8c36168b42)] - **test**: update flag to disable SharedArrayBuffer (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`6ccb15f7ef`](https://github.com/nodejs/node/commit/6ccb15f7ef)] - **test**: adapt debugger tests to V8 11.4 (Philip Pfaffe) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`c5de3b49e8`](https://github.com/nodejs/node/commit/c5de3b49e8)] - **test,crypto**: update WebCryptoAPI WPT (Filip Skokan) [#50039](https://github.com/nodejs/node/pull/50039) +* \[[`4b35a9cfda`](https://github.com/nodejs/node/commit/4b35a9cfda)] - **test\_runner**: add test location for FileTests (Colin Ihrig) [#49999](https://github.com/nodejs/node/pull/49999) +* \[[`c935d4c8fa`](https://github.com/nodejs/node/commit/c935d4c8fa)] - **test\_runner**: replace spurious if with else (Colin Ihrig) [#49943](https://github.com/nodejs/node/pull/49943) +* \[[`a4c7f81241`](https://github.com/nodejs/node/commit/a4c7f81241)] - **test\_runner**: catch reporter errors (Moshe Atlow) [#49646](https://github.com/nodejs/node/pull/49646) +* \[[`bb52656fc6`](https://github.com/nodejs/node/commit/bb52656fc6)] - _**Revert**_ "**test\_runner**: run global after() hook earlier" (Joyee Cheung) [#49110](https://github.com/nodejs/node/pull/49110) +* \[[`6346bdc526`](https://github.com/nodejs/node/commit/6346bdc526)] - **test\_runner**: run global after() hook earlier (Colin Ihrig) [#49059](https://github.com/nodejs/node/pull/49059) +* \[[`0d8faf2952`](https://github.com/nodejs/node/commit/0d8faf2952)] - **test\_runner,test**: fix flaky test-runner-cli-concurrency.js (Colin Ihrig) [#50108](https://github.com/nodejs/node/pull/50108) +* \[[`b1ada0ad55`](https://github.com/nodejs/node/commit/b1ada0ad55)] - **tls**: handle cases where the raw socket is destroyed (Luigi Pinca) [#49980](https://github.com/nodejs/node/pull/49980) +* \[[`fae1af0a75`](https://github.com/nodejs/node/commit/fae1af0a75)] - **tls**: ciphers allow bang syntax (Chemi Atlow) [#49712](https://github.com/nodejs/node/pull/49712) +* \[[`766198b9e1`](https://github.com/nodejs/node/commit/766198b9e1)] - **tools**: fix comments referencing dep\_updaters scripts (Keksonoid) [#50165](https://github.com/nodejs/node/pull/50165) +* \[[`760b5dd259`](https://github.com/nodejs/node/commit/760b5dd259)] - **tools**: remove no-return-await lint rule (翠 / green) [#50118](https://github.com/nodejs/node/pull/50118) +* \[[`a0a5b751fb`](https://github.com/nodejs/node/commit/a0a5b751fb)] - **tools**: update lint-md-dependencies (Node.js GitHub Bot) [#50083](https://github.com/nodejs/node/pull/50083) +* \[[`69fb55e6b9`](https://github.com/nodejs/node/commit/69fb55e6b9)] - **tools**: update eslint to 8.51.0 (Node.js GitHub Bot) [#50084](https://github.com/nodejs/node/pull/50084) +* \[[`f73650ea52`](https://github.com/nodejs/node/commit/f73650ea52)] - **tools**: remove genv8constants.py (Ben Noordhuis) [#50023](https://github.com/nodejs/node/pull/50023) +* \[[`581434e54f`](https://github.com/nodejs/node/commit/581434e54f)] - **tools**: update eslint to 8.50.0 (Node.js GitHub Bot) [#49989](https://github.com/nodejs/node/pull/49989) +* \[[`344d3c4b7c`](https://github.com/nodejs/node/commit/344d3c4b7c)] - **tools**: update lint-md-dependencies (Node.js GitHub Bot) [#49983](https://github.com/nodejs/node/pull/49983) +* \[[`7f06c270c6`](https://github.com/nodejs/node/commit/7f06c270c6)] - **tools**: add navigation ARIA landmark to generated API ToC (Rich Trott) [#49882](https://github.com/nodejs/node/pull/49882) +* \[[`e97d25687b`](https://github.com/nodejs/node/commit/e97d25687b)] - **tools**: use osx notarytool for future releases (Ulises Gascon) [#48701](https://github.com/nodejs/node/pull/48701) +* \[[`3f1936f698`](https://github.com/nodejs/node/commit/3f1936f698)] - **tools**: update github\_reporter to 1.5.3 (Node.js GitHub Bot) [#49877](https://github.com/nodejs/node/pull/49877) +* \[[`8568de3da6`](https://github.com/nodejs/node/commit/8568de3da6)] - **tools**: add new V8 headers to distribution (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`86cb23d09f`](https://github.com/nodejs/node/commit/86cb23d09f)] - **tools**: update V8 gypfiles for 11.8 (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`9c6219c7e2`](https://github.com/nodejs/node/commit/9c6219c7e2)] - **tools**: update V8 gypfiles for 11.7 (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`73ddf50163`](https://github.com/nodejs/node/commit/73ddf50163)] - **tools**: update V8 gypfiles for 11.6 (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`817ef255ea`](https://github.com/nodejs/node/commit/817ef255ea)] - **tools**: update V8 gypfiles for 11.5 (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`f34a3a9861`](https://github.com/nodejs/node/commit/f34a3a9861)] - **tools**: update V8 gypfiles for 11.4 (Michaël Zasso) [#49639](https://github.com/nodejs/node/pull/49639) +* \[[`9df864ddeb`](https://github.com/nodejs/node/commit/9df864ddeb)] - **typings**: use `Symbol.dispose` and `Symbol.asyncDispose` in types (Niklas Mollenhauer) [#50123](https://github.com/nodejs/node/pull/50123) +* \[[`54bb691c0b`](https://github.com/nodejs/node/commit/54bb691c0b)] - **util**: lazy parse mime parameters (Aras Abbasi) [#49889](https://github.com/nodejs/node/pull/49889) +* \[[`1d220b55ac`](https://github.com/nodejs/node/commit/1d220b55ac)] - **vm**: use default HDO when importModuleDynamically is not set (Joyee Cheung) [#49950](https://github.com/nodejs/node/pull/49950) +* \[[`c1a3a98560`](https://github.com/nodejs/node/commit/c1a3a98560)] - **wasi**: address coverity warning (Michael Dawson) [#49866](https://github.com/nodejs/node/pull/49866) +* \[[`9cb8eb7177`](https://github.com/nodejs/node/commit/9cb8eb7177)] - **wasi**: fix up wasi tests for ibmi (Michael Dawson) [#49953](https://github.com/nodejs/node/pull/49953) +* \[[`16ac5e1ca8`](https://github.com/nodejs/node/commit/16ac5e1ca8)] - **zlib**: fix discovery of cpu-features.h for android (MatteoBax) [#49828](https://github.com/nodejs/node/pull/49828) From 1bd6537c97199326f7067e5df870808215fe532a Mon Sep 17 00:00:00 2001 From: Luigi Pinca Date: Wed, 1 Nov 2023 14:41:03 +0100 Subject: [PATCH 207/229] doc: recommend supported Python versions Refs: https://github.com/nodejs/node/pull/50209#issuecomment-1781016950 PR-URL: https://github.com/nodejs/node/pull/50407 Reviewed-By: Richard Lau Reviewed-By: James M Snell --- BUILDING.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/BUILDING.md b/BUILDING.md index e169d7cfe04557..bedc4da273fdf3 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -230,7 +230,7 @@ The Node.js project supports Python >= 3 for building and testing. * `gcc` and `g++` >= 10.1 or newer * GNU Make 3.81 or newer -* Python >=3.6 <=3.11 (see note above) +* [A supported version of Python][Python versions] * For test coverage, your Python installation must include pip. Installation via Linux package manager can be achieved with: @@ -246,7 +246,7 @@ FreeBSD and OpenBSD users may also need to install `libexecinfo`. #### macOS prerequisites * Xcode Command Line Tools >= 11 for macOS -* Python >=3.6 <=3.11 (see note above) +* [A supported version of Python][Python versions] * For test coverage, your Python installation must include pip. macOS users can install the `Xcode Command Line Tools` by running @@ -582,7 +582,8 @@ to run it again before invoking `make -j4`. ##### Option 1: Manual install -* [Python 3.11](https://apps.microsoft.com/store/detail/python-311/9NRWMJP3717K) +* The current [version of Python][Python versions] from the + [Microsoft Store](https://apps.microsoft.com/store/search?publisher=Python+Software+Foundation) * The "Desktop development with C++" workload from [Visual Studio 2022](https://visualstudio.microsoft.com/downloads/) or the "C++ build tools" workload from the @@ -893,3 +894,5 @@ incompatible with the official Node.js builds (e.g. using a ABI incompatible version of a dependency), please reserve and use a custom `NODE_MODULE_VERSION` by opening a pull request against the registry available at . + +[Python versions]: https://devguide.python.org/versions/ From 48dbde71d803dc4bdc20dc9d2af62bbfb640ca3e Mon Sep 17 00:00:00 2001 From: Aras Abbasi Date: Thu, 2 Nov 2023 14:31:50 +0100 Subject: [PATCH 208/229] lib: use primordials for navigator.userAgent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-URL: https://github.com/nodejs/node/pull/50467 Reviewed-By: Geoffrey Booth Reviewed-By: Vinícius Lourenço Claro Cardoso Reviewed-By: Yagiz Nizipli Reviewed-By: James M Snell Reviewed-By: Tobias Nießen Reviewed-By: Ethan Arrowood --- lib/internal/navigator.js | 61 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 lib/internal/navigator.js diff --git a/lib/internal/navigator.js b/lib/internal/navigator.js new file mode 100644 index 00000000000000..5b29a098fcd0d9 --- /dev/null +++ b/lib/internal/navigator.js @@ -0,0 +1,61 @@ +'use strict'; + +const { + ObjectDefineProperties, + StringPrototypeIndexOf, + StringPrototypeSlice, + Symbol, +} = primordials; + +const { + ERR_ILLEGAL_CONSTRUCTOR, +} = require('internal/errors').codes; + +const { + kEnumerableProperty, +} = require('internal/util'); + +const { + getAvailableParallelism, +} = internalBinding('os'); + +const kInitialize = Symbol('kInitialize'); +const nodeVersion = process.version; + +class Navigator { + // Private properties are used to avoid brand validations. + #availableParallelism; + #userAgent = `Node.js/${StringPrototypeSlice(nodeVersion, 1, StringPrototypeIndexOf(nodeVersion, '.'))}`; + + constructor() { + if (arguments[0] === kInitialize) { + return; + } + throw new ERR_ILLEGAL_CONSTRUCTOR(); + } + + /** + * @return {number} + */ + get hardwareConcurrency() { + this.#availableParallelism ??= getAvailableParallelism(); + return this.#availableParallelism; + } + + /** + * @return {string} + */ + get userAgent() { + return this.#userAgent; + } +} + +ObjectDefineProperties(Navigator.prototype, { + hardwareConcurrency: kEnumerableProperty, + userAgent: kEnumerableProperty, +}); + +module.exports = { + navigator: new Navigator(kInitialize), + Navigator, +}; From 656135d70a718bc4330f47f5581bb14fcfc9ee86 Mon Sep 17 00:00:00 2001 From: "Node.js GitHub Bot" Date: Wed, 1 Nov 2023 13:33:19 +0000 Subject: [PATCH 209/229] deps: update zlib to 1.2.13.1-motley-dfc48fc PR-URL: https://github.com/nodejs/node/pull/50456 Reviewed-By: Mohammed Keyvanzadeh Reviewed-By: Luigi Pinca Reviewed-By: James M Snell --- deps/zlib/google/zip_reader_unittest.cc | 2 +- deps/zlib/google/zip_unittest.cc | 4 ++-- doc/contributing/maintaining/maintaining-dependencies.md | 6 +++--- src/zlib_version.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/deps/zlib/google/zip_reader_unittest.cc b/deps/zlib/google/zip_reader_unittest.cc index 6086c97c49b622..8ef0274e112483 100644 --- a/deps/zlib/google/zip_reader_unittest.cc +++ b/deps/zlib/google/zip_reader_unittest.cc @@ -157,7 +157,7 @@ class ZipReaderTest : public PlatformTest { static base::FilePath GetTestDataDirectory() { base::FilePath path; - CHECK(base::PathService::Get(base::DIR_SOURCE_ROOT, &path)); + CHECK(base::PathService::Get(base::DIR_SRC_TEST_DATA_ROOT, &path)); return path.AppendASCII("third_party") .AppendASCII("zlib") .AppendASCII("google") diff --git a/deps/zlib/google/zip_unittest.cc b/deps/zlib/google/zip_unittest.cc index d0fc02fd939a15..922d38303ce845 100644 --- a/deps/zlib/google/zip_unittest.cc +++ b/deps/zlib/google/zip_unittest.cc @@ -206,7 +206,7 @@ class VirtualFileSystem : public zip::FileAccessor { info->is_directory = !files_.count(path); info->last_modified = - base::Time::FromDoubleT(172097977); // Some random date. + base::Time::FromSecondsSinceUnixEpoch(172097977); // Some random date. return true; } @@ -256,7 +256,7 @@ class ZipTest : public PlatformTest { static base::FilePath GetDataDirectory() { base::FilePath path; - bool success = base::PathService::Get(base::DIR_SOURCE_ROOT, &path); + bool success = base::PathService::Get(base::DIR_SRC_TEST_DATA_ROOT, &path); EXPECT_TRUE(success); return std::move(path) .AppendASCII("third_party") diff --git a/doc/contributing/maintaining/maintaining-dependencies.md b/doc/contributing/maintaining/maintaining-dependencies.md index 50cb7b2ceaeafa..176fe72be4c617 100644 --- a/doc/contributing/maintaining/maintaining-dependencies.md +++ b/doc/contributing/maintaining/maintaining-dependencies.md @@ -31,7 +31,7 @@ This a list of all the dependencies: * [undici 5.27.0][] * [uvwasi 0.0.19][] * [V8 11.3.244.8][] -* [zlib 1.2.13.1-motley-fef5869][] +* [zlib 1.2.13.1-motley-dfc48fc][] Any code which meets one or more of these conditions should be managed as a dependency: @@ -311,7 +311,7 @@ See [maintaining-web-assembly][] for more informations. high-performance JavaScript and WebAssembly engine, written in C++. See [maintaining-V8][] for more informations. -### zlib 1.2.13.1-motley-fef5869 +### zlib 1.2.13.1-motley-dfc48fc The [zlib](https://chromium.googlesource.com/chromium/src/+/refs/heads/main/third_party/zlib) dependency lossless data-compression library, @@ -349,4 +349,4 @@ performance improvements not currently available in standard zlib. [update-openssl-action]: ../../../.github/workflows/update-openssl.yml [uvwasi 0.0.19]: #uvwasi-0019 [v8 11.3.244.8]: #v8-1132448 -[zlib 1.2.13.1-motley-fef5869]: #zlib-12131-motley-fef5869 +[zlib 1.2.13.1-motley-dfc48fc]: #zlib-12131-motley-dfc48fc diff --git a/src/zlib_version.h b/src/zlib_version.h index 77c4f92c5de0ef..02390a4afd0ec5 100644 --- a/src/zlib_version.h +++ b/src/zlib_version.h @@ -2,5 +2,5 @@ // Refer to tools/dep_updaters/update-zlib.sh #ifndef SRC_ZLIB_VERSION_H_ #define SRC_ZLIB_VERSION_H_ -#define ZLIB_VERSION "1.2.13.1-motley-fef5869" +#define ZLIB_VERSION "1.2.13.1-motley-dfc48fc" #endif // SRC_ZLIB_VERSION_H_ From bc2ebb972b34f54e042de9636e7451d2526436a9 Mon Sep 17 00:00:00 2001 From: Levi Zim Date: Mon, 6 Nov 2023 23:19:55 +0800 Subject: [PATCH 210/229] deps: V8: cherry-pick 13192d6e10fa MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original commit message: [riscv][tagged-ptr] Convert more Objects to Tagged<> Port commit 064b9a7903b793734b6c03a86ee53a2dc85f0f80 Bug: v8:12710 Change-Id: If076ca5cd9e9d175c20fc3611e03d39c0260404d Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4837830 Reviewed-by: Ji Qiu Commit-Queue: Ji Qiu Auto-Submit: Yahan Lu Cr-Commit-Position: refs/heads/main@{#89780} Refs: https://github.com/v8/v8/commit/13192d6e10fa726858056e49fc9bca6201401171 PR-URL: https://github.com/nodejs/node/pull/50552 Reviewed-By: Richard Lau Reviewed-By: Michaël Zasso Reviewed-By: Debadree Chatterjee Reviewed-By: Jiawen Geng --- common.gypi | 2 +- deps/v8/src/builtins/riscv/builtins-riscv.cc | 2 +- deps/v8/src/codegen/riscv/assembler-riscv-inl.h | 16 ++++++++-------- deps/v8/src/codegen/riscv/assembler-riscv.h | 2 +- deps/v8/src/execution/riscv/simulator-riscv.cc | 8 ++++---- .../regexp/riscv/regexp-macro-assembler-riscv.cc | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/common.gypi b/common.gypi index 4589f515178093..d2cd306aadfebf 100644 --- a/common.gypi +++ b/common.gypi @@ -36,7 +36,7 @@ # Reset this number to 0 on major V8 upgrades. # Increment by one for each non-official patch applied to deps/v8. - 'v8_embedder_string': '-node.25', + 'v8_embedder_string': '-node.15', ##### V8 defaults for Node.js ##### diff --git a/deps/v8/src/builtins/riscv/builtins-riscv.cc b/deps/v8/src/builtins/riscv/builtins-riscv.cc index 3404562785991c..d6091434b9b0ad 100644 --- a/deps/v8/src/builtins/riscv/builtins-riscv.cc +++ b/deps/v8/src/builtins/riscv/builtins-riscv.cc @@ -1512,7 +1512,7 @@ static void Generate_InterpreterEnterBytecode(MacroAssembler* masm) { // Set the return address to the correct point in the interpreter entry // trampoline. Label builtin_trampoline, trampoline_loaded; - Smi interpreter_entry_return_pc_offset( + Tagged interpreter_entry_return_pc_offset( masm->isolate()->heap()->interpreter_entry_return_pc_offset()); DCHECK_NE(interpreter_entry_return_pc_offset, Smi::zero()); diff --git a/deps/v8/src/codegen/riscv/assembler-riscv-inl.h b/deps/v8/src/codegen/riscv/assembler-riscv-inl.h index 55f191e6afe76e..ca6d641e2c94ed 100644 --- a/deps/v8/src/codegen/riscv/assembler-riscv-inl.h +++ b/deps/v8/src/codegen/riscv/assembler-riscv-inl.h @@ -128,9 +128,9 @@ Handle Assembler::compressed_embedded_object_handle_at( } void Assembler::deserialization_set_special_target_at( - Address instruction_payload, Code code, Address target) { + Address instruction_payload, Tagged code, Address target) { set_target_address_at(instruction_payload, - !code.is_null() ? code.constant_pool() : kNullAddress, + !code.is_null() ? code->constant_pool() : kNullAddress, target); } @@ -159,12 +159,13 @@ void Assembler::deserialization_set_target_internal_reference_at( } } -HeapObject RelocInfo::target_object(PtrComprCageBase cage_base) { +Tagged RelocInfo::target_object(PtrComprCageBase cage_base) { DCHECK(IsCodeTarget(rmode_) || IsEmbeddedObjectMode(rmode_)); if (IsCompressedEmbeddedObject(rmode_)) { - return HeapObject::cast(Object(V8HeapCompressionScheme::DecompressTagged( - cage_base, - Assembler::target_compressed_address_at(pc_, constant_pool_)))); + return HeapObject::cast( + Tagged(V8HeapCompressionScheme::DecompressTagged( + cage_base, + Assembler::target_compressed_address_at(pc_, constant_pool_)))); } else { return HeapObject::cast( Object(Assembler::target_address_at(pc_, constant_pool_))); @@ -186,8 +187,7 @@ Handle RelocInfo::target_object_handle(Assembler* origin) { } } -void RelocInfo::set_target_object(Heap* heap, HeapObject target, - WriteBarrierMode write_barrier_mode, +void RelocInfo::set_target_object(Tagged target, ICacheFlushMode icache_flush_mode) { DCHECK(IsCodeTarget(rmode_) || IsEmbeddedObjectMode(rmode_)); if (IsCompressedEmbeddedObject(rmode_)) { diff --git a/deps/v8/src/codegen/riscv/assembler-riscv.h b/deps/v8/src/codegen/riscv/assembler-riscv.h index ed222b52d69279..bcd5a62d324ee5 100644 --- a/deps/v8/src/codegen/riscv/assembler-riscv.h +++ b/deps/v8/src/codegen/riscv/assembler-riscv.h @@ -286,7 +286,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase, // This is for calls and branches within generated code. The serializer // has already deserialized the lui/ori instructions etc. inline static void deserialization_set_special_target_at(Address location, - Code code, + Tagged code, Address target); // Get the size of the special target encoded at 'instruction_payload'. diff --git a/deps/v8/src/execution/riscv/simulator-riscv.cc b/deps/v8/src/execution/riscv/simulator-riscv.cc index 9582db489638a3..052a2d67dd7e44 100644 --- a/deps/v8/src/execution/riscv/simulator-riscv.cc +++ b/deps/v8/src/execution/riscv/simulator-riscv.cc @@ -1781,7 +1781,7 @@ void RiscvDebugger::Debug() { sreg_t value; StdoutStream os; if (GetValue(arg1, &value)) { - Object obj(value); + Tagged obj(value); os << arg1 << ": \n"; #ifdef DEBUG obj.Print(os); @@ -1830,7 +1830,7 @@ void RiscvDebugger::Debug() { PrintF(" 0x%012" PRIxPTR " : 0x%016" REGIx_FORMAT " %14" REGId_FORMAT " ", reinterpret_cast(cur), *cur, *cur); - Object obj(*cur); + Tagged obj(*cur); Heap* current_heap = sim_->isolate_->heap(); if (obj.IsSmi() || IsValidHeapObject(current_heap, HeapObject::cast(obj))) { @@ -4692,7 +4692,7 @@ bool Simulator::DecodeRvvVS() { Builtin Simulator::LookUp(Address pc) { for (Builtin builtin = Builtins::kFirst; builtin <= Builtins::kLast; ++builtin) { - if (builtins_.code(builtin).contains(isolate_, pc)) return builtin; + if (builtins_.code(builtin)->contains(isolate_, pc)) return builtin; } return Builtin::kNoBuiltinId; } @@ -4709,7 +4709,7 @@ void Simulator::DecodeRVIType() { if (builtin != Builtin::kNoBuiltinId) { auto code = builtins_.code(builtin); if ((rs1_reg() != ra || imm12() != 0)) { - if ((Address)get_pc() == code.InstructionStart()) { + if ((Address)get_pc() == code->instruction_start()) { sreg_t arg0 = get_register(a0); sreg_t arg1 = get_register(a1); sreg_t arg2 = get_register(a2); diff --git a/deps/v8/src/regexp/riscv/regexp-macro-assembler-riscv.cc b/deps/v8/src/regexp/riscv/regexp-macro-assembler-riscv.cc index 4063b4b3d21948..72f89767eb3489 100644 --- a/deps/v8/src/regexp/riscv/regexp-macro-assembler-riscv.cc +++ b/deps/v8/src/regexp/riscv/regexp-macro-assembler-riscv.cc @@ -1211,7 +1211,7 @@ static T* frame_entry_address(Address re_frame, int frame_offset) { int64_t RegExpMacroAssemblerRISCV::CheckStackGuardState(Address* return_address, Address raw_code, Address re_frame) { - InstructionStream re_code = InstructionStream::cast(Object(raw_code)); + Tagged re_code = InstructionStream::cast(Object(raw_code)); return NativeRegExpMacroAssembler::CheckStackGuardState( frame_entry(re_frame, kIsolateOffset), static_cast(frame_entry(re_frame, kStartIndexOffset)), From ec67890824355657edc35073bfcc297883c74c55 Mon Sep 17 00:00:00 2001 From: Chengzhong Wu Date: Fri, 24 Nov 2023 11:38:54 +0800 Subject: [PATCH 211/229] deps: V8: cherry-pick 0f9ebbc672c7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original commit message: [flags] Remove --harmony-string-is-well-formed The String.prototype.isWellFormed and toWellFormed have shipped since M111. Bug: v8:13557 Change-Id: I27e332d2fde0f9ea8ad649c016a84d2d3e0bf592 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4931269 Reviewed-by: Shu-yu Guo Commit-Queue: Chengzhong Wu (legendecas) Cr-Commit-Position: refs/heads/main@{#90398} Refs: https://github.com/v8/v8/commit/0f9ebbc672c77594feb9176cf52601116aa7c34c PR-URL: https://github.com/nodejs/node/pull/50867 Reviewed-By: Michaël Zasso Reviewed-By: Jiawen Geng Reviewed-By: Richard Lau Reviewed-By: Yagiz Nizipli --- common.gypi | 2 +- deps/v8/src/flags/flag-definitions.h | 1 - deps/v8/src/init/bootstrapper.cc | 16 ++++------------ 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/common.gypi b/common.gypi index d2cd306aadfebf..4c856c0912260c 100644 --- a/common.gypi +++ b/common.gypi @@ -36,7 +36,7 @@ # Reset this number to 0 on major V8 upgrades. # Increment by one for each non-official patch applied to deps/v8. - 'v8_embedder_string': '-node.15', + 'v8_embedder_string': '-node.17', ##### V8 defaults for Node.js ##### diff --git a/deps/v8/src/flags/flag-definitions.h b/deps/v8/src/flags/flag-definitions.h index 1bb489833b81f8..e64c71cb8bd263 100644 --- a/deps/v8/src/flags/flag-definitions.h +++ b/deps/v8/src/flags/flag-definitions.h @@ -274,7 +274,6 @@ DEFINE_IMPLICATION(harmony_rab_gsab_transfer, harmony_rab_gsab) V(harmony_import_assertions, "harmony import assertions") \ V(harmony_symbol_as_weakmap_key, "harmony symbols as weakmap keys") \ V(harmony_change_array_by_copy, "harmony change-Array-by-copy") \ - V(harmony_string_is_well_formed, "harmony String#{is,to}WellFormed") \ V(harmony_rab_gsab, \ "harmony ResizableArrayBuffer / GrowableSharedArrayBuffer") \ V(harmony_regexp_unicode_sets, "harmony RegExp Unicode Sets") diff --git a/deps/v8/src/init/bootstrapper.cc b/deps/v8/src/init/bootstrapper.cc index c468c502e703df..3986ff40bd8adb 100644 --- a/deps/v8/src/init/bootstrapper.cc +++ b/deps/v8/src/init/bootstrapper.cc @@ -2114,6 +2114,8 @@ void Genesis::InitializeGlobal(Handle global_object, Builtin::kStringPrototypeIncludes, 1, false); SimpleInstallFunction(isolate_, prototype, "indexOf", Builtin::kStringPrototypeIndexOf, 1, false); + SimpleInstallFunction(isolate(), prototype, "isWellFormed", + Builtin::kStringPrototypeIsWellFormed, 0, false); SimpleInstallFunction(isolate_, prototype, "italics", Builtin::kStringPrototypeItalics, 0, false); SimpleInstallFunction(isolate_, prototype, "lastIndexOf", @@ -2170,6 +2172,8 @@ void Genesis::InitializeGlobal(Handle global_object, Builtin::kStringPrototypeStartsWith, 1, false); SimpleInstallFunction(isolate_, prototype, "toString", Builtin::kStringPrototypeToString, 0, true); + SimpleInstallFunction(isolate(), prototype, "toWellFormed", + Builtin::kStringPrototypeToWellFormed, 0, false); SimpleInstallFunction(isolate_, prototype, "trim", Builtin::kStringPrototypeTrim, 0, false); @@ -4959,18 +4963,6 @@ void Genesis::InitializeGlobal_harmony_rab_gsab() { Builtin::kSharedArrayBufferPrototypeGrow, 1, true); } -void Genesis::InitializeGlobal_harmony_string_is_well_formed() { - if (!v8_flags.harmony_string_is_well_formed) return; - Handle string_function(native_context()->string_function(), - isolate()); - Handle string_prototype( - JSObject::cast(string_function->initial_map().prototype()), isolate()); - SimpleInstallFunction(isolate(), string_prototype, "isWellFormed", - Builtin::kStringPrototypeIsWellFormed, 0, false); - SimpleInstallFunction(isolate(), string_prototype, "toWellFormed", - Builtin::kStringPrototypeToWellFormed, 0, false); -} - void Genesis::InitializeGlobal_harmony_temporal() { if (!v8_flags.harmony_temporal) return; // -- T e m p o r a l From 0be84e5a280e5f202a5aa2648065d75ffbdc8aa0 Mon Sep 17 00:00:00 2001 From: "Node.js GitHub Bot" Date: Tue, 21 Nov 2023 12:22:51 +0100 Subject: [PATCH 212/229] deps: update undici to 5.27.2 PR-URL: https://github.com/nodejs/node/pull/50813 Reviewed-By: Filip Skokan Reviewed-By: Mohammed Keyvanzadeh Reviewed-By: Matthew Aitken Reviewed-By: James M Snell --- deps/undici/src/lib/client.js | 37 +- deps/undici/src/lib/core/request.js | 42 +- deps/undici/src/lib/core/util.js | 2 +- deps/undici/src/lib/pool.js | 2 +- .../@fastify/busboy/lib/utils/decodeText.js | 114 +++ .../@fastify/busboy/lib/utils/parseParams.js | 196 +++++ .../node_modules/@fastify/busboy/package.json | 86 +++ deps/undici/src/package-lock.json | 313 ++++---- deps/undici/src/package.json | 2 +- deps/undici/undici.js | 688 ++++++++++++++++-- .../maintaining/maintaining-dependencies.md | 6 +- src/undici_version.h | 2 +- 12 files changed, 1262 insertions(+), 228 deletions(-) create mode 100644 deps/undici/src/node_modules/@fastify/busboy/lib/utils/decodeText.js create mode 100644 deps/undici/src/node_modules/@fastify/busboy/lib/utils/parseParams.js create mode 100644 deps/undici/src/node_modules/@fastify/busboy/package.json diff --git a/deps/undici/src/lib/client.js b/deps/undici/src/lib/client.js index 00f4467cc8b718..968a7f920710ec 100644 --- a/deps/undici/src/lib/client.js +++ b/deps/undici/src/lib/client.js @@ -1462,23 +1462,7 @@ function _resume (client, sync) { return } - if (util.isStream(request.body) && util.bodyLength(request.body) === 0) { - request.body - .on('data', /* istanbul ignore next */ function () { - /* istanbul ignore next */ - assert(false) - }) - .on('error', function (err) { - errorRequest(client, request, err) - }) - .on('end', function () { - util.destroy(this) - }) - - request.body = null - } - - if (client[kRunning] > 0 && + if (client[kRunning] > 0 && util.bodyLength(request.body) !== 0 && (util.isStream(request.body) || util.isAsyncIterable(request.body))) { // Request with stream or iterator body can error while other requests // are inflight and indirectly error those as well. @@ -1499,6 +1483,11 @@ function _resume (client, sync) { } } +// https://www.rfc-editor.org/rfc/rfc7230#section-3.3.2 +function shouldSendContentLength (method) { + return method !== 'GET' && method !== 'HEAD' && method !== 'OPTIONS' && method !== 'TRACE' && method !== 'CONNECT' +} + function write (client, request) { if (client[kHTTPConnVersion] === 'h2') { writeH2(client, client[kHTTP2Session], request) @@ -1527,7 +1516,9 @@ function write (client, request) { body.read(0) } - let contentLength = util.bodyLength(body) + const bodyLength = util.bodyLength(body) + + let contentLength = bodyLength if (contentLength === null) { contentLength = request.contentLength @@ -1542,7 +1533,9 @@ function write (client, request) { contentLength = null } - if (request.contentLength !== null && request.contentLength !== contentLength) { + // https://github.com/nodejs/undici/issues/2046 + // A user agent may send a Content-Length header with 0 value, this should be allowed. + if (shouldSendContentLength(method) && contentLength > 0 && request.contentLength !== null && request.contentLength !== contentLength) { if (client[kStrictContentLength]) { errorRequest(client, request, new RequestContentLengthMismatchError()) return false @@ -1623,7 +1616,7 @@ function write (client, request) { } /* istanbul ignore else: assertion */ - if (!body) { + if (!body || bodyLength === 0) { if (contentLength === 0) { socket.write(`${header}content-length: 0\r\n\r\n`, 'latin1') } else { @@ -1763,7 +1756,9 @@ function writeH2 (client, session, request) { contentLength = null } - if (request.contentLength != null && request.contentLength !== contentLength) { + // https://github.com/nodejs/undici/issues/2046 + // A user agent may send a Content-Length header with 0 value, this should be allowed. + if (shouldSendContentLength(method) && contentLength > 0 && request.contentLength != null && request.contentLength !== contentLength) { if (client[kStrictContentLength]) { errorRequest(client, request, new RequestContentLengthMismatchError()) return false diff --git a/deps/undici/src/lib/core/request.js b/deps/undici/src/lib/core/request.js index 43973884f1a3c8..7db05ce65aed4d 100644 --- a/deps/undici/src/lib/core/request.js +++ b/deps/undici/src/lib/core/request.js @@ -112,10 +112,29 @@ class Request { this.method = method + this.abort = null + if (body == null) { this.body = null } else if (util.isStream(body)) { this.body = body + + const rState = this.body._readableState + if (!rState || !rState.autoDestroy) { + this.endHandler = function autoDestroy () { + util.destroy(this) + } + this.body.on('end', this.endHandler) + } + + this.errorHandler = err => { + if (this.abort) { + this.abort(err) + } else { + this.error = err + } + } + this.body.on('error', this.errorHandler) } else if (util.isBuffer(body)) { this.body = body.byteLength ? body : null } else if (ArrayBuffer.isView(body)) { @@ -236,7 +255,12 @@ class Request { assert(!this.aborted) assert(!this.completed) - return this[kHandler].onConnect(abort) + if (this.error) { + abort(this.error) + } else { + this.abort = abort + return this[kHandler].onConnect(abort) + } } onHeaders (statusCode, headers, resume, statusText) { @@ -265,6 +289,8 @@ class Request { } onComplete (trailers) { + this.onFinally() + assert(!this.aborted) this.completed = true @@ -275,6 +301,8 @@ class Request { } onError (error) { + this.onFinally() + if (channels.error.hasSubscribers) { channels.error.publish({ request: this, error }) } @@ -286,6 +314,18 @@ class Request { return this[kHandler].onError(error) } + onFinally () { + if (this.errorHandler) { + this.body.off('error', this.errorHandler) + this.errorHandler = null + } + + if (this.endHandler) { + this.body.off('end', this.endHandler) + this.endHandler = null + } + } + // TODO: adjust to support H2 addHeader (key, value) { processHeader(this, key, value) diff --git a/deps/undici/src/lib/core/util.js b/deps/undici/src/lib/core/util.js index 259ba7b38a64e9..91f116582655e8 100644 --- a/deps/undici/src/lib/core/util.js +++ b/deps/undici/src/lib/core/util.js @@ -190,7 +190,7 @@ function isReadableAborted (stream) { } function destroy (stream, err) { - if (!isStream(stream) || isDestroyed(stream)) { + if (stream == null || !isStream(stream) || isDestroyed(stream)) { return } diff --git a/deps/undici/src/lib/pool.js b/deps/undici/src/lib/pool.js index 08509958069a4f..e3cd3399e6b544 100644 --- a/deps/undici/src/lib/pool.js +++ b/deps/undici/src/lib/pool.js @@ -57,7 +57,7 @@ class Pool extends PoolBase { maxCachedSessions, allowH2, socketPath, - timeout: connectTimeout == null ? 10e3 : connectTimeout, + timeout: connectTimeout, ...(util.nodeHasAutoSelectFamily && autoSelectFamily ? { autoSelectFamily, autoSelectFamilyAttemptTimeout } : undefined), ...connect }) diff --git a/deps/undici/src/node_modules/@fastify/busboy/lib/utils/decodeText.js b/deps/undici/src/node_modules/@fastify/busboy/lib/utils/decodeText.js new file mode 100644 index 00000000000000..be35d6b1052b0b --- /dev/null +++ b/deps/undici/src/node_modules/@fastify/busboy/lib/utils/decodeText.js @@ -0,0 +1,114 @@ +'use strict' + +// Node has always utf-8 +const utf8Decoder = new TextDecoder('utf-8') +const textDecoders = new Map([ + ['utf-8', utf8Decoder], + ['utf8', utf8Decoder] +]) + +function getDecoder (charset) { + let lc + while (true) { + switch (charset) { + case 'utf-8': + case 'utf8': + return decoders.utf8 + case 'latin1': + case 'ascii': // TODO: Make these a separate, strict decoder? + case 'us-ascii': + case 'iso-8859-1': + case 'iso8859-1': + case 'iso88591': + case 'iso_8859-1': + case 'windows-1252': + case 'iso_8859-1:1987': + case 'cp1252': + case 'x-cp1252': + return decoders.latin1 + case 'utf16le': + case 'utf-16le': + case 'ucs2': + case 'ucs-2': + return decoders.utf16le + case 'base64': + return decoders.base64 + default: + if (lc === undefined) { + lc = true + charset = charset.toLowerCase() + continue + } + return decoders.other.bind(charset) + } + } +} + +const decoders = { + utf8: (data, sourceEncoding) => { + if (data.length === 0) { + return '' + } + if (typeof data === 'string') { + data = Buffer.from(data, sourceEncoding) + } + return data.utf8Slice(0, data.length) + }, + + latin1: (data, sourceEncoding) => { + if (data.length === 0) { + return '' + } + if (typeof data === 'string') { + return data + } + return data.latin1Slice(0, data.length) + }, + + utf16le: (data, sourceEncoding) => { + if (data.length === 0) { + return '' + } + if (typeof data === 'string') { + data = Buffer.from(data, sourceEncoding) + } + return data.ucs2Slice(0, data.length) + }, + + base64: (data, sourceEncoding) => { + if (data.length === 0) { + return '' + } + if (typeof data === 'string') { + data = Buffer.from(data, sourceEncoding) + } + return data.base64Slice(0, data.length) + }, + + other: (data, sourceEncoding) => { + if (data.length === 0) { + return '' + } + if (typeof data === 'string') { + data = Buffer.from(data, sourceEncoding) + } + + if (textDecoders.has(this.toString())) { + try { + return textDecoders.get(this).decode(data) + } catch (e) { } + } + return typeof data === 'string' + ? data + : data.toString() + } +} + +function decodeText (text, sourceEncoding, destEncoding) { + if (text) { + return getDecoder(destEncoding)(text, sourceEncoding) + } + return text +} + +module.exports = decodeText diff --git a/deps/undici/src/node_modules/@fastify/busboy/lib/utils/parseParams.js b/deps/undici/src/node_modules/@fastify/busboy/lib/utils/parseParams.js new file mode 100644 index 00000000000000..1698e62e68ab82 --- /dev/null +++ b/deps/undici/src/node_modules/@fastify/busboy/lib/utils/parseParams.js @@ -0,0 +1,196 @@ +/* eslint-disable object-property-newline */ +'use strict' + +const decodeText = require('./decodeText') + +const RE_ENCODED = /%[a-fA-F0-9][a-fA-F0-9]/g + +const EncodedLookup = { + '%00': '\x00', '%01': '\x01', '%02': '\x02', '%03': '\x03', '%04': '\x04', + '%05': '\x05', '%06': '\x06', '%07': '\x07', '%08': '\x08', '%09': '\x09', + '%0a': '\x0a', '%0A': '\x0a', '%0b': '\x0b', '%0B': '\x0b', '%0c': '\x0c', + '%0C': '\x0c', '%0d': '\x0d', '%0D': '\x0d', '%0e': '\x0e', '%0E': '\x0e', + '%0f': '\x0f', '%0F': '\x0f', '%10': '\x10', '%11': '\x11', '%12': '\x12', + '%13': '\x13', '%14': '\x14', '%15': '\x15', '%16': '\x16', '%17': '\x17', + '%18': '\x18', '%19': '\x19', '%1a': '\x1a', '%1A': '\x1a', '%1b': '\x1b', + '%1B': '\x1b', '%1c': '\x1c', '%1C': '\x1c', '%1d': '\x1d', '%1D': '\x1d', + '%1e': '\x1e', '%1E': '\x1e', '%1f': '\x1f', '%1F': '\x1f', '%20': '\x20', + '%21': '\x21', '%22': '\x22', '%23': '\x23', '%24': '\x24', '%25': '\x25', + '%26': '\x26', '%27': '\x27', '%28': '\x28', '%29': '\x29', '%2a': '\x2a', + '%2A': '\x2a', '%2b': '\x2b', '%2B': '\x2b', '%2c': '\x2c', '%2C': '\x2c', + '%2d': '\x2d', '%2D': '\x2d', '%2e': '\x2e', '%2E': '\x2e', '%2f': '\x2f', + '%2F': '\x2f', '%30': '\x30', '%31': '\x31', '%32': '\x32', '%33': '\x33', + '%34': '\x34', '%35': '\x35', '%36': '\x36', '%37': '\x37', '%38': '\x38', + '%39': '\x39', '%3a': '\x3a', '%3A': '\x3a', '%3b': '\x3b', '%3B': '\x3b', + '%3c': '\x3c', '%3C': '\x3c', '%3d': '\x3d', '%3D': '\x3d', '%3e': '\x3e', + '%3E': '\x3e', '%3f': '\x3f', '%3F': '\x3f', '%40': '\x40', '%41': '\x41', + '%42': '\x42', '%43': '\x43', '%44': '\x44', '%45': '\x45', '%46': '\x46', + '%47': '\x47', '%48': '\x48', '%49': '\x49', '%4a': '\x4a', '%4A': '\x4a', + '%4b': '\x4b', '%4B': '\x4b', '%4c': '\x4c', '%4C': '\x4c', '%4d': '\x4d', + '%4D': '\x4d', '%4e': '\x4e', '%4E': '\x4e', '%4f': '\x4f', '%4F': '\x4f', + '%50': '\x50', '%51': '\x51', '%52': '\x52', '%53': '\x53', '%54': '\x54', + '%55': '\x55', '%56': '\x56', '%57': '\x57', '%58': '\x58', '%59': '\x59', + '%5a': '\x5a', '%5A': '\x5a', '%5b': '\x5b', '%5B': '\x5b', '%5c': '\x5c', + '%5C': '\x5c', '%5d': '\x5d', '%5D': '\x5d', '%5e': '\x5e', '%5E': '\x5e', + '%5f': '\x5f', '%5F': '\x5f', '%60': '\x60', '%61': '\x61', '%62': '\x62', + '%63': '\x63', '%64': '\x64', '%65': '\x65', '%66': '\x66', '%67': '\x67', + '%68': '\x68', '%69': '\x69', '%6a': '\x6a', '%6A': '\x6a', '%6b': '\x6b', + '%6B': '\x6b', '%6c': '\x6c', '%6C': '\x6c', '%6d': '\x6d', '%6D': '\x6d', + '%6e': '\x6e', '%6E': '\x6e', '%6f': '\x6f', '%6F': '\x6f', '%70': '\x70', + '%71': '\x71', '%72': '\x72', '%73': '\x73', '%74': '\x74', '%75': '\x75', + '%76': '\x76', '%77': '\x77', '%78': '\x78', '%79': '\x79', '%7a': '\x7a', + '%7A': '\x7a', '%7b': '\x7b', '%7B': '\x7b', '%7c': '\x7c', '%7C': '\x7c', + '%7d': '\x7d', '%7D': '\x7d', '%7e': '\x7e', '%7E': '\x7e', '%7f': '\x7f', + '%7F': '\x7f', '%80': '\x80', '%81': '\x81', '%82': '\x82', '%83': '\x83', + '%84': '\x84', '%85': '\x85', '%86': '\x86', '%87': '\x87', '%88': '\x88', + '%89': '\x89', '%8a': '\x8a', '%8A': '\x8a', '%8b': '\x8b', '%8B': '\x8b', + '%8c': '\x8c', '%8C': '\x8c', '%8d': '\x8d', '%8D': '\x8d', '%8e': '\x8e', + '%8E': '\x8e', '%8f': '\x8f', '%8F': '\x8f', '%90': '\x90', '%91': '\x91', + '%92': '\x92', '%93': '\x93', '%94': '\x94', '%95': '\x95', '%96': '\x96', + '%97': '\x97', '%98': '\x98', '%99': '\x99', '%9a': '\x9a', '%9A': '\x9a', + '%9b': '\x9b', '%9B': '\x9b', '%9c': '\x9c', '%9C': '\x9c', '%9d': '\x9d', + '%9D': '\x9d', '%9e': '\x9e', '%9E': '\x9e', '%9f': '\x9f', '%9F': '\x9f', + '%a0': '\xa0', '%A0': '\xa0', '%a1': '\xa1', '%A1': '\xa1', '%a2': '\xa2', + '%A2': '\xa2', '%a3': '\xa3', '%A3': '\xa3', '%a4': '\xa4', '%A4': '\xa4', + '%a5': '\xa5', '%A5': '\xa5', '%a6': '\xa6', '%A6': '\xa6', '%a7': '\xa7', + '%A7': '\xa7', '%a8': '\xa8', '%A8': '\xa8', '%a9': '\xa9', '%A9': '\xa9', + '%aa': '\xaa', '%Aa': '\xaa', '%aA': '\xaa', '%AA': '\xaa', '%ab': '\xab', + '%Ab': '\xab', '%aB': '\xab', '%AB': '\xab', '%ac': '\xac', '%Ac': '\xac', + '%aC': '\xac', '%AC': '\xac', '%ad': '\xad', '%Ad': '\xad', '%aD': '\xad', + '%AD': '\xad', '%ae': '\xae', '%Ae': '\xae', '%aE': '\xae', '%AE': '\xae', + '%af': '\xaf', '%Af': '\xaf', '%aF': '\xaf', '%AF': '\xaf', '%b0': '\xb0', + '%B0': '\xb0', '%b1': '\xb1', '%B1': '\xb1', '%b2': '\xb2', '%B2': '\xb2', + '%b3': '\xb3', '%B3': '\xb3', '%b4': '\xb4', '%B4': '\xb4', '%b5': '\xb5', + '%B5': '\xb5', '%b6': '\xb6', '%B6': '\xb6', '%b7': '\xb7', '%B7': '\xb7', + '%b8': '\xb8', '%B8': '\xb8', '%b9': '\xb9', '%B9': '\xb9', '%ba': '\xba', + '%Ba': '\xba', '%bA': '\xba', '%BA': '\xba', '%bb': '\xbb', '%Bb': '\xbb', + '%bB': '\xbb', '%BB': '\xbb', '%bc': '\xbc', '%Bc': '\xbc', '%bC': '\xbc', + '%BC': '\xbc', '%bd': '\xbd', '%Bd': '\xbd', '%bD': '\xbd', '%BD': '\xbd', + '%be': '\xbe', '%Be': '\xbe', '%bE': '\xbe', '%BE': '\xbe', '%bf': '\xbf', + '%Bf': '\xbf', '%bF': '\xbf', '%BF': '\xbf', '%c0': '\xc0', '%C0': '\xc0', + '%c1': '\xc1', '%C1': '\xc1', '%c2': '\xc2', '%C2': '\xc2', '%c3': '\xc3', + '%C3': '\xc3', '%c4': '\xc4', '%C4': '\xc4', '%c5': '\xc5', '%C5': '\xc5', + '%c6': '\xc6', '%C6': '\xc6', '%c7': '\xc7', '%C7': '\xc7', '%c8': '\xc8', + '%C8': '\xc8', '%c9': '\xc9', '%C9': '\xc9', '%ca': '\xca', '%Ca': '\xca', + '%cA': '\xca', '%CA': '\xca', '%cb': '\xcb', '%Cb': '\xcb', '%cB': '\xcb', + '%CB': '\xcb', '%cc': '\xcc', '%Cc': '\xcc', '%cC': '\xcc', '%CC': '\xcc', + '%cd': '\xcd', '%Cd': '\xcd', '%cD': '\xcd', '%CD': '\xcd', '%ce': '\xce', + '%Ce': '\xce', '%cE': '\xce', '%CE': '\xce', '%cf': '\xcf', '%Cf': '\xcf', + '%cF': '\xcf', '%CF': '\xcf', '%d0': '\xd0', '%D0': '\xd0', '%d1': '\xd1', + '%D1': '\xd1', '%d2': '\xd2', '%D2': '\xd2', '%d3': '\xd3', '%D3': '\xd3', + '%d4': '\xd4', '%D4': '\xd4', '%d5': '\xd5', '%D5': '\xd5', '%d6': '\xd6', + '%D6': '\xd6', '%d7': '\xd7', '%D7': '\xd7', '%d8': '\xd8', '%D8': '\xd8', + '%d9': '\xd9', '%D9': '\xd9', '%da': '\xda', '%Da': '\xda', '%dA': '\xda', + '%DA': '\xda', '%db': '\xdb', '%Db': '\xdb', '%dB': '\xdb', '%DB': '\xdb', + '%dc': '\xdc', '%Dc': '\xdc', '%dC': '\xdc', '%DC': '\xdc', '%dd': '\xdd', + '%Dd': '\xdd', '%dD': '\xdd', '%DD': '\xdd', '%de': '\xde', '%De': '\xde', + '%dE': '\xde', '%DE': '\xde', '%df': '\xdf', '%Df': '\xdf', '%dF': '\xdf', + '%DF': '\xdf', '%e0': '\xe0', '%E0': '\xe0', '%e1': '\xe1', '%E1': '\xe1', + '%e2': '\xe2', '%E2': '\xe2', '%e3': '\xe3', '%E3': '\xe3', '%e4': '\xe4', + '%E4': '\xe4', '%e5': '\xe5', '%E5': '\xe5', '%e6': '\xe6', '%E6': '\xe6', + '%e7': '\xe7', '%E7': '\xe7', '%e8': '\xe8', '%E8': '\xe8', '%e9': '\xe9', + '%E9': '\xe9', '%ea': '\xea', '%Ea': '\xea', '%eA': '\xea', '%EA': '\xea', + '%eb': '\xeb', '%Eb': '\xeb', '%eB': '\xeb', '%EB': '\xeb', '%ec': '\xec', + '%Ec': '\xec', '%eC': '\xec', '%EC': '\xec', '%ed': '\xed', '%Ed': '\xed', + '%eD': '\xed', '%ED': '\xed', '%ee': '\xee', '%Ee': '\xee', '%eE': '\xee', + '%EE': '\xee', '%ef': '\xef', '%Ef': '\xef', '%eF': '\xef', '%EF': '\xef', + '%f0': '\xf0', '%F0': '\xf0', '%f1': '\xf1', '%F1': '\xf1', '%f2': '\xf2', + '%F2': '\xf2', '%f3': '\xf3', '%F3': '\xf3', '%f4': '\xf4', '%F4': '\xf4', + '%f5': '\xf5', '%F5': '\xf5', '%f6': '\xf6', '%F6': '\xf6', '%f7': '\xf7', + '%F7': '\xf7', '%f8': '\xf8', '%F8': '\xf8', '%f9': '\xf9', '%F9': '\xf9', + '%fa': '\xfa', '%Fa': '\xfa', '%fA': '\xfa', '%FA': '\xfa', '%fb': '\xfb', + '%Fb': '\xfb', '%fB': '\xfb', '%FB': '\xfb', '%fc': '\xfc', '%Fc': '\xfc', + '%fC': '\xfc', '%FC': '\xfc', '%fd': '\xfd', '%Fd': '\xfd', '%fD': '\xfd', + '%FD': '\xfd', '%fe': '\xfe', '%Fe': '\xfe', '%fE': '\xfe', '%FE': '\xfe', + '%ff': '\xff', '%Ff': '\xff', '%fF': '\xff', '%FF': '\xff' +} + +function encodedReplacer (match) { + return EncodedLookup[match] +} + +const STATE_KEY = 0 +const STATE_VALUE = 1 +const STATE_CHARSET = 2 +const STATE_LANG = 3 + +function parseParams (str) { + const res = [] + let state = STATE_KEY + let charset = '' + let inquote = false + let escaping = false + let p = 0 + let tmp = '' + const len = str.length + + for (var i = 0; i < len; ++i) { // eslint-disable-line no-var + const char = str[i] + if (char === '\\' && inquote) { + if (escaping) { escaping = false } else { + escaping = true + continue + } + } else if (char === '"') { + if (!escaping) { + if (inquote) { + inquote = false + state = STATE_KEY + } else { inquote = true } + continue + } else { escaping = false } + } else { + if (escaping && inquote) { tmp += '\\' } + escaping = false + if ((state === STATE_CHARSET || state === STATE_LANG) && char === "'") { + if (state === STATE_CHARSET) { + state = STATE_LANG + charset = tmp.substring(1) + } else { state = STATE_VALUE } + tmp = '' + continue + } else if (state === STATE_KEY && + (char === '*' || char === '=') && + res.length) { + state = char === '*' + ? STATE_CHARSET + : STATE_VALUE + res[p] = [tmp, undefined] + tmp = '' + continue + } else if (!inquote && char === ';') { + state = STATE_KEY + if (charset) { + if (tmp.length) { + tmp = decodeText(tmp.replace(RE_ENCODED, encodedReplacer), + 'binary', + charset) + } + charset = '' + } else if (tmp.length) { + tmp = decodeText(tmp, 'binary', 'utf8') + } + if (res[p] === undefined) { res[p] = tmp } else { res[p][1] = tmp } + tmp = '' + ++p + continue + } else if (!inquote && (char === ' ' || char === '\t')) { continue } + } + tmp += char + } + if (charset && tmp.length) { + tmp = decodeText(tmp.replace(RE_ENCODED, encodedReplacer), + 'binary', + charset) + } else if (tmp) { + tmp = decodeText(tmp, 'binary', 'utf8') + } + + if (res[p] === undefined) { + if (tmp) { res[p] = tmp } + } else { res[p][1] = tmp } + + return res +} + +module.exports = parseParams diff --git a/deps/undici/src/node_modules/@fastify/busboy/package.json b/deps/undici/src/node_modules/@fastify/busboy/package.json new file mode 100644 index 00000000000000..4be895c108c837 --- /dev/null +++ b/deps/undici/src/node_modules/@fastify/busboy/package.json @@ -0,0 +1,86 @@ +{ + "name": "@fastify/busboy", + "version": "2.1.0", + "private": false, + "author": "Brian White ", + "contributors": [ + { + "name": "Igor Savin", + "email": "kibertoad@gmail.com", + "url": "https://github.com/kibertoad" + }, + { + "name": "Aras Abbasi", + "email": "aras.abbasi@gmail.com", + "url": "https://github.com/uzlopak" + } + ], + "description": "A streaming parser for HTML form data for node.js", + "main": "lib/main", + "type": "commonjs", + "types": "lib/main.d.ts", + "scripts": { + "bench:busboy": "cd benchmarks && npm install && npm run benchmark-fastify", + "bench:dicer": "node bench/dicer/dicer-bench-multipart-parser.js", + "coveralls": "nyc report --reporter=lcov", + "lint": "npm run lint:standard", + "lint:everything": "npm run lint && npm run test:types", + "lint:fix": "standard --fix", + "lint:standard": "standard --verbose | snazzy", + "test:mocha": "tap", + "test:types": "tsd", + "test:coverage": "nyc npm run test", + "test": "npm run test:mocha" + }, + "engines": { + "node": ">=14" + }, + "devDependencies": { + "@types/node": "^20.1.0", + "busboy": "^1.0.0", + "photofinish": "^1.8.0", + "snazzy": "^9.0.0", + "standard": "^17.0.0", + "tap": "^16.3.8", + "tinybench": "^2.5.1", + "tsd": "^0.29.0", + "typescript": "^5.0.2" + }, + "keywords": [ + "uploads", + "forms", + "multipart", + "form-data" + ], + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/fastify/busboy.git" + }, + "tsd": { + "directory": "test/types", + "compilerOptions": { + "esModuleInterop": false, + "module": "commonjs", + "target": "ES2017" + } + }, + "standard": { + "globals": [ + "describe", + "it" + ], + "ignore": [ + "bench" + ] + }, + "files": [ + "README.md", + "LICENSE", + "lib/*", + "deps/encoding/*", + "deps/dicer/lib", + "deps/streamsearch/", + "deps/dicer/LICENSE" + ] +} diff --git a/deps/undici/src/package-lock.json b/deps/undici/src/package-lock.json index d6f048345a665b..faba8557bd37ed 100644 --- a/deps/undici/src/package-lock.json +++ b/deps/undici/src/package-lock.json @@ -1,12 +1,12 @@ { "name": "undici", - "version": "5.27.0", + "version": "5.27.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "undici", - "version": "5.27.0", + "version": "5.27.2", "license": "MIT", "dependencies": { "@fastify/busboy": "^2.0.0" @@ -166,30 +166,30 @@ } }, "node_modules/@babel/compat-data": { - "version": "7.23.2", - "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.23.2.tgz", - "integrity": "sha512-0S9TQMmDHlqAZ2ITT95irXKfxN9bncq8ZCoJhun3nHL/lLUxd2NKBJYoNGWH7S0hz6fRQwWlAWn/ILM0C70KZQ==", + "version": "7.23.3", + "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.23.3.tgz", + "integrity": "sha512-BmR4bWbDIoFJmJ9z2cZ8Gmm2MXgEDgjdWgpKmKWUt54UGFJdlj31ECtbaDvCG/qVdG3AQ1SfpZEs01lUFbzLOQ==", "dev": true, "engines": { "node": ">=6.9.0" } }, "node_modules/@babel/core": { - "version": "7.23.2", - "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.23.2.tgz", - "integrity": "sha512-n7s51eWdaWZ3vGT2tD4T7J6eJs3QoBXydv7vkUM06Bf1cbVD2Kc2UrkzhiQwobfV7NwOnQXYL7UBJ5VPU+RGoQ==", + "version": "7.23.3", + "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.23.3.tgz", + "integrity": "sha512-Jg+msLuNuCJDyBvFv5+OKOUjWMZgd85bKjbICd3zWrKAo+bJ49HJufi7CQE0q0uR8NGyO6xkCACScNqyjHSZew==", "dev": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.22.13", - "@babel/generator": "^7.23.0", + "@babel/generator": "^7.23.3", "@babel/helper-compilation-targets": "^7.22.15", - "@babel/helper-module-transforms": "^7.23.0", + "@babel/helper-module-transforms": "^7.23.3", "@babel/helpers": "^7.23.2", - "@babel/parser": "^7.23.0", + "@babel/parser": "^7.23.3", "@babel/template": "^7.22.15", - "@babel/traverse": "^7.23.2", - "@babel/types": "^7.23.0", + "@babel/traverse": "^7.23.3", + "@babel/types": "^7.23.3", "convert-source-map": "^2.0.0", "debug": "^4.1.0", "gensync": "^1.0.0-beta.2", @@ -237,12 +237,12 @@ } }, "node_modules/@babel/generator": { - "version": "7.23.0", - "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.23.0.tgz", - "integrity": "sha512-lN85QRR+5IbYrMWM6Y4pE/noaQtg4pNiqeNGX60eqOfo6gtEj6uw/JagelB8vVztSd7R6M5n1+PQkDbHbBRU4g==", + "version": "7.23.3", + "resolved": "https://registry.npmjs.org/@babel/generator/-/generator-7.23.3.tgz", + "integrity": "sha512-keeZWAV4LU3tW0qRi19HRpabC/ilM0HRBBzf9/k8FFiG4KVpiv0FIy4hHfLfFQZNhziCTPTmd59zoyv6DNISzg==", "dev": true, "dependencies": { - "@babel/types": "^7.23.0", + "@babel/types": "^7.23.3", "@jridgewell/gen-mapping": "^0.3.2", "@jridgewell/trace-mapping": "^0.3.17", "jsesc": "^2.5.1" @@ -323,9 +323,9 @@ } }, "node_modules/@babel/helper-module-transforms": { - "version": "7.23.0", - "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.23.0.tgz", - "integrity": "sha512-WhDWw1tdrlT0gMgUJSlX0IQvoO1eN279zrAUbVB+KpV2c3Tylz8+GnKOLllCS6Z/iZQEyVYxhZVUdPTqs2YYPw==", + "version": "7.23.3", + "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.23.3.tgz", + "integrity": "sha512-7bBs4ED9OmswdfDzpz4MpWgSrV7FXlc3zIagvLFjS5H+Mk7Snr21vQ6QwrsoCGMfNC4e4LQPdoULEt4ykz0SRQ==", "dev": true, "dependencies": { "@babel/helper-environment-visitor": "^7.22.20", @@ -501,9 +501,9 @@ } }, "node_modules/@babel/parser": { - "version": "7.23.0", - "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.23.0.tgz", - "integrity": "sha512-vvPKKdMemU85V9WE/l5wZEmImpCtLqbnTvqDS2U1fJ96KrxoW7KrXhNsNCblQlg8Ck4b85yxdTyelsMUgFUXiw==", + "version": "7.23.3", + "resolved": "https://registry.npmjs.org/@babel/parser/-/parser-7.23.3.tgz", + "integrity": "sha512-uVsWNvlVsIninV2prNz/3lHCb+5CJ+e+IUBfbjToAHODtfGYLfCFuY4AU7TskI+dAKk+njsPiBjq1gKTvZOBaw==", "dev": true, "bin": { "parser": "bin/babel-parser.js" @@ -573,9 +573,9 @@ } }, "node_modules/@babel/plugin-syntax-jsx": { - "version": "7.22.5", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.22.5.tgz", - "integrity": "sha512-gvyP4hZrgrs/wWMaocvxZ44Hw0b3W8Pe+cMxc8V1ULQ07oh8VNbIRaoD1LRZVTvD+0nieDKjfgKg89sD7rrKrg==", + "version": "7.23.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.23.3.tgz", + "integrity": "sha512-EB2MELswq55OHUoRZLGg/zC7QWUKfNLpE57m/S2yr1uEneIgsTgrSzXP3NXEsMkVn76OlaVVnzN+ugObuYGwhg==", "dev": true, "dependencies": { "@babel/helper-plugin-utils": "^7.22.5" @@ -675,9 +675,9 @@ } }, "node_modules/@babel/plugin-syntax-typescript": { - "version": "7.22.5", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.22.5.tgz", - "integrity": "sha512-1mS2o03i7t1c6VzH6fdQ3OA8tcEIxwG18zIPRp+UY1Ihv6W+XZzBCVxExF9upussPXJ0xE9XRHwMoNs1ep/nRQ==", + "version": "7.23.3", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-typescript/-/plugin-syntax-typescript-7.23.3.tgz", + "integrity": "sha512-9EiNjVJOMwCO+43TqoTrgQ8jMwcAd0sWyXi9RPfIsLTj4R2MADDDQXELhffaUx/uJv2AYcxBgPwH6j4TIA4ytQ==", "dev": true, "dependencies": { "@babel/helper-plugin-utils": "^7.22.5" @@ -716,19 +716,19 @@ } }, "node_modules/@babel/traverse": { - "version": "7.23.2", - "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.23.2.tgz", - "integrity": "sha512-azpe59SQ48qG6nu2CzcMLbxUudtN+dOM9kDbUqGq3HXUJRlo7i8fvPoxQUzYgLZ4cMVmuZgm8vvBpNeRhd6XSw==", + "version": "7.23.3", + "resolved": "https://registry.npmjs.org/@babel/traverse/-/traverse-7.23.3.tgz", + "integrity": "sha512-+K0yF1/9yR0oHdE0StHuEj3uTPzwwbrLGfNOndVJVV2TqA5+j3oljJUb4nmB954FLGjNem976+B+eDuLIjesiQ==", "dev": true, "dependencies": { "@babel/code-frame": "^7.22.13", - "@babel/generator": "^7.23.0", + "@babel/generator": "^7.23.3", "@babel/helper-environment-visitor": "^7.22.20", "@babel/helper-function-name": "^7.23.0", "@babel/helper-hoist-variables": "^7.22.5", "@babel/helper-split-export-declaration": "^7.22.6", - "@babel/parser": "^7.23.0", - "@babel/types": "^7.23.0", + "@babel/parser": "^7.23.3", + "@babel/types": "^7.23.3", "debug": "^4.1.0", "globals": "^11.1.0" }, @@ -760,9 +760,9 @@ "dev": true }, "node_modules/@babel/types": { - "version": "7.23.0", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.23.0.tgz", - "integrity": "sha512-0oIyUfKoI3mSqMvsxBdclDwxXKXAUA8v/apZbc+iSyARYou1o8ZGDxbUYyLFoW2arqS2jDGqJuZvv1d/io1axg==", + "version": "7.23.3", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.23.3.tgz", + "integrity": "sha512-OZnvoH2l8PK5eUvEcUyCt/sXgr/h+UWpVuBbOljwcrAgUl6lpchoQ++PHGyQy1AtYnVA6CEq3y5xeEI10brpXw==", "dev": true, "dependencies": { "@babel/helper-string-parser": "^7.22.5", @@ -804,9 +804,9 @@ } }, "node_modules/@eslint/eslintrc": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-2.1.2.tgz", - "integrity": "sha512-+wvgpDsrB1YqAMdEUCcnTlpfVBH7Vqn6A/NT3D8WVXFIaKMlErPIZT3oCIAVCOtarRpMtelZLqJeU3t7WY6X6g==", + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-2.1.3.tgz", + "integrity": "sha512-yZzuIG+jnVu6hNSzFEN07e8BxF3uAzYtQb6uDkaYZLo6oYZDCq454c5kB8zxnzfCYyP4MIuyBn10L0DqwujTmA==", "dev": true, "dependencies": { "ajv": "^6.12.4", @@ -895,16 +895,16 @@ } }, "node_modules/@eslint/js": { - "version": "8.52.0", - "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.52.0.tgz", - "integrity": "sha512-mjZVbpaeMZludF2fsWLD0Z9gCref1Tk4i9+wddjRvpUNqqcndPkBD09N/Mapey0b3jaXbLm2kICwFv2E64QinA==", + "version": "8.54.0", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.54.0.tgz", + "integrity": "sha512-ut5V+D+fOoWPgGGNj83GGjnntO39xDy6DWxO0wb7Jp3DcMX0TfIqdzHF85VTQkerdyGmuuMD9AKAo5KiNlf/AQ==", "dev": true, "engines": { "node": "^12.22.0 || ^14.17.0 || >=16.0.0" } }, "node_modules/@fastify/busboy": { - "version": "2.0.0", + "version": "2.1.0", "license": "MIT", "engines": { "node": ">=14" @@ -1502,9 +1502,9 @@ } }, "node_modules/@types/babel__core": { - "version": "7.20.3", - "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.3.tgz", - "integrity": "sha512-54fjTSeSHwfan8AyHWrKbfBWiEUrNTZsUwPTDSNaaP1QDQIZbeNUg3a59E9D+375MzUw/x1vx2/0F5LBz+AeYA==", + "version": "7.20.4", + "resolved": "https://registry.npmjs.org/@types/babel__core/-/babel__core-7.20.4.tgz", + "integrity": "sha512-mLnSC22IC4vcWiuObSRjrLd9XcBTGf59vUSoq2jkQDJ/QQ8PMI9rSuzE+aEV8karUMbskw07bKYoUJCKTUaygg==", "dev": true, "dependencies": { "@babel/parser": "^7.20.7", @@ -1515,18 +1515,18 @@ } }, "node_modules/@types/babel__generator": { - "version": "7.6.6", - "resolved": "https://registry.npmjs.org/@types/babel__generator/-/babel__generator-7.6.6.tgz", - "integrity": "sha512-66BXMKb/sUWbMdBNdMvajU7i/44RkrA3z/Yt1c7R5xejt8qh84iU54yUWCtm0QwGJlDcf/gg4zd/x4mpLAlb/w==", + "version": "7.6.7", + "resolved": "https://registry.npmjs.org/@types/babel__generator/-/babel__generator-7.6.7.tgz", + "integrity": "sha512-6Sfsq+EaaLrw4RmdFWE9Onp63TOUue71AWb4Gpa6JxzgTYtimbM086WnYTy2U67AofR++QKCo08ZP6pwx8YFHQ==", "dev": true, "dependencies": { "@babel/types": "^7.0.0" } }, "node_modules/@types/babel__template": { - "version": "7.4.3", - "resolved": "https://registry.npmjs.org/@types/babel__template/-/babel__template-7.4.3.tgz", - "integrity": "sha512-ciwyCLeuRfxboZ4isgdNZi/tkt06m8Tw6uGbBSBgWrnnZGNXiEyM27xc/PjXGQLqlZ6ylbgHMnm7ccF9tCkOeQ==", + "version": "7.4.4", + "resolved": "https://registry.npmjs.org/@types/babel__template/-/babel__template-7.4.4.tgz", + "integrity": "sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==", "dev": true, "dependencies": { "@babel/parser": "^7.1.0", @@ -1534,9 +1534,9 @@ } }, "node_modules/@types/babel__traverse": { - "version": "7.20.3", - "resolved": "https://registry.npmjs.org/@types/babel__traverse/-/babel__traverse-7.20.3.tgz", - "integrity": "sha512-Lsh766rGEFbaxMIDH7Qa+Yha8cMVI3qAK6CHt3OR0YfxOIn5Z54iHiyDRycHrBqeIiqGa20Kpsv1cavfBKkRSw==", + "version": "7.20.4", + "resolved": "https://registry.npmjs.org/@types/babel__traverse/-/babel__traverse-7.20.4.tgz", + "integrity": "sha512-mSM/iKUk5fDDrEV/e83qY+Cr3I1+Q3qqTuEn++HAWYjEa1+NxZr6CNrcJGf2ZTnq4HoFGC3zaTPZTobCzCFukA==", "dev": true, "dependencies": { "@babel/types": "^7.20.7" @@ -1559,9 +1559,9 @@ } }, "node_modules/@types/esprima": { - "version": "4.0.5", - "resolved": "https://registry.npmjs.org/@types/esprima/-/esprima-4.0.5.tgz", - "integrity": "sha512-i1/wCCoHMGLSfEn68SZvao+ICYR7clqRfUAiCkxzqB7DuOMLVAgiiAcxSixtY7NukBnbQNDt5C1zmaLqCX8jDw==", + "version": "4.0.6", + "resolved": "https://registry.npmjs.org/@types/esprima/-/esprima-4.0.6.tgz", + "integrity": "sha512-lIk+kSt9lGv5hxK6aZNjiUEGZqKmOTpmg0tKiJQI+Ow98fLillxsiZNik5+RcP7mXL929KiTH/D9jGtpDlMbVw==", "dev": true, "dependencies": { "@types/estree": "*" @@ -1583,42 +1583,42 @@ "dev": true }, "node_modules/@types/graceful-fs": { - "version": "4.1.8", - "resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.8.tgz", - "integrity": "sha512-NhRH7YzWq8WiNKVavKPBmtLYZHxNY19Hh+az28O/phfp68CF45pMFud+ZzJ8ewnxnC5smIdF3dqFeiSUQ5I+pw==", + "version": "4.1.9", + "resolved": "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.9.tgz", + "integrity": "sha512-olP3sd1qOEe5dXTSaFvQG+02VdRXcdytWLAZsAq1PecU8uqQAhkrnbli7DagjtXKW/Bl7YJbUsa8MPcuc8LHEQ==", "dev": true, "dependencies": { "@types/node": "*" } }, "node_modules/@types/istanbul-lib-coverage": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.5.tgz", - "integrity": "sha512-zONci81DZYCZjiLe0r6equvZut0b+dBRPBN5kBDjsONnutYNtJMoWQ9uR2RkL1gLG9NMTzvf+29e5RFfPbeKhQ==", + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.6.tgz", + "integrity": "sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w==", "dev": true }, "node_modules/@types/istanbul-lib-report": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/@types/istanbul-lib-report/-/istanbul-lib-report-3.0.2.tgz", - "integrity": "sha512-8toY6FgdltSdONav1XtUHl4LN1yTmLza+EuDazb/fEmRNCwjyqNVIQWs2IfC74IqjHkREs/nQ2FWq5kZU9IC0w==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/@types/istanbul-lib-report/-/istanbul-lib-report-3.0.3.tgz", + "integrity": "sha512-NQn7AHQnk/RSLOxrBbGyJM/aVQ+pjj5HCgasFxc0K/KhoATfQ/47AyUl15I2yBUpihjmas+a+VJBOqecrFH+uA==", "dev": true, "dependencies": { "@types/istanbul-lib-coverage": "*" } }, "node_modules/@types/istanbul-reports": { - "version": "3.0.3", - "resolved": "https://registry.npmjs.org/@types/istanbul-reports/-/istanbul-reports-3.0.3.tgz", - "integrity": "sha512-1nESsePMBlf0RPRffLZi5ujYh7IH1BWL4y9pr+Bn3cJBdxz+RTP8bUFljLz9HvzhhOSWKdyBZ4DIivdL6rvgZg==", + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/@types/istanbul-reports/-/istanbul-reports-3.0.4.tgz", + "integrity": "sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==", "dev": true, "dependencies": { "@types/istanbul-lib-report": "*" } }, "node_modules/@types/json-schema": { - "version": "7.0.14", - "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.14.tgz", - "integrity": "sha512-U3PUjAudAdJBeC2pgN8uTIKgxrb4nlDF3SF0++EldXQvQBGkpFZMSnwQiIoDU77tv45VgNkl/L4ouD+rEomujw==", + "version": "7.0.15", + "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", + "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==", "dev": true }, "node_modules/@types/json5": { @@ -1628,54 +1628,54 @@ "dev": true }, "node_modules/@types/minimist": { - "version": "1.2.4", - "resolved": "https://registry.npmjs.org/@types/minimist/-/minimist-1.2.4.tgz", - "integrity": "sha512-Kfe/D3hxHTusnPNRbycJE1N77WHDsdS4AjUYIzlDzhDrS47NrwuL3YW4VITxwR7KCVpzwgy4Rbj829KSSQmwXQ==", + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/@types/minimist/-/minimist-1.2.5.tgz", + "integrity": "sha512-hov8bUuiLiyFPGyFPE1lwWhmzYbirOXQNNo40+y3zow8aFVTeyn3VWL0VFFfdNddA8S4Vf0Tc062rzyNr7Paag==", "dev": true }, "node_modules/@types/node": { - "version": "18.18.7", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.18.7.tgz", - "integrity": "sha512-bw+lEsxis6eqJYW8Ql6+yTqkE6RuFtsQPSe5JxXbqYRFQEER5aJA9a5UH9igqDWm3X4iLHIKOHlnAXLM4mi7uQ==", + "version": "18.18.10", + "resolved": "https://registry.npmjs.org/@types/node/-/node-18.18.10.tgz", + "integrity": "sha512-luANqZxPmjTll8bduz4ACs/lNTCLuWssCyjqTY9yLdsv1xnViQp3ISKwsEWOIecO13JWUqjVdig/Vjjc09o8uA==", "dev": true, "dependencies": { "undici-types": "~5.26.4" } }, "node_modules/@types/node-forge": { - "version": "1.3.8", - "resolved": "https://registry.npmjs.org/@types/node-forge/-/node-forge-1.3.8.tgz", - "integrity": "sha512-vGXshY9vim9CJjrpcS5raqSjEfKlJcWy2HNdgUasR66fAnVEYarrf1ULV4nfvpC1nZq/moA9qyqBcu83x+Jlrg==", + "version": "1.3.9", + "resolved": "https://registry.npmjs.org/@types/node-forge/-/node-forge-1.3.9.tgz", + "integrity": "sha512-meK88cx/sTalPSLSoCzkiUB4VPIFHmxtXm5FaaqRDqBX2i/Sy8bJ4odsan0b20RBjPh06dAQ+OTTdnyQyhJZyQ==", "dev": true, "dependencies": { "@types/node": "*" } }, "node_modules/@types/normalize-package-data": { - "version": "2.4.3", - "resolved": "https://registry.npmjs.org/@types/normalize-package-data/-/normalize-package-data-2.4.3.tgz", - "integrity": "sha512-ehPtgRgaULsFG8x0NeYJvmyH1hmlfsNLujHe9dQEia/7MAJYdzMSi19JtchUHjmBA6XC/75dK55mzZH+RyieSg==", + "version": "2.4.4", + "resolved": "https://registry.npmjs.org/@types/normalize-package-data/-/normalize-package-data-2.4.4.tgz", + "integrity": "sha512-37i+OaWTh9qeK4LSHPsyRC7NahnGotNuZvjLSgcPzblpHB3rrCJxAOgI5gCdKm7coonsaX1Of0ILiTcnZjbfxA==", "dev": true }, "node_modules/@types/stack-utils": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.2.tgz", - "integrity": "sha512-g7CK9nHdwjK2n0ymT2CW698FuWJRIx+RP6embAzZ2Qi8/ilIrA1Imt2LVSeHUzKvpoi7BhmmQcXz95eS0f2JXw==", + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.3.tgz", + "integrity": "sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==", "dev": true }, "node_modules/@types/yargs": { - "version": "17.0.29", - "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.29.tgz", - "integrity": "sha512-nacjqA3ee9zRF/++a3FUY1suHTFKZeHba2n8WeDw9cCVdmzmHpIxyzOJBcpHvvEmS8E9KqWlSnWHUkOrkhWcvA==", + "version": "17.0.31", + "resolved": "https://registry.npmjs.org/@types/yargs/-/yargs-17.0.31.tgz", + "integrity": "sha512-bocYSx4DI8TmdlvxqGpVNXOgCNR1Jj0gNPhhAY+iz1rgKDAaYrAYdFYnhDV1IFuiuVc9HkOwyDcFxaTElF3/wg==", "dev": true, "dependencies": { "@types/yargs-parser": "*" } }, "node_modules/@types/yargs-parser": { - "version": "21.0.2", - "resolved": "https://registry.npmjs.org/@types/yargs-parser/-/yargs-parser-21.0.2.tgz", - "integrity": "sha512-5qcvofLPbfjmBfKaLfj/+f+Sbd6pN4zl7w7VSVI5uz7m9QZTuB2aZAa2uo1wHFBNN2x6g/SoTkXmd8mQnQF2Cw==", + "version": "21.0.3", + "resolved": "https://registry.npmjs.org/@types/yargs-parser/-/yargs-parser-21.0.3.tgz", + "integrity": "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==", "dev": true }, "node_modules/@ungap/structured-clone": { @@ -2200,13 +2200,14 @@ } }, "node_modules/axios": { - "version": "0.27.2", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.27.2.tgz", - "integrity": "sha512-t+yRIyySRTp/wua5xEr+z1q60QmLq8ABsS5O9Me1AsE5dfKqgnCFzwiCZZ/cGNd1lq4/7akDWMxdhVlucjmnOQ==", + "version": "1.6.2", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.6.2.tgz", + "integrity": "sha512-7i24Ri4pmDRfJTR7LDBhsOTtcm+9kjX5WiY1X3wIisx6G9So3pfMkEiU7emUBe46oceVImccTEM3k6C5dbVW8A==", "dev": true, "dependencies": { - "follow-redirects": "^1.14.9", - "form-data": "^4.0.0" + "follow-redirects": "^1.15.0", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" } }, "node_modules/babel-jest": { @@ -2670,9 +2671,9 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001557", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001557.tgz", - "integrity": "sha512-91oR7hLNUP3gG6MLU+n96em322a8Xzes8wWdBKhLgUoiJsAF5irZnxSUCbc+qUZXNnPCfUwLOi9ZCZpkvjQajw==", + "version": "1.0.30001563", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001563.tgz", + "integrity": "sha512-na2WUmOxnwIZtwnFI2CZ/3er0wdNzU7hN+cPYz/z2ajHThnkWjNBOpEPP4n+4r2WPM847JaMotaJE3bnfzjyKw==", "dev": true, "funding": [ { @@ -3303,17 +3304,20 @@ } }, "node_modules/deep-equal": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/deep-equal/-/deep-equal-1.1.1.tgz", - "integrity": "sha512-yd9c5AdiqVcR+JjcwUQb9DkhJc8ngNr0MahEBGvDiJw8puWab2yZlh+nkasOnZP+EGTAP6rRp2JzJhJZzvNF8g==", + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/deep-equal/-/deep-equal-1.1.2.tgz", + "integrity": "sha512-5tdhKF6DbU7iIzrIOa1AOUt39ZRm13cmL1cGEh//aqR8x9+tNfbywRf0n5FD/18OKMdo7DNEtrX2t22ZAkI+eg==", "dev": true, "dependencies": { - "is-arguments": "^1.0.4", - "is-date-object": "^1.0.1", - "is-regex": "^1.0.4", - "object-is": "^1.0.1", + "is-arguments": "^1.1.1", + "is-date-object": "^1.0.5", + "is-regex": "^1.1.4", + "object-is": "^1.1.5", "object-keys": "^1.1.1", - "regexp.prototype.flags": "^1.2.0" + "regexp.prototype.flags": "^1.5.1" + }, + "engines": { + "node": ">= 0.4" }, "funding": { "url": "https://github.com/sponsors/ljharb" @@ -3794,9 +3798,9 @@ "dev": true }, "node_modules/electron-to-chromium": { - "version": "1.4.569", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.569.tgz", - "integrity": "sha512-LsrJjZ0IbVy12ApW3gpYpcmHS3iRxH4bkKOW98y1/D+3cvDUWGcbzbsFinfUS8knpcZk/PG/2p/RnkMCYN7PVg==", + "version": "1.4.588", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.4.588.tgz", + "integrity": "sha512-soytjxwbgcCu7nh5Pf4S2/4wa6UIu+A3p03U2yVr53qGxi1/VTR3ENI+p50v+UxqqZAfl48j3z55ud7VHIOr9w==", "dev": true }, "node_modules/emittery": { @@ -4049,15 +4053,15 @@ } }, "node_modules/eslint": { - "version": "8.52.0", - "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.52.0.tgz", - "integrity": "sha512-zh/JHnaixqHZsolRB/w9/02akBk9EPrOs9JwcTP2ek7yL5bVvXuRariiaAjjoJ5DvuwQ1WAE/HsMz+w17YgBCg==", + "version": "8.54.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.54.0.tgz", + "integrity": "sha512-NY0DfAkM8BIZDVl6PgSa1ttZbx3xHgJzSNJKYcQglem6CppHyMhRIQkBVSSMaSRnLhig3jsDbEzOjwCVt4AmmA==", "dev": true, "dependencies": { "@eslint-community/eslint-utils": "^4.2.0", "@eslint-community/regexpp": "^4.6.1", - "@eslint/eslintrc": "^2.1.2", - "@eslint/js": "8.52.0", + "@eslint/eslintrc": "^2.1.3", + "@eslint/js": "8.54.0", "@humanwhocodes/config-array": "^0.11.13", "@humanwhocodes/module-importer": "^1.0.1", "@nodelib/fs.walk": "^1.2.8", @@ -4876,9 +4880,9 @@ "dev": true }, "node_modules/fast-glob": { - "version": "3.3.1", - "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.1.tgz", - "integrity": "sha512-kNFPyjhh5cKjrUltxs+wFx+ZkbRaxxmZ+X0ZU31SOsxCEtP9VPgtq2teZw1DebupL5GmDaNQ6yKMMVcM41iqDg==", + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz", + "integrity": "sha512-oX2ruAFQwf/Orj8m737Y5adxDQO0LAB7/S5MnxCdTNDd4p6BsyIVsv9JQsATbTSq8KHRpLwIHbVlUNatxd+1Ow==", "dev": true, "dependencies": { "@nodelib/fs.stat": "^2.0.2", @@ -5126,9 +5130,9 @@ } }, "node_modules/flat-cache": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.1.1.tgz", - "integrity": "sha512-/qM2b3LUIaIgviBQovTLvijfyOQXPtSRnRK26ksj2J7rzPIecePUIpJsZ4T02Qg+xiAEKIs5K8dsHEd+VaKa/Q==", + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.2.0.tgz", + "integrity": "sha512-CYcENa+FtcUKLmhhqyctpclsq7QF38pKjZHsGNiSQF5r4FtoKDWabFDl3hzaEQMvT1LHEysw5twgLvpYYb4vbw==", "dev": true, "dependencies": { "flatted": "^3.2.9", @@ -5136,7 +5140,7 @@ "rimraf": "^3.0.2" }, "engines": { - "node": ">=12.0.0" + "node": "^10.12.0 || >=12.0.0" } }, "node_modules/flat-cache/node_modules/rimraf": { @@ -5990,9 +5994,9 @@ } }, "node_modules/ignore": { - "version": "5.2.4", - "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.2.4.tgz", - "integrity": "sha512-MAb38BcSbH0eHNBxn7ql2NH/kX33OkB3lZ1BNdh7ENeRChHTYsTvWrMubiIAMNS2llXEEgZ1MUOBtXChP3kaFQ==", + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.0.tgz", + "integrity": "sha512-g7dmpshy+gD7mh88OC9NwSGTKoc3kyLAZQRU1mt53Aw/vnvfXnbC+F/7F7QoYVKbV+KNvJx8wArewKy1vXMtlg==", "dev": true, "engines": { "node": ">= 4" @@ -6620,9 +6624,9 @@ "dev": true }, "node_modules/istanbul-lib-coverage": { - "version": "3.2.0", - "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.0.tgz", - "integrity": "sha512-eOeJ5BHCmHYvQK7xt9GkdHuzuCGS1Y6g9Gvnx3Ym33fz/HpLRYxiS0wHNr+m/MBC8B647Xt608vCDEvhl9c6Mw==", + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/istanbul-lib-coverage/-/istanbul-lib-coverage-3.2.2.tgz", + "integrity": "sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==", "dev": true, "engines": { "node": ">=8" @@ -8083,9 +8087,9 @@ } }, "node_modules/medium-zoom": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/medium-zoom/-/medium-zoom-1.0.8.tgz", - "integrity": "sha512-CjFVuFq/IfrdqesAXfg+hzlDKu6A2n80ZIq0Kl9kWjoHh9j1N9Uvk5X0/MmN0hOfm5F9YBswlClhcwnmtwz7gA==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/medium-zoom/-/medium-zoom-1.1.0.tgz", + "integrity": "sha512-ewyDsp7k4InCUp3jRmwHBRFGyjBimKps/AJLjRSox+2q/2H4p/PNpQf+pwONWlJiOudkBXtbdmVbFjqyybfTmQ==", "dev": true }, "node_modules/meow": { @@ -8127,10 +8131,13 @@ } }, "node_modules/merge-descriptors": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.1.tgz", - "integrity": "sha512-cCi6g3/Zr1iqQi6ySbseM1Xvooa98N0w31jzUYrXPX2xqObmFGHJ0tQ5u74H3mVh7wLouTseZyYIq39g8cNp1w==", - "dev": true + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-1.0.3.tgz", + "integrity": "sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==", + "dev": true, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } }, "node_modules/merge-source-map": { "version": "1.1.0", @@ -10140,6 +10147,12 @@ "proxy": "bin/proxy.js" } }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", + "dev": true + }, "node_modules/proxy/node_modules/debug": { "version": "4.3.4", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", @@ -10197,9 +10210,9 @@ } }, "node_modules/punycode": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.0.tgz", - "integrity": "sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA==", + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", "dev": true, "engines": { "node": ">=6" @@ -14533,16 +14546,16 @@ } }, "node_modules/wait-on": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/wait-on/-/wait-on-7.0.1.tgz", - "integrity": "sha512-9AnJE9qTjRQOlTZIldAaf/da2eW0eSRSgcqq85mXQja/DW3MriHxkpODDSUEg+Gri/rKEcXUZHe+cevvYItaog==", + "version": "7.2.0", + "resolved": "https://registry.npmjs.org/wait-on/-/wait-on-7.2.0.tgz", + "integrity": "sha512-wCQcHkRazgjG5XoAq9jbTMLpNIjoSlZslrJ2+N9MxDsGEv1HnFoVjOCexL0ESva7Y9cu350j+DWADdk54s4AFQ==", "dev": true, "dependencies": { - "axios": "^0.27.2", - "joi": "^17.7.0", + "axios": "^1.6.1", + "joi": "^17.11.0", "lodash": "^4.17.21", - "minimist": "^1.2.7", - "rxjs": "^7.8.0" + "minimist": "^1.2.8", + "rxjs": "^7.8.1" }, "bin": { "wait-on": "bin/wait-on" diff --git a/deps/undici/src/package.json b/deps/undici/src/package.json index 454271d7b466f4..ea1228f94b92f8 100644 --- a/deps/undici/src/package.json +++ b/deps/undici/src/package.json @@ -1,6 +1,6 @@ { "name": "undici", - "version": "5.27.0", + "version": "5.27.2", "description": "An HTTP/1.1 client, written from scratch for Node.js", "homepage": "https://undici.nodejs.org", "bugs": { diff --git a/deps/undici/undici.js b/deps/undici/undici.js index a5da33fa3b0075..320fcb7cefddeb 100644 --- a/deps/undici/undici.js +++ b/deps/undici/undici.js @@ -495,7 +495,7 @@ var require_util = __commonJS({ } __name(isReadableAborted, "isReadableAborted"); function destroy(stream2, err) { - if (!isStream(stream2) || isDestroyed(stream2)) { + if (stream2 == null || !isStream(stream2) || isDestroyed(stream2)) { return; } if (typeof stream2.destroy === "function") { @@ -2818,20 +2818,99 @@ var require_decodeText = __commonJS({ ["utf-8", utf8Decoder], ["utf8", utf8Decoder] ]); - function decodeText(text, textEncoding, destEncoding) { - if (text) { - if (textDecoders.has(destEncoding)) { - try { - return textDecoders.get(destEncoding).decode(Buffer.from(text, textEncoding)); - } catch (e) { - } - } else { + function getDecoder(charset) { + let lc; + while (true) { + switch (charset) { + case "utf-8": + case "utf8": + return decoders.utf8; + case "latin1": + case "ascii": + case "us-ascii": + case "iso-8859-1": + case "iso8859-1": + case "iso88591": + case "iso_8859-1": + case "windows-1252": + case "iso_8859-1:1987": + case "cp1252": + case "x-cp1252": + return decoders.latin1; + case "utf16le": + case "utf-16le": + case "ucs2": + case "ucs-2": + return decoders.utf16le; + case "base64": + return decoders.base64; + default: + if (lc === void 0) { + lc = true; + charset = charset.toLowerCase(); + continue; + } + return decoders.other.bind(charset); + } + } + } + __name(getDecoder, "getDecoder"); + var decoders = { + utf8: (data, sourceEncoding) => { + if (data.length === 0) { + return ""; + } + if (typeof data === "string") { + data = Buffer.from(data, sourceEncoding); + } + return data.utf8Slice(0, data.length); + }, + latin1: (data, sourceEncoding) => { + if (data.length === 0) { + return ""; + } + if (typeof data === "string") { + return data; + } + return data.latin1Slice(0, data.length); + }, + utf16le: (data, sourceEncoding) => { + if (data.length === 0) { + return ""; + } + if (typeof data === "string") { + data = Buffer.from(data, sourceEncoding); + } + return data.ucs2Slice(0, data.length); + }, + base64: (data, sourceEncoding) => { + if (data.length === 0) { + return ""; + } + if (typeof data === "string") { + data = Buffer.from(data, sourceEncoding); + } + return data.base64Slice(0, data.length); + }, + other: (data, sourceEncoding) => { + if (data.length === 0) { + return ""; + } + if (typeof data === "string") { + data = Buffer.from(data, sourceEncoding); + } + if (textDecoders.has(exports2.toString())) { try { - textDecoders.set(destEncoding, new TextDecoder(destEncoding)); - return textDecoders.get(destEncoding).decode(Buffer.from(text, textEncoding)); + return textDecoders.get(exports2).decode(data); } catch (e) { } } + return typeof data === "string" ? data : data.toString(); + } + }; + function decodeText(text, sourceEncoding, destEncoding) { + if (text) { + return getDecoder(destEncoding)(text, sourceEncoding); } return text; } @@ -2845,20 +2924,511 @@ var require_parseParams = __commonJS({ "node_modules/@fastify/busboy/lib/utils/parseParams.js"(exports2, module2) { "use strict"; var decodeText = require_decodeText(); - var RE_ENCODED = /%([a-fA-F0-9]{2})/g; - function encodedReplacer(match, byte) { - return String.fromCharCode(parseInt(byte, 16)); + var RE_ENCODED = /%[a-fA-F0-9][a-fA-F0-9]/g; + var EncodedLookup = { + "%00": "\0", + "%01": "", + "%02": "", + "%03": "", + "%04": "", + "%05": "", + "%06": "", + "%07": "\x07", + "%08": "\b", + "%09": " ", + "%0a": "\n", + "%0A": "\n", + "%0b": "\v", + "%0B": "\v", + "%0c": "\f", + "%0C": "\f", + "%0d": "\r", + "%0D": "\r", + "%0e": "", + "%0E": "", + "%0f": "", + "%0F": "", + "%10": "", + "%11": "", + "%12": "", + "%13": "", + "%14": "", + "%15": "", + "%16": "", + "%17": "", + "%18": "", + "%19": "", + "%1a": "", + "%1A": "", + "%1b": "\x1B", + "%1B": "\x1B", + "%1c": "", + "%1C": "", + "%1d": "", + "%1D": "", + "%1e": "", + "%1E": "", + "%1f": "", + "%1F": "", + "%20": " ", + "%21": "!", + "%22": '"', + "%23": "#", + "%24": "$", + "%25": "%", + "%26": "&", + "%27": "'", + "%28": "(", + "%29": ")", + "%2a": "*", + "%2A": "*", + "%2b": "+", + "%2B": "+", + "%2c": ",", + "%2C": ",", + "%2d": "-", + "%2D": "-", + "%2e": ".", + "%2E": ".", + "%2f": "/", + "%2F": "/", + "%30": "0", + "%31": "1", + "%32": "2", + "%33": "3", + "%34": "4", + "%35": "5", + "%36": "6", + "%37": "7", + "%38": "8", + "%39": "9", + "%3a": ":", + "%3A": ":", + "%3b": ";", + "%3B": ";", + "%3c": "<", + "%3C": "<", + "%3d": "=", + "%3D": "=", + "%3e": ">", + "%3E": ">", + "%3f": "?", + "%3F": "?", + "%40": "@", + "%41": "A", + "%42": "B", + "%43": "C", + "%44": "D", + "%45": "E", + "%46": "F", + "%47": "G", + "%48": "H", + "%49": "I", + "%4a": "J", + "%4A": "J", + "%4b": "K", + "%4B": "K", + "%4c": "L", + "%4C": "L", + "%4d": "M", + "%4D": "M", + "%4e": "N", + "%4E": "N", + "%4f": "O", + "%4F": "O", + "%50": "P", + "%51": "Q", + "%52": "R", + "%53": "S", + "%54": "T", + "%55": "U", + "%56": "V", + "%57": "W", + "%58": "X", + "%59": "Y", + "%5a": "Z", + "%5A": "Z", + "%5b": "[", + "%5B": "[", + "%5c": "\\", + "%5C": "\\", + "%5d": "]", + "%5D": "]", + "%5e": "^", + "%5E": "^", + "%5f": "_", + "%5F": "_", + "%60": "`", + "%61": "a", + "%62": "b", + "%63": "c", + "%64": "d", + "%65": "e", + "%66": "f", + "%67": "g", + "%68": "h", + "%69": "i", + "%6a": "j", + "%6A": "j", + "%6b": "k", + "%6B": "k", + "%6c": "l", + "%6C": "l", + "%6d": "m", + "%6D": "m", + "%6e": "n", + "%6E": "n", + "%6f": "o", + "%6F": "o", + "%70": "p", + "%71": "q", + "%72": "r", + "%73": "s", + "%74": "t", + "%75": "u", + "%76": "v", + "%77": "w", + "%78": "x", + "%79": "y", + "%7a": "z", + "%7A": "z", + "%7b": "{", + "%7B": "{", + "%7c": "|", + "%7C": "|", + "%7d": "}", + "%7D": "}", + "%7e": "~", + "%7E": "~", + "%7f": "\x7F", + "%7F": "\x7F", + "%80": "\x80", + "%81": "\x81", + "%82": "\x82", + "%83": "\x83", + "%84": "\x84", + "%85": "\x85", + "%86": "\x86", + "%87": "\x87", + "%88": "\x88", + "%89": "\x89", + "%8a": "\x8A", + "%8A": "\x8A", + "%8b": "\x8B", + "%8B": "\x8B", + "%8c": "\x8C", + "%8C": "\x8C", + "%8d": "\x8D", + "%8D": "\x8D", + "%8e": "\x8E", + "%8E": "\x8E", + "%8f": "\x8F", + "%8F": "\x8F", + "%90": "\x90", + "%91": "\x91", + "%92": "\x92", + "%93": "\x93", + "%94": "\x94", + "%95": "\x95", + "%96": "\x96", + "%97": "\x97", + "%98": "\x98", + "%99": "\x99", + "%9a": "\x9A", + "%9A": "\x9A", + "%9b": "\x9B", + "%9B": "\x9B", + "%9c": "\x9C", + "%9C": "\x9C", + "%9d": "\x9D", + "%9D": "\x9D", + "%9e": "\x9E", + "%9E": "\x9E", + "%9f": "\x9F", + "%9F": "\x9F", + "%a0": "\xA0", + "%A0": "\xA0", + "%a1": "\xA1", + "%A1": "\xA1", + "%a2": "\xA2", + "%A2": "\xA2", + "%a3": "\xA3", + "%A3": "\xA3", + "%a4": "\xA4", + "%A4": "\xA4", + "%a5": "\xA5", + "%A5": "\xA5", + "%a6": "\xA6", + "%A6": "\xA6", + "%a7": "\xA7", + "%A7": "\xA7", + "%a8": "\xA8", + "%A8": "\xA8", + "%a9": "\xA9", + "%A9": "\xA9", + "%aa": "\xAA", + "%Aa": "\xAA", + "%aA": "\xAA", + "%AA": "\xAA", + "%ab": "\xAB", + "%Ab": "\xAB", + "%aB": "\xAB", + "%AB": "\xAB", + "%ac": "\xAC", + "%Ac": "\xAC", + "%aC": "\xAC", + "%AC": "\xAC", + "%ad": "\xAD", + "%Ad": "\xAD", + "%aD": "\xAD", + "%AD": "\xAD", + "%ae": "\xAE", + "%Ae": "\xAE", + "%aE": "\xAE", + "%AE": "\xAE", + "%af": "\xAF", + "%Af": "\xAF", + "%aF": "\xAF", + "%AF": "\xAF", + "%b0": "\xB0", + "%B0": "\xB0", + "%b1": "\xB1", + "%B1": "\xB1", + "%b2": "\xB2", + "%B2": "\xB2", + "%b3": "\xB3", + "%B3": "\xB3", + "%b4": "\xB4", + "%B4": "\xB4", + "%b5": "\xB5", + "%B5": "\xB5", + "%b6": "\xB6", + "%B6": "\xB6", + "%b7": "\xB7", + "%B7": "\xB7", + "%b8": "\xB8", + "%B8": "\xB8", + "%b9": "\xB9", + "%B9": "\xB9", + "%ba": "\xBA", + "%Ba": "\xBA", + "%bA": "\xBA", + "%BA": "\xBA", + "%bb": "\xBB", + "%Bb": "\xBB", + "%bB": "\xBB", + "%BB": "\xBB", + "%bc": "\xBC", + "%Bc": "\xBC", + "%bC": "\xBC", + "%BC": "\xBC", + "%bd": "\xBD", + "%Bd": "\xBD", + "%bD": "\xBD", + "%BD": "\xBD", + "%be": "\xBE", + "%Be": "\xBE", + "%bE": "\xBE", + "%BE": "\xBE", + "%bf": "\xBF", + "%Bf": "\xBF", + "%bF": "\xBF", + "%BF": "\xBF", + "%c0": "\xC0", + "%C0": "\xC0", + "%c1": "\xC1", + "%C1": "\xC1", + "%c2": "\xC2", + "%C2": "\xC2", + "%c3": "\xC3", + "%C3": "\xC3", + "%c4": "\xC4", + "%C4": "\xC4", + "%c5": "\xC5", + "%C5": "\xC5", + "%c6": "\xC6", + "%C6": "\xC6", + "%c7": "\xC7", + "%C7": "\xC7", + "%c8": "\xC8", + "%C8": "\xC8", + "%c9": "\xC9", + "%C9": "\xC9", + "%ca": "\xCA", + "%Ca": "\xCA", + "%cA": "\xCA", + "%CA": "\xCA", + "%cb": "\xCB", + "%Cb": "\xCB", + "%cB": "\xCB", + "%CB": "\xCB", + "%cc": "\xCC", + "%Cc": "\xCC", + "%cC": "\xCC", + "%CC": "\xCC", + "%cd": "\xCD", + "%Cd": "\xCD", + "%cD": "\xCD", + "%CD": "\xCD", + "%ce": "\xCE", + "%Ce": "\xCE", + "%cE": "\xCE", + "%CE": "\xCE", + "%cf": "\xCF", + "%Cf": "\xCF", + "%cF": "\xCF", + "%CF": "\xCF", + "%d0": "\xD0", + "%D0": "\xD0", + "%d1": "\xD1", + "%D1": "\xD1", + "%d2": "\xD2", + "%D2": "\xD2", + "%d3": "\xD3", + "%D3": "\xD3", + "%d4": "\xD4", + "%D4": "\xD4", + "%d5": "\xD5", + "%D5": "\xD5", + "%d6": "\xD6", + "%D6": "\xD6", + "%d7": "\xD7", + "%D7": "\xD7", + "%d8": "\xD8", + "%D8": "\xD8", + "%d9": "\xD9", + "%D9": "\xD9", + "%da": "\xDA", + "%Da": "\xDA", + "%dA": "\xDA", + "%DA": "\xDA", + "%db": "\xDB", + "%Db": "\xDB", + "%dB": "\xDB", + "%DB": "\xDB", + "%dc": "\xDC", + "%Dc": "\xDC", + "%dC": "\xDC", + "%DC": "\xDC", + "%dd": "\xDD", + "%Dd": "\xDD", + "%dD": "\xDD", + "%DD": "\xDD", + "%de": "\xDE", + "%De": "\xDE", + "%dE": "\xDE", + "%DE": "\xDE", + "%df": "\xDF", + "%Df": "\xDF", + "%dF": "\xDF", + "%DF": "\xDF", + "%e0": "\xE0", + "%E0": "\xE0", + "%e1": "\xE1", + "%E1": "\xE1", + "%e2": "\xE2", + "%E2": "\xE2", + "%e3": "\xE3", + "%E3": "\xE3", + "%e4": "\xE4", + "%E4": "\xE4", + "%e5": "\xE5", + "%E5": "\xE5", + "%e6": "\xE6", + "%E6": "\xE6", + "%e7": "\xE7", + "%E7": "\xE7", + "%e8": "\xE8", + "%E8": "\xE8", + "%e9": "\xE9", + "%E9": "\xE9", + "%ea": "\xEA", + "%Ea": "\xEA", + "%eA": "\xEA", + "%EA": "\xEA", + "%eb": "\xEB", + "%Eb": "\xEB", + "%eB": "\xEB", + "%EB": "\xEB", + "%ec": "\xEC", + "%Ec": "\xEC", + "%eC": "\xEC", + "%EC": "\xEC", + "%ed": "\xED", + "%Ed": "\xED", + "%eD": "\xED", + "%ED": "\xED", + "%ee": "\xEE", + "%Ee": "\xEE", + "%eE": "\xEE", + "%EE": "\xEE", + "%ef": "\xEF", + "%Ef": "\xEF", + "%eF": "\xEF", + "%EF": "\xEF", + "%f0": "\xF0", + "%F0": "\xF0", + "%f1": "\xF1", + "%F1": "\xF1", + "%f2": "\xF2", + "%F2": "\xF2", + "%f3": "\xF3", + "%F3": "\xF3", + "%f4": "\xF4", + "%F4": "\xF4", + "%f5": "\xF5", + "%F5": "\xF5", + "%f6": "\xF6", + "%F6": "\xF6", + "%f7": "\xF7", + "%F7": "\xF7", + "%f8": "\xF8", + "%F8": "\xF8", + "%f9": "\xF9", + "%F9": "\xF9", + "%fa": "\xFA", + "%Fa": "\xFA", + "%fA": "\xFA", + "%FA": "\xFA", + "%fb": "\xFB", + "%Fb": "\xFB", + "%fB": "\xFB", + "%FB": "\xFB", + "%fc": "\xFC", + "%Fc": "\xFC", + "%fC": "\xFC", + "%FC": "\xFC", + "%fd": "\xFD", + "%Fd": "\xFD", + "%fD": "\xFD", + "%FD": "\xFD", + "%fe": "\xFE", + "%Fe": "\xFE", + "%fE": "\xFE", + "%FE": "\xFE", + "%ff": "\xFF", + "%Ff": "\xFF", + "%fF": "\xFF", + "%FF": "\xFF" + }; + function encodedReplacer(match) { + return EncodedLookup[match]; } __name(encodedReplacer, "encodedReplacer"); + var STATE_KEY = 0; + var STATE_VALUE = 1; + var STATE_CHARSET = 2; + var STATE_LANG = 3; function parseParams(str) { const res = []; - let state = "key"; + let state = STATE_KEY; let charset = ""; let inquote = false; let escaping = false; let p = 0; let tmp = ""; - for (var i = 0, len = str.length; i < len; ++i) { + const len = str.length; + for (var i = 0; i < len; ++i) { const char = str[i]; if (char === "\\" && inquote) { if (escaping) { @@ -2871,7 +3441,7 @@ var require_parseParams = __commonJS({ if (!escaping) { if (inquote) { inquote = false; - state = "key"; + state = STATE_KEY; } else { inquote = true; } @@ -2884,26 +3454,22 @@ var require_parseParams = __commonJS({ tmp += "\\"; } escaping = false; - if ((state === "charset" || state === "lang") && char === "'") { - if (state === "charset") { - state = "lang"; + if ((state === STATE_CHARSET || state === STATE_LANG) && char === "'") { + if (state === STATE_CHARSET) { + state = STATE_LANG; charset = tmp.substring(1); } else { - state = "value"; + state = STATE_VALUE; } tmp = ""; continue; - } else if (state === "key" && (char === "*" || char === "=") && res.length) { - if (char === "*") { - state = "charset"; - } else { - state = "value"; - } + } else if (state === STATE_KEY && (char === "*" || char === "=") && res.length) { + state = char === "*" ? STATE_CHARSET : STATE_VALUE; res[p] = [tmp, void 0]; tmp = ""; continue; } else if (!inquote && char === ";") { - state = "key"; + state = STATE_KEY; if (charset) { if (tmp.length) { tmp = decodeText( @@ -6475,10 +7041,26 @@ var require_request2 = __commonJS({ this.bodyTimeout = bodyTimeout; this.throwOnError = throwOnError === true; this.method = method; + this.abort = null; if (body == null) { this.body = null; } else if (util.isStream(body)) { this.body = body; + const rState = this.body._readableState; + if (!rState || !rState.autoDestroy) { + this.endHandler = /* @__PURE__ */ __name(function autoDestroy() { + util.destroy(this); + }, "autoDestroy"); + this.body.on("end", this.endHandler); + } + this.errorHandler = (err) => { + if (this.abort) { + this.abort(err); + } else { + this.error = err; + } + }; + this.body.on("error", this.errorHandler); } else if (util.isBuffer(body)) { this.body = body.byteLength ? body : null; } else if (ArrayBuffer.isView(body)) { @@ -6572,7 +7154,12 @@ var require_request2 = __commonJS({ onConnect(abort) { assert(!this.aborted); assert(!this.completed); - return this[kHandler].onConnect(abort); + if (this.error) { + abort(this.error); + } else { + this.abort = abort; + return this[kHandler].onConnect(abort); + } } onHeaders(statusCode, headers, resume, statusText) { assert(!this.aborted); @@ -6593,6 +7180,7 @@ var require_request2 = __commonJS({ return this[kHandler].onUpgrade(statusCode, headers, socket); } onComplete(trailers) { + this.onFinally(); assert(!this.aborted); this.completed = true; if (channels.trailers.hasSubscribers) { @@ -6601,6 +7189,7 @@ var require_request2 = __commonJS({ return this[kHandler].onComplete(trailers); } onError(error) { + this.onFinally(); if (channels.error.hasSubscribers) { channels.error.publish({ request: this, error }); } @@ -6610,6 +7199,16 @@ var require_request2 = __commonJS({ this.aborted = true; return this[kHandler].onError(error); } + onFinally() { + if (this.errorHandler) { + this.body.off("error", this.errorHandler); + this.errorHandler = null; + } + if (this.endHandler) { + this.body.off("end", this.endHandler); + this.endHandler = null; + } + } // TODO: adjust to support H2 addHeader(key, value) { processHeader(this, key, value); @@ -8567,21 +9166,7 @@ var require_client = __commonJS({ if (client[kRunning] > 0 && (request.upgrade || request.method === "CONNECT")) { return; } - if (util.isStream(request.body) && util.bodyLength(request.body) === 0) { - request.body.on( - "data", - /* istanbul ignore next */ - function() { - assert(false); - } - ).on("error", function(err) { - errorRequest(client, request, err); - }).on("end", function() { - util.destroy(this); - }); - request.body = null; - } - if (client[kRunning] > 0 && (util.isStream(request.body) || util.isAsyncIterable(request.body))) { + if (client[kRunning] > 0 && util.bodyLength(request.body) !== 0 && (util.isStream(request.body) || util.isAsyncIterable(request.body))) { return; } if (!request.aborted && write(client, request)) { @@ -8592,6 +9177,10 @@ var require_client = __commonJS({ } } __name(_resume, "_resume"); + function shouldSendContentLength(method) { + return method !== "GET" && method !== "HEAD" && method !== "OPTIONS" && method !== "TRACE" && method !== "CONNECT"; + } + __name(shouldSendContentLength, "shouldSendContentLength"); function write(client, request) { if (client[kHTTPConnVersion] === "h2") { writeH2(client, client[kHTTP2Session], request); @@ -8602,14 +9191,15 @@ var require_client = __commonJS({ if (body && typeof body.read === "function") { body.read(0); } - let contentLength = util.bodyLength(body); + const bodyLength = util.bodyLength(body); + let contentLength = bodyLength; if (contentLength === null) { contentLength = request.contentLength; } if (contentLength === 0 && !expectsPayload) { contentLength = null; } - if (request.contentLength !== null && request.contentLength !== contentLength) { + if (shouldSendContentLength(method) && contentLength > 0 && request.contentLength !== null && request.contentLength !== contentLength) { if (client[kStrictContentLength]) { errorRequest(client, request, new RequestContentLengthMismatchError()); return false; @@ -8669,7 +9259,7 @@ upgrade: ${upgrade}\r if (channels.sendHeaders.hasSubscribers) { channels.sendHeaders.publish({ request, headers: header, socket }); } - if (!body) { + if (!body || bodyLength === 0) { if (contentLength === 0) { socket.write(`${header}content-length: 0\r \r @@ -8769,7 +9359,7 @@ upgrade: ${upgrade}\r if (contentLength === 0 || !expectsPayload) { contentLength = null; } - if (request.contentLength != null && request.contentLength !== contentLength) { + if (shouldSendContentLength(method) && contentLength > 0 && request.contentLength != null && request.contentLength !== contentLength) { if (client[kStrictContentLength]) { errorRequest(client, request, new RequestContentLengthMismatchError()); return false; @@ -9241,7 +9831,7 @@ var require_pool = __commonJS({ maxCachedSessions, allowH2, socketPath, - timeout: connectTimeout == null ? 1e4 : connectTimeout, + timeout: connectTimeout, ...util.nodeHasAutoSelectFamily && autoSelectFamily ? { autoSelectFamily, autoSelectFamilyAttemptTimeout } : void 0, ...connect }); diff --git a/doc/contributing/maintaining/maintaining-dependencies.md b/doc/contributing/maintaining/maintaining-dependencies.md index 176fe72be4c617..d706d08c55d497 100644 --- a/doc/contributing/maintaining/maintaining-dependencies.md +++ b/doc/contributing/maintaining/maintaining-dependencies.md @@ -28,7 +28,7 @@ This a list of all the dependencies: * [openssl 3.0.8][] * [postject 1.0.0-alpha.6][] * [simdutf 4.0.4][] -* [undici 5.27.0][] +* [undici 5.27.2][] * [uvwasi 0.0.19][] * [V8 11.3.244.8][] * [zlib 1.2.13.1-motley-dfc48fc][] @@ -291,7 +291,7 @@ The [postject](https://github.com/nodejs/postject) dependency is used for the The [simdutf](https://github.com/simdutf/simdutf) dependency is a C++ library for fast UTF-8 decoding and encoding. -### undici 5.27.0 +### undici 5.27.2 The [undici](https://github.com/nodejs/undici) dependency is an HTTP/1.1 client, written from scratch for Node.js.. @@ -345,7 +345,7 @@ performance improvements not currently available in standard zlib. [openssl 3.0.8]: #openssl-308 [postject 1.0.0-alpha.6]: #postject-100-alpha6 [simdutf 4.0.4]: #simdutf-404 -[undici 5.27.0]: #undici-5270 +[undici 5.27.2]: #undici-5272 [update-openssl-action]: ../../../.github/workflows/update-openssl.yml [uvwasi 0.0.19]: #uvwasi-0019 [v8 11.3.244.8]: #v8-1132448 diff --git a/src/undici_version.h b/src/undici_version.h index 930c2318ec80bd..669a1aef41f1cf 100644 --- a/src/undici_version.h +++ b/src/undici_version.h @@ -2,5 +2,5 @@ // Refer to tools/dep_updaters/update-undici.sh #ifndef SRC_UNDICI_VERSION_H_ #define SRC_UNDICI_VERSION_H_ -#define UNDICI_VERSION "5.27.0" +#define UNDICI_VERSION "5.27.2" #endif // SRC_UNDICI_VERSION_H_ From 05e25e0230e9f23b4ff07ff7111d0822cc032550 Mon Sep 17 00:00:00 2001 From: CanadaHonk Date: Thu, 23 Nov 2023 00:25:15 +0000 Subject: [PATCH 213/229] fs: improve error perf of sync `lstat`+`fstat` PR-URL: https://github.com/nodejs/node/pull/49868 Refs: https://github.com/nodejs/performance/issues/106 Reviewed-By: Yagiz Nizipli Reviewed-By: Joyee Cheung Reviewed-By: James M Snell --- benchmark/fs/bench-statSync-failure.js | 22 ++++++--- benchmark/fs/bench-statSync.js | 2 +- lib/fs.js | 68 +++++++++++--------------- src/node_file.cc | 43 +++++++++------- test/parallel/test-fs-sync-fd-leak.js | 9 +--- typings/internalBinding/fs.d.ts | 12 ++--- 6 files changed, 79 insertions(+), 77 deletions(-) diff --git a/benchmark/fs/bench-statSync-failure.js b/benchmark/fs/bench-statSync-failure.js index de6128d8fc1845..b1005a78a7f6e0 100644 --- a/benchmark/fs/bench-statSync-failure.js +++ b/benchmark/fs/bench-statSync-failure.js @@ -5,21 +5,29 @@ const fs = require('fs'); const path = require('path'); const bench = common.createBenchmark(main, { - n: [1e6], - statSyncType: ['throw', 'noThrow'], + n: [1e4], + statSyncType: ['fstatSync', 'lstatSync', 'statSync'], + throwType: [ 'throw', 'noThrow' ], +}, { + // fstatSync does not support throwIfNoEntry + combinationFilter: ({ statSyncType, throwType }) => !(statSyncType === 'fstatSync' && throwType === 'noThrow'), }); -function main({ n, statSyncType }) { - const arg = path.join(__dirname, 'non.existent'); +function main({ n, statSyncType, throwType }) { + const arg = (statSyncType === 'fstatSync' ? + (1 << 30) : + path.join(__dirname, 'non.existent')); + + const fn = fs[statSyncType]; bench.start(); for (let i = 0; i < n; i++) { - if (statSyncType === 'noThrow') { - fs.statSync(arg, { throwIfNoEntry: false }); + if (throwType === 'noThrow') { + fn(arg, { throwIfNoEntry: false }); } else { try { - fs.statSync(arg); + fn(arg); } catch { // Continue regardless of error. } diff --git a/benchmark/fs/bench-statSync.js b/benchmark/fs/bench-statSync.js index 4ec2e860fb6ba7..e6680fe7cb85fe 100644 --- a/benchmark/fs/bench-statSync.js +++ b/benchmark/fs/bench-statSync.js @@ -4,7 +4,7 @@ const common = require('../common'); const fs = require('fs'); const bench = common.createBenchmark(main, { - n: [1e6], + n: [1e4], statSyncType: ['fstatSync', 'lstatSync', 'statSync'], }); diff --git a/lib/fs.js b/lib/fs.js index d8974e67128fb0..ff32c5da123fea 100644 --- a/lib/fs.js +++ b/lib/fs.js @@ -74,8 +74,6 @@ const { ERR_INVALID_ARG_VALUE, }, AbortError, - uvErrmapGet, - uvException, } = require('internal/errors'); const { @@ -402,11 +400,9 @@ function readFile(path, options, callback) { } function tryStatSync(fd, isUserFd) { - const ctx = {}; - const stats = binding.fstat(fd, false, undefined, ctx); - if (ctx.errno !== undefined && !isUserFd) { + const stats = binding.fstat(fd, false, undefined, true /* shouldNotThrow */); + if (stats === undefined && !isUserFd) { fs.closeSync(fd); - throw uvException(ctx); } return stats; } @@ -1614,19 +1610,6 @@ function statfs(path, options = { bigint: false }, callback) { binding.statfs(pathModule.toNamespacedPath(path), options.bigint, req); } -function hasNoEntryError(ctx) { - if (ctx.errno) { - const uvErr = uvErrmapGet(ctx.errno); - return uvErr?.[0] === 'ENOENT'; - } - - if (ctx.error) { - return ctx.error.code === 'ENOENT'; - } - - return false; -} - /** * Synchronously retrieves the `fs.Stats` for * the file descriptor. @@ -1634,13 +1617,14 @@ function hasNoEntryError(ctx) { * @param {{ * bigint?: boolean; * }} [options] - * @returns {Stats} + * @returns {Stats | undefined} */ function fstatSync(fd, options = { bigint: false }) { fd = getValidatedFd(fd); - const ctx = { fd }; - const stats = binding.fstat(fd, options.bigint, undefined, ctx); - handleErrorFromBinding(ctx); + const stats = binding.fstat(fd, options.bigint, undefined, false); + if (stats === undefined) { + return; + } return getStatsFromBinding(stats); } @@ -1652,17 +1636,20 @@ function fstatSync(fd, options = { bigint: false }) { * bigint?: boolean; * throwIfNoEntry?: boolean; * }} [options] - * @returns {Stats} + * @returns {Stats | undefined} */ function lstatSync(path, options = { bigint: false, throwIfNoEntry: true }) { path = getValidatedPath(path); - const ctx = { path }; - const stats = binding.lstat(pathModule.toNamespacedPath(path), - options.bigint, undefined, ctx); - if (options.throwIfNoEntry === false && hasNoEntryError(ctx)) { - return undefined; + const stats = binding.lstat( + pathModule.toNamespacedPath(path), + options.bigint, + undefined, + options.throwIfNoEntry, + ); + + if (stats === undefined) { + return; } - handleErrorFromBinding(ctx); return getStatsFromBinding(stats); } @@ -2665,9 +2652,10 @@ function realpathSync(p, options) { // On windows, check that the root exists. On unix there is no need. if (isWindows) { - const ctx = { path: base }; - binding.lstat(pathModule.toNamespacedPath(base), false, undefined, ctx); - handleErrorFromBinding(ctx); + const out = binding.lstat(pathModule.toNamespacedPath(base), false, undefined, true /* throwIfNoEntry */); + if (out === undefined) { + return; + } knownHard.add(base); } @@ -2707,9 +2695,10 @@ function realpathSync(p, options) { // for our internal use. const baseLong = pathModule.toNamespacedPath(base); - const ctx = { path: base }; - const stats = binding.lstat(baseLong, true, undefined, ctx); - handleErrorFromBinding(ctx); + const stats = binding.lstat(baseLong, true, undefined, true /* throwIfNoEntry */); + if (stats === undefined) { + return; + } if (!isFileType(stats, S_IFLNK)) { knownHard.add(base); @@ -2748,9 +2737,10 @@ function realpathSync(p, options) { // On windows, check that the root exists. On unix there is no need. if (isWindows && !knownHard.has(base)) { - const ctx = { path: base }; - binding.lstat(pathModule.toNamespacedPath(base), false, undefined, ctx); - handleErrorFromBinding(ctx); + const out = binding.lstat(pathModule.toNamespacedPath(base), false, undefined, true /* throwIfNoEntry */); + if (out === undefined) { + return; + } knownHard.add(base); } } diff --git a/src/node_file.cc b/src/node_file.cc index b3c52e04fac8f0..44972d9ec22fd1 100644 --- a/src/node_file.cc +++ b/src/node_file.cc @@ -1215,21 +1215,26 @@ static void LStat(const FunctionCallbackInfo& args) { CHECK_NOT_NULL(*path); bool use_bigint = args[1]->IsTrue(); - FSReqBase* req_wrap_async = GetReqWrap(args, 2, use_bigint); - if (req_wrap_async != nullptr) { // lstat(path, use_bigint, req) + if (!args[2]->IsUndefined()) { // lstat(path, use_bigint, req) + FSReqBase* req_wrap_async = GetReqWrap(args, 2, use_bigint); FS_ASYNC_TRACE_BEGIN1( UV_FS_LSTAT, req_wrap_async, "path", TRACE_STR_COPY(*path)) AsyncCall(env, req_wrap_async, args, "lstat", UTF8, AfterStat, uv_fs_lstat, *path); - } else { // lstat(path, use_bigint, undefined, ctx) - CHECK_EQ(argc, 4); - FSReqWrapSync req_wrap_sync; + } else { // lstat(path, use_bigint, undefined, throw_if_no_entry) + bool do_not_throw_if_no_entry = args[3]->IsFalse(); + FSReqWrapSync req_wrap_sync("lstat", *path); FS_SYNC_TRACE_BEGIN(lstat); - int err = SyncCall(env, args[3], &req_wrap_sync, "lstat", uv_fs_lstat, - *path); + int result; + if (do_not_throw_if_no_entry) { + result = SyncCallAndThrowIf( + is_uv_error_except_no_entry, env, &req_wrap_sync, uv_fs_lstat, *path); + } else { + result = SyncCallAndThrowOnError(env, &req_wrap_sync, uv_fs_lstat, *path); + } FS_SYNC_TRACE_END(lstat); - if (err != 0) { - return; // error info is in ctx + if (is_uv_error(result)) { + return; } Local arr = FillGlobalStatsArray(binding_data, use_bigint, @@ -1250,19 +1255,23 @@ static void FStat(const FunctionCallbackInfo& args) { int fd = args[0].As()->Value(); bool use_bigint = args[1]->IsTrue(); - FSReqBase* req_wrap_async = GetReqWrap(args, 2, use_bigint); - if (req_wrap_async != nullptr) { // fstat(fd, use_bigint, req) + if (!args[2]->IsUndefined()) { // fstat(fd, use_bigint, req) + FSReqBase* req_wrap_async = GetReqWrap(args, 2, use_bigint); FS_ASYNC_TRACE_BEGIN0(UV_FS_FSTAT, req_wrap_async) AsyncCall(env, req_wrap_async, args, "fstat", UTF8, AfterStat, uv_fs_fstat, fd); - } else { // fstat(fd, use_bigint, undefined, ctx) - CHECK_EQ(argc, 4); - FSReqWrapSync req_wrap_sync; + } else { // fstat(fd, use_bigint, undefined, do_not_throw_error) + bool do_not_throw_error = args[2]->IsTrue(); + const auto should_throw = [do_not_throw_error](int result) { + return is_uv_error(result) && !do_not_throw_error; + }; + FSReqWrapSync req_wrap_sync("fstat"); FS_SYNC_TRACE_BEGIN(fstat); - int err = SyncCall(env, args[3], &req_wrap_sync, "fstat", uv_fs_fstat, fd); + int err = + SyncCallAndThrowIf(should_throw, env, &req_wrap_sync, uv_fs_fstat, fd); FS_SYNC_TRACE_END(fstat); - if (err != 0) { - return; // error info is in ctx + if (is_uv_error(err)) { + return; } Local arr = FillGlobalStatsArray(binding_data, use_bigint, diff --git a/test/parallel/test-fs-sync-fd-leak.js b/test/parallel/test-fs-sync-fd-leak.js index 52f6f38f9c0ea0..8a9eb39a67a906 100644 --- a/test/parallel/test-fs-sync-fd-leak.js +++ b/test/parallel/test-fs-sync-fd-leak.js @@ -25,7 +25,6 @@ require('../common'); const assert = require('assert'); const fs = require('fs'); const { internalBinding } = require('internal/test/binding'); -const { UV_EBADF } = internalBinding('uv'); // Ensure that (read|write|append)FileSync() closes the file descriptor fs.openSync = function() { @@ -42,15 +41,11 @@ fs.writeSync = function() { throw new Error('BAM'); }; -internalBinding('fs').fstat = function(fd, bigint, _, ctx) { - ctx.errno = UV_EBADF; - ctx.syscall = 'fstat'; +internalBinding('fs').fstat = function() { + throw new Error('EBADF: bad file descriptor, fstat'); }; let close_called = 0; -ensureThrows(function() { - fs.readFileSync('dummy'); -}, 'EBADF: bad file descriptor, fstat'); ensureThrows(function() { fs.writeFileSync('dummy', 'xxx'); }, 'BAM'); diff --git a/typings/internalBinding/fs.d.ts b/typings/internalBinding/fs.d.ts index 67c14c73acf7f1..2d45a2ff40a8f1 100644 --- a/typings/internalBinding/fs.d.ts +++ b/typings/internalBinding/fs.d.ts @@ -92,9 +92,9 @@ declare namespace InternalFSBinding { function fstat(fd: number, useBigint: boolean, req: FSReqCallback): void; function fstat(fd: number, useBigint: true, req: FSReqCallback): void; function fstat(fd: number, useBigint: false, req: FSReqCallback): void; - function fstat(fd: number, useBigint: boolean, req: undefined, ctx: FSSyncContext): Float64Array | BigUint64Array; - function fstat(fd: number, useBigint: true, req: undefined, ctx: FSSyncContext): BigUint64Array; - function fstat(fd: number, useBigint: false, req: undefined, ctx: FSSyncContext): Float64Array; + function fstat(fd: number, useBigint: boolean, req: undefined, shouldNotThrow: boolean): Float64Array | BigUint64Array; + function fstat(fd: number, useBigint: true, req: undefined, shouldNotThrow: boolean): BigUint64Array; + function fstat(fd: number, useBigint: false, req: undefined, shouldNotThrow: boolean): Float64Array; function fstat(fd: number, useBigint: boolean, usePromises: typeof kUsePromises): Promise; function fstat(fd: number, useBigint: true, usePromises: typeof kUsePromises): Promise; function fstat(fd: number, useBigint: false, usePromises: typeof kUsePromises): Promise; @@ -127,9 +127,9 @@ declare namespace InternalFSBinding { function lstat(path: StringOrBuffer, useBigint: boolean, req: FSReqCallback): void; function lstat(path: StringOrBuffer, useBigint: true, req: FSReqCallback): void; function lstat(path: StringOrBuffer, useBigint: false, req: FSReqCallback): void; - function lstat(path: StringOrBuffer, useBigint: boolean, req: undefined, ctx: FSSyncContext): Float64Array | BigUint64Array; - function lstat(path: StringOrBuffer, useBigint: true, req: undefined, ctx: FSSyncContext): BigUint64Array; - function lstat(path: StringOrBuffer, useBigint: false, req: undefined, ctx: FSSyncContext): Float64Array; + function lstat(path: StringOrBuffer, useBigint: boolean, req: undefined, throwIfNoEntry: boolean): Float64Array | BigUint64Array; + function lstat(path: StringOrBuffer, useBigint: true, req: undefined, throwIfNoEntry: boolean): BigUint64Array; + function lstat(path: StringOrBuffer, useBigint: false, req: undefined, throwIfNoEntry: boolean): Float64Array; function lstat(path: StringOrBuffer, useBigint: boolean, usePromises: typeof kUsePromises): Promise; function lstat(path: StringOrBuffer, useBigint: true, usePromises: typeof kUsePromises): Promise; function lstat(path: StringOrBuffer, useBigint: false, usePromises: typeof kUsePromises): Promise; From 3b6b1afa47bae2d86a9b039e696d3fc48e99e304 Mon Sep 17 00:00:00 2001 From: Cheng Zhao Date: Sat, 25 Nov 2023 13:30:50 +0900 Subject: [PATCH 214/229] v8,tools: expose necessary V8 defines PR-URL: https://github.com/nodejs/node/pull/50820 Reviewed-By: Jiawen Geng Reviewed-By: James M Snell --- common.gypi | 60 +++++++++++++--- configure.py | 2 + tools/generate_config_gypi.py | 118 ++++++++++++++++++++++++-------- tools/v8_gypfiles/features.gypi | 37 +++++++--- unofficial.gni | 9 +-- 5 files changed, 177 insertions(+), 49 deletions(-) diff --git a/common.gypi b/common.gypi index 4c856c0912260c..db09a8a33df066 100644 --- a/common.gypi +++ b/common.gypi @@ -75,8 +75,16 @@ 'v8_win64_unwinding_info': 1, - # TODO(refack): make v8-perfetto happen + # Variables controlling external defines exposed in public headers. + 'v8_enable_conservative_stack_scanning%': 0, + 'v8_enable_direct_local%': 0, + 'v8_enable_map_packing%': 0, + 'v8_enable_pointer_compression_shared_cage%': 0, + 'v8_enable_sandbox%': 0, + 'v8_enable_v8_checks%': 0, + 'v8_enable_zone_compression%': 0, 'v8_use_perfetto': 0, + 'tsan%': 0, ##### end V8 defaults ##### @@ -134,7 +142,7 @@ }], ], }, - 'defines': [ 'DEBUG', '_DEBUG', 'V8_ENABLE_CHECKS' ], + 'defines': [ 'DEBUG', '_DEBUG' ], 'cflags': [ '-g', '-O0' ], 'conditions': [ ['OS in "aix os400"', { @@ -257,11 +265,8 @@ } }, - # Defines these mostly for node-gyp to pickup, and warn addon authors of - # imminent V8 deprecations, also to sync how dependencies are configured. + # Defines these mostly for node-gyp to pickup. 'defines': [ - 'V8_DEPRECATION_WARNINGS', - 'V8_IMMINENT_DEPRECATION_WARNINGS', '_GLIBCXX_USE_CXX11_ABI=1', ], @@ -369,15 +374,50 @@ }], ], }], + # The defines bellow must include all things from the external_v8_defines + # list in v8/BUILD.gn. + ['v8_enable_v8_checks == 1', { + 'defines': ['V8_ENABLE_CHECKS'], + }], ['v8_enable_pointer_compression == 1', { - 'defines': [ - 'V8_COMPRESS_POINTERS', - 'V8_COMPRESS_POINTERS_IN_ISOLATE_CAGE', - ], + 'defines': ['V8_COMPRESS_POINTERS'], + }], + ['v8_enable_pointer_compression_shared_cage == 1', { + 'defines': ['V8_COMPRESS_POINTERS_IN_SHARED_CAGE'], + }], + ['v8_enable_pointer_compression == 1 and v8_enable_pointer_compression_shared_cage != 1', { + 'defines': ['V8_COMPRESS_POINTERS_IN_ISOLATE_CAGE'], }], ['v8_enable_pointer_compression == 1 or v8_enable_31bit_smis_on_64bit_arch == 1', { 'defines': ['V8_31BIT_SMIS_ON_64BIT_ARCH'], }], + ['v8_enable_zone_compression == 1', { + 'defines': ['V8_COMPRESS_ZONES',], + }], + ['v8_enable_sandbox == 1', { + 'defines': ['V8_ENABLE_SANDBOX',], + }], + ['v8_deprecation_warnings == 1', { + 'defines': ['V8_DEPRECATION_WARNINGS',], + }], + ['v8_imminent_deprecation_warnings == 1', { + 'defines': ['V8_IMMINENT_DEPRECATION_WARNINGS',], + }], + ['v8_use_perfetto == 1', { + 'defines': ['V8_USE_PERFETTO',], + }], + ['v8_enable_map_packing == 1', { + 'defines': ['V8_MAP_PACKING',], + }], + ['tsan == 1', { + 'defines': ['V8_IS_TSAN',], + }], + ['v8_enable_conservative_stack_scanning == 1', { + 'defines': ['V8_ENABLE_CONSERVATIVE_STACK_SCANNING',], + }], + ['v8_enable_direct_local == 1', { + 'defines': ['V8_ENABLE_DIRECT_LOCAL',], + }], ['OS == "win"', { 'defines': [ 'WIN32', diff --git a/configure.py b/configure.py index 7880f758e948db..84b016cd853080 100755 --- a/configure.py +++ b/configure.py @@ -1504,6 +1504,8 @@ def configure_v8(o): o['variables']['v8_enable_pointer_compression'] = 1 if options.enable_pointer_compression else 0 o['variables']['v8_enable_31bit_smis_on_64bit_arch'] = 1 if options.enable_pointer_compression else 0 o['variables']['v8_enable_shared_ro_heap'] = 0 if options.enable_pointer_compression or options.disable_shared_ro_heap else 1 + o['variables']['v8_enable_extensible_ro_snapshot'] = 0 + o['variables']['v8_enable_v8_checks'] = 1 if options.debug else 0 o['variables']['v8_trace_maps'] = 1 if options.trace_maps else 0 o['variables']['node_use_v8_platform'] = b(not options.without_v8_platform) o['variables']['node_use_bundled_v8'] = b(not options.without_bundled_v8) diff --git a/tools/generate_config_gypi.py b/tools/generate_config_gypi.py index 26cc5f04201fec..948a740529ec34 100755 --- a/tools/generate_config_gypi.py +++ b/tools/generate_config_gypi.py @@ -7,69 +7,133 @@ # This script reads the configurations of GN and outputs a config.gypi file that # will be used to populate process.config.variables. +import argparse import re import os import subprocess import sys -root_dir = os.path.dirname(os.path.dirname(__file__)) -sys.path.append(os.path.join(root_dir, 'node', 'tools')) -import getmoduleversion +sys.path.append(os.path.dirname(__file__)) import getnapibuildversion +# The defines bellow must include all things from the external_v8_defines list +# in v8/BUILD.gn. +# TODO(zcbenz): Import from v8_features.json once this change gets into Node: +# https://chromium-review.googlesource.com/c/v8/v8/+/5040612 +V8_FEATURE_DEFINES = { + 'v8_enable_v8_checks': 'V8_ENABLE_CHECKS', + 'v8_enable_pointer_compression': 'V8_COMPRESS_POINTERS', + 'v8_enable_pointer_compression_shared_cage': 'V8_COMPRESS_POINTERS_IN_SHARED_CAGE', + 'v8_enable_31bit_smis_on_64bit_arch': 'V8_31BIT_SMIS_ON_64BIT_ARCH', + 'v8_enable_zone_compression': 'V8_COMPRESS_ZONES', + 'v8_enable_sandbox': 'V8_ENABLE_SANDBOX', + 'v8_deprecation_warnings': 'V8_DEPRECATION_WARNINGS', + 'v8_imminent_deprecation_warnings': 'V8_IMMINENT_DEPRECATION_WARNINGS', + 'v8_use_perfetto': 'V8_USE_PERFETTO', + 'v8_enable_map_packing': 'V8_MAP_PACKING', + 'tsan': 'V8_IS_TSAN', + 'v8_enable_conservative_stack_scanning': 'V8_ENABLE_CONSERVATIVE_STACK_SCANNING', + 'v8_enable_direct_local': 'V8_ENABLE_DIRECT_LOCAL', +} + +# Regex used for parsing results of "gn args". GN_RE = re.compile(r'(\w+)\s+=\s+(.*?)$', re.MULTILINE) +if sys.platform == 'win32': + GN = 'gn.exe' +else: + GN = 'gn' + +def bool_to_number(v): + return 1 if v else 0 + def bool_string_to_number(v): - return 1 if v == 'true' else 0 + return bool_to_number(v == 'true') + +def get_gn_config(out_dir): + # Read args from GN configurations. + gn_args = subprocess.check_output( + [GN, 'args', '--list', '--short', '-C', out_dir]) + config = dict(re.findall(GN_RE, gn_args.decode())) + # Get napi_build_version from Node, which is not part of GN args. + config['napi_build_version'] = getnapibuildversion.get_napi_version() + return config + +def get_v8_config(out_dir, node_gn_path): + # For args that have default values in V8's GN configurations, we can not rely + # on the values printed by "gn args", because most of them would be empty + # strings, and the actual value would depend on the logics in v8/BUILD.gn. + # So we print out the defines and deduce the feature from them instead. + node_defines = subprocess.check_output( + [GN, 'desc', '-C', out_dir, node_gn_path + ":libnode", 'defines']).decode().split('\n') + v8_config = {} + for feature, define in V8_FEATURE_DEFINES.items(): + v8_config[feature] = bool_to_number(define in node_defines) + return v8_config -def translate_config(config): - return { +def translate_config(out_dir, config, v8_config): + config_gypi = { 'target_defaults': { 'default_configuration': 'Debug' if config['is_debug'] == 'true' else 'Release', }, 'variables': { 'asan': bool_string_to_number(config['is_asan']), + 'enable_lto': config['use_thin_lto'], + 'is_debug': bool_string_to_number(config['is_debug']), 'llvm_version': 13, 'napi_build_version': config['napi_build_version'], 'node_builtin_shareable_builtins': eval(config['node_builtin_shareable_builtins']), 'node_module_version': int(config['node_module_version']), - 'node_shared': bool_string_to_number(config['is_component_build']), 'node_use_openssl': config['node_use_openssl'], 'node_use_node_code_cache': config['node_use_node_code_cache'], 'node_use_node_snapshot': config['node_use_node_snapshot'], - 'v8_enable_31bit_smis_on_64bit_arch': - bool_string_to_number(config['v8_enable_31bit_smis_on_64bit_arch']), - 'v8_enable_pointer_compression': - bool_string_to_number(config['v8_enable_pointer_compression']), 'v8_enable_i18n_support': bool_string_to_number(config['v8_enable_i18n_support']), 'v8_enable_inspector': # this is actually a node misnomer bool_string_to_number(config['node_enable_inspector']), 'shlib_suffix': 'dylib' if sys.platform == 'darwin' else 'so', + 'tsan': bool_string_to_number(config['is_tsan']), + # TODO(zcbenz): Shared components are not supported in GN config yet. + 'node_shared': 'false', + 'node_shared_brotli': 'false', + 'node_shared_cares': 'false', + 'node_shared_http_parser': 'false', + 'node_shared_libuv': 'false', + 'node_shared_nghttp2': 'false', + 'node_shared_nghttp3': 'false', + 'node_shared_ngtcp2': 'false', + 'node_shared_openssl': 'false', + 'node_shared_zlib': 'false', } } + config_gypi['variables'].update(v8_config) + return config_gypi -def main(gn_out_dir, output_file, depfile): - # Get GN config and parse into a dictionary. - if sys.platform == 'win32': - gn = 'gn.exe' - else: - gn = 'gn' - gnconfig = subprocess.check_output( - [gn, 'args', '--list', '--short', '-C', gn_out_dir]) - config = dict(re.findall(GN_RE, gnconfig.decode('utf-8'))) - config['node_module_version'] = getmoduleversion.get_version() - config['napi_build_version'] = getnapibuildversion.get_napi_version() +def main(): + parser = argparse.ArgumentParser( + description='Generate config.gypi file from GN configurations') + parser.add_argument('target', help='path to generated config.gypi file') + parser.add_argument('--out-dir', help='path to the output directory', + default='out/Release') + parser.add_argument('--node-gn-path', help='path of the node target in GN', + default='//node') + parser.add_argument('--dep-file', help='path to an optional dep file', + default=None) + args, unknown_args = parser.parse_known_args() + + config = get_gn_config(args.out_dir) + v8_config = get_v8_config(args.out_dir, args.node_gn_path) # Write output. - with open(output_file, 'w') as f: - f.write(repr(translate_config(config))) + with open(args.target, 'w') as f: + f.write(repr(translate_config(args.out_dir, config, v8_config))) # Write depfile. Force regenerating config.gypi when GN configs change. - with open(depfile, 'w') as f: - f.write('%s: %s '%(output_file, 'build.ninja')) + if args.dep_file: + with open(args.dep_file, 'w') as f: + f.write('%s: %s '%(args.target, 'build.ninja')) if __name__ == '__main__': - main(sys.argv[1], sys.argv[2], sys.argv[3]) + main() diff --git a/tools/v8_gypfiles/features.gypi b/tools/v8_gypfiles/features.gypi index 0227caa7ab5d37..c768d7a0f13dc7 100644 --- a/tools/v8_gypfiles/features.gypi +++ b/tools/v8_gypfiles/features.gypi @@ -155,6 +155,7 @@ # Enable pointer compression (sets -dV8_COMPRESS_POINTERS). 'v8_enable_pointer_compression%': 0, + 'v8_enable_pointer_compression_shared_cage%': 0, 'v8_enable_31bit_smis_on_64bit_arch%': 0, # Sets -dV8_SHORT_BUILTIN_CALLS @@ -197,6 +198,15 @@ # currently implemented. 'v8_use_perfetto%': 0, + # Enable map packing & unpacking (sets -dV8_MAP_PACKING). + 'v8_enable_map_packing%': 0, + + # Scan the call stack conservatively during garbage collection. + 'v8_enable_conservative_stack_scanning%': 0, + + # Use direct pointers in local handles. + 'v8_enable_direct_local%': 0, + # Controls the threshold for on-heap/off-heap Typed Arrays. 'v8_typed_array_max_size_in_heap%': 64, @@ -341,10 +351,13 @@ 'defines': ['ENABLE_VTUNE_JIT_INTERFACE',], }], ['v8_enable_pointer_compression==1', { - 'defines': [ - 'V8_COMPRESS_POINTERS', - 'V8_COMPRESS_POINTERS_IN_ISOLATE_CAGE', - ], + 'defines': ['V8_COMPRESS_POINTERS'], + }], + ['v8_enable_pointer_compression_shared_cage==1', { + 'defines': ['V8_COMPRESS_POINTERS_IN_SHARED_CAGE'], + }], + ['v8_enable_pointer_compression==1 and v8_enable_pointer_compression_shared_cage==0', { + 'defines': ['V8_COMPRESS_POINTERS_IN_ISOLATE_CAGE'], }], ['v8_enable_pointer_compression==1 or v8_enable_31bit_smis_on_64bit_arch==1', { 'defines': ['V8_31BIT_SMIS_ON_64BIT_ARCH',], @@ -388,13 +401,9 @@ }], ['v8_deprecation_warnings==1', { 'defines': ['V8_DEPRECATION_WARNINGS',], - },{ - 'defines!': ['V8_DEPRECATION_WARNINGS',], }], ['v8_imminent_deprecation_warnings==1', { 'defines': ['V8_IMMINENT_DEPRECATION_WARNINGS',], - },{ - 'defines!': ['V8_IMMINENT_DEPRECATION_WARNINGS',], }], ['v8_enable_i18n_support==1', { 'defines': ['V8_INTL_SUPPORT',], @@ -439,9 +448,21 @@ ['v8_use_perfetto==1', { 'defines': ['V8_USE_PERFETTO',], }], + ['v8_enable_map_packing==1', { + 'defines': ['V8_MAP_PACKING',], + }], ['v8_win64_unwinding_info==1', { 'defines': ['V8_WIN64_UNWINDING_INFO',], }], + ['tsan==1', { + 'defines': ['V8_IS_TSAN',], + }], + ['v8_enable_conservative_stack_scanning==1', { + 'defines': ['V8_ENABLE_CONSERVATIVE_STACK_SCANNING',], + }], + ['v8_enable_direct_local==1', { + 'defines': ['V8_ENABLE_DIRECT_LOCAL',], + }], ['v8_enable_regexp_interpreter_threaded_dispatch==1', { 'defines': ['V8_ENABLE_REGEXP_INTERPRETER_THREADED_DISPATCH',], }], diff --git a/unofficial.gni b/unofficial.gni index f13df9503ece1b..ee0a5f3c32dbcc 100644 --- a/unofficial.gni +++ b/unofficial.gni @@ -257,10 +257,11 @@ template("node_gn_build") { script = "tools/generate_config_gypi.py" outputs = [ "$target_gen_dir/config.gypi" ] depfile = "$target_gen_dir/$target_name.d" - script_args = [ "$root_build_dir" ] - script_args += outputs - script_args += [ depfile ] - args = rebase_path(script_args, root_build_dir) + args = rebase_path(outputs, root_build_dir) + [ + "--out-dir", rebase_path(root_build_dir, root_build_dir), + "--dep-file", rebase_path(depfile, root_build_dir), + "--node-gn-path", node_path, + ] } executable("node_js2c") { From 4cf155ca0c92bd02733de4c30c5d1d6e36e95b27 Mon Sep 17 00:00:00 2001 From: Filip Skokan Date: Mon, 13 Nov 2023 13:47:46 +0100 Subject: [PATCH 215/229] stream: add Symbol.toStringTag to Compression Streams PR-URL: https://github.com/nodejs/node/pull/50712 Reviewed-By: Yagiz Nizipli Reviewed-By: Matteo Collina --- lib/internal/webstreams/compression.js | 11 +++++++++++ test/parallel/test-whatwg-webstreams-compression.js | 3 +++ 2 files changed, 14 insertions(+) diff --git a/lib/internal/webstreams/compression.js b/lib/internal/webstreams/compression.js index 2ba8e53fa28e73..cda3c4b08f3b5b 100644 --- a/lib/internal/webstreams/compression.js +++ b/lib/internal/webstreams/compression.js @@ -2,6 +2,7 @@ const { ObjectDefineProperties, + SymbolToStringTag, Symbol, } = primordials; @@ -149,11 +150,21 @@ class DecompressionStream { ObjectDefineProperties(CompressionStream.prototype, { readable: kEnumerableProperty, writable: kEnumerableProperty, + [SymbolToStringTag]: { + __proto__: null, + configurable: true, + value: 'CompressionStream', + }, }); ObjectDefineProperties(DecompressionStream.prototype, { readable: kEnumerableProperty, writable: kEnumerableProperty, + [SymbolToStringTag]: { + __proto__: null, + configurable: true, + value: 'DecompressionStream', + }, }); module.exports = { diff --git a/test/parallel/test-whatwg-webstreams-compression.js b/test/parallel/test-whatwg-webstreams-compression.js index 859df4e07bed88..f9ae8d87265b58 100644 --- a/test/parallel/test-whatwg-webstreams-compression.js +++ b/test/parallel/test-whatwg-webstreams-compression.js @@ -15,6 +15,9 @@ async function test(format) { const gzip = new CompressionStream(format); const gunzip = new DecompressionStream(format); + assert.strictEqual(gzip[Symbol.toStringTag], 'CompressionStream'); + assert.strictEqual(gunzip[Symbol.toStringTag], 'DecompressionStream'); + gzip.readable.pipeTo(gunzip.writable).then(common.mustCall()); const reader = gunzip.readable.getReader(); From aa3209b88023854c46f79ea59ae216f19b82889b Mon Sep 17 00:00:00 2001 From: CanadaHonk Date: Mon, 27 Nov 2023 00:01:39 +0000 Subject: [PATCH 216/229] fs: add c++ fast path for writeFileSync utf8 PR-URL: https://github.com/nodejs/node/pull/49884 Reviewed-By: Yagiz Nizipli Reviewed-By: Santiago Gimeno Reviewed-By: Stephen Belanger Reviewed-By: James M Snell --- benchmark/fs/bench-writeFileSync.js | 43 ++++++++++++++ lib/fs.js | 13 +++++ src/node_file.cc | 80 +++++++++++++++++++++++++++ test/parallel/test-fs-sync-fd-leak.js | 17 ++++++ typings/internalBinding/fs.d.ts | 3 + 5 files changed, 156 insertions(+) create mode 100644 benchmark/fs/bench-writeFileSync.js diff --git a/benchmark/fs/bench-writeFileSync.js b/benchmark/fs/bench-writeFileSync.js new file mode 100644 index 00000000000000..29c16f4aa2a12d --- /dev/null +++ b/benchmark/fs/bench-writeFileSync.js @@ -0,0 +1,43 @@ +'use strict'; + +const common = require('../common.js'); +const fs = require('fs'); +const tmpdir = require('../../test/common/tmpdir'); +tmpdir.refresh(); + +// Some variants are commented out as they do not show a change and just slow +const bench = common.createBenchmark(main, { + encoding: ['utf8'], + useFd: ['true', 'false'], + length: [1024, 102400, 1024 * 1024], + + // useBuffer: ['true', 'false'], + useBuffer: ['false'], + + // func: ['appendFile', 'writeFile'], + func: ['writeFile'], + + n: [1e3], +}); + +function main({ n, func, encoding, length, useFd, useBuffer }) { + tmpdir.refresh(); + const enc = encoding === 'undefined' ? undefined : encoding; + const path = tmpdir.resolve(`.writefilesync-file-${Date.now()}`); + + useFd = useFd === 'true'; + const file = useFd ? fs.openSync(path, 'w') : path; + + let data = 'a'.repeat(length); + if (useBuffer === 'true') data = Buffer.from(data, encoding); + + const fn = fs[func + 'Sync']; + + bench.start(); + for (let i = 0; i < n; ++i) { + fn(file, data, enc); + } + bench.end(n); + + if (useFd) fs.closeSync(file); +} diff --git a/lib/fs.js b/lib/fs.js index ff32c5da123fea..a65bd0274e6f82 100644 --- a/lib/fs.js +++ b/lib/fs.js @@ -2341,6 +2341,19 @@ function writeFileSync(path, data, options) { validateBoolean(flush, 'options.flush'); + // C++ fast path for string data and UTF8 encoding + if (typeof data === 'string' && (options.encoding === 'utf8' || options.encoding === 'utf-8')) { + if (!isInt32(path)) { + path = pathModule.toNamespacedPath(getValidatedPath(path)); + } + + return binding.writeFileUtf8( + path, data, + stringToFlags(options.flag), + parseFileMode(options.mode, 'mode', 0o666), + ); + } + if (!isArrayBufferView(data)) { validateStringAfterArrayBufferView(data, 'data'); data = Buffer.from(data, options.encoding || 'utf8'); diff --git a/src/node_file.cc b/src/node_file.cc index 44972d9ec22fd1..f3d8c35d4963d8 100644 --- a/src/node_file.cc +++ b/src/node_file.cc @@ -2327,6 +2327,84 @@ static void WriteString(const FunctionCallbackInfo& args) { } } +static void WriteFileUtf8(const FunctionCallbackInfo& args) { + // Fast C++ path for fs.writeFileSync(path, data) with utf8 encoding + // (file, data, options.flag, options.mode) + + Environment* env = Environment::GetCurrent(args); + auto isolate = env->isolate(); + + CHECK_EQ(args.Length(), 4); + + BufferValue value(isolate, args[1]); + CHECK_NOT_NULL(*value); + + CHECK(args[2]->IsInt32()); + const int flags = args[2].As()->Value(); + + CHECK(args[3]->IsInt32()); + const int mode = args[3].As()->Value(); + + uv_file file; + + bool is_fd = args[0]->IsInt32(); + + // Check for file descriptor + if (is_fd) { + file = args[0].As()->Value(); + } else { + BufferValue path(isolate, args[0]); + CHECK_NOT_NULL(*path); + if (CheckOpenPermissions(env, path, flags).IsNothing()) return; + + FSReqWrapSync req_open("open", *path); + + FS_SYNC_TRACE_BEGIN(open); + file = + SyncCallAndThrowOnError(env, &req_open, uv_fs_open, *path, flags, mode); + FS_SYNC_TRACE_END(open); + + if (is_uv_error(file)) { + return; + } + } + + int bytesWritten = 0; + uint32_t offset = 0; + + const size_t length = value.length(); + uv_buf_t uvbuf = uv_buf_init(value.out(), length); + + FS_SYNC_TRACE_BEGIN(write); + while (offset < length) { + FSReqWrapSync req_write("write"); + bytesWritten = SyncCallAndThrowOnError( + env, &req_write, uv_fs_write, file, &uvbuf, 1, -1); + + // Write errored out + if (bytesWritten < 0) { + break; + } + + offset += bytesWritten; + DCHECK_LE(offset, length); + uvbuf.base += bytesWritten; + uvbuf.len -= bytesWritten; + } + FS_SYNC_TRACE_END(write); + + if (!is_fd) { + FSReqWrapSync req_close("close"); + + FS_SYNC_TRACE_BEGIN(close); + int result = SyncCallAndThrowOnError(env, &req_close, uv_fs_close, file); + FS_SYNC_TRACE_END(close); + + if (is_uv_error(result)) { + return; + } + } +} /* * Wrapper for read(2). @@ -3169,6 +3247,7 @@ static void CreatePerIsolateProperties(IsolateData* isolate_data, SetMethod(isolate, target, "writeBuffer", WriteBuffer); SetMethod(isolate, target, "writeBuffers", WriteBuffers); SetMethod(isolate, target, "writeString", WriteString); + SetMethod(isolate, target, "writeFileUtf8", WriteFileUtf8); SetMethod(isolate, target, "realpath", RealPath); SetMethod(isolate, target, "copyFile", CopyFile); @@ -3289,6 +3368,7 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(WriteBuffer); registry->Register(WriteBuffers); registry->Register(WriteString); + registry->Register(WriteFileUtf8); registry->Register(RealPath); registry->Register(CopyFile); diff --git a/test/parallel/test-fs-sync-fd-leak.js b/test/parallel/test-fs-sync-fd-leak.js index 8a9eb39a67a906..1abb75964a46e2 100644 --- a/test/parallel/test-fs-sync-fd-leak.js +++ b/test/parallel/test-fs-sync-fd-leak.js @@ -41,17 +41,34 @@ fs.writeSync = function() { throw new Error('BAM'); }; +// Internal fast paths are pure C++, can't error inside write +internalBinding('fs').writeFileUtf8 = function() { + // Fake close + close_called++; + throw new Error('BAM'); +}; + internalBinding('fs').fstat = function() { throw new Error('EBADF: bad file descriptor, fstat'); }; let close_called = 0; ensureThrows(function() { + // Fast path: writeFileSync utf8 fs.writeFileSync('dummy', 'xxx'); }, 'BAM'); ensureThrows(function() { + // Non-fast path + fs.writeFileSync('dummy', 'xxx', { encoding: 'base64' }); +}, 'BAM'); +ensureThrows(function() { + // Fast path: writeFileSync utf8 fs.appendFileSync('dummy', 'xxx'); }, 'BAM'); +ensureThrows(function() { + // Non-fast path + fs.appendFileSync('dummy', 'xxx', { encoding: 'base64' }); +}, 'BAM'); function ensureThrows(cb, message) { let got_exception = false; diff --git a/typings/internalBinding/fs.d.ts b/typings/internalBinding/fs.d.ts index 2d45a2ff40a8f1..5e7b3482bf6260 100644 --- a/typings/internalBinding/fs.d.ts +++ b/typings/internalBinding/fs.d.ts @@ -231,6 +231,9 @@ declare namespace InternalFSBinding { function writeString(fd: number, value: string, pos: unknown, encoding: unknown, usePromises: typeof kUsePromises): Promise; function getFormatOfExtensionlessFile(url: string): ConstantsBinding['fs']; + + function writeFileUtf8(path: string, data: string, flag: number, mode: number): void; + function writeFileUtf8(fd: number, data: string, flag: number, mode: number): void; } export interface FsBinding { From 95ed4ffc1e4d62b9e8f2c114ad3a14f80b3ebef1 Mon Sep 17 00:00:00 2001 From: Mattias Buelens Date: Sat, 18 Nov 2023 00:20:08 +0100 Subject: [PATCH 217/229] stream: fix enumerability of ReadableStream.prototype.values PR-URL: https://github.com/nodejs/node/pull/50779 Reviewed-By: Filip Skokan Reviewed-By: Yagiz Nizipli Reviewed-By: Luigi Pinca Reviewed-By: Matteo Collina Reviewed-By: James M Snell Reviewed-By: Benjamin Gruenbaum --- lib/internal/webstreams/readablestream.js | 1 + test/wpt/status/streams.json | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/internal/webstreams/readablestream.js b/lib/internal/webstreams/readablestream.js index 1e0546664fc31b..4dc45edea703db 100644 --- a/lib/internal/webstreams/readablestream.js +++ b/lib/internal/webstreams/readablestream.js @@ -639,6 +639,7 @@ ObjectDefineProperties(ReadableStream.prototype, { pipeThrough: kEnumerableProperty, pipeTo: kEnumerableProperty, tee: kEnumerableProperty, + values: kEnumerableProperty, [SymbolToStringTag]: getNonWritablePropertyDescriptor(ReadableStream.name), }); diff --git a/test/wpt/status/streams.json b/test/wpt/status/streams.json index 3471a987e834a2..ec2f71d8908460 100644 --- a/test/wpt/status/streams.json +++ b/test/wpt/status/streams.json @@ -2,7 +2,7 @@ "idlharness.any.js": { "fail": { "expected": [ - "ReadableStream interface: async iterable" + "ReadableStream interface: operation from(any)" ] } }, From a878e3abb0e65033192e87dbe21d84a2e5954621 Mon Sep 17 00:00:00 2001 From: Mattias Buelens Date: Mon, 20 Nov 2023 22:23:24 +0100 Subject: [PATCH 218/229] stream: fix enumerability of ReadableStream.from PR-URL: https://github.com/nodejs/node/pull/50779 Reviewed-By: Filip Skokan Reviewed-By: Yagiz Nizipli Reviewed-By: Luigi Pinca Reviewed-By: Matteo Collina Reviewed-By: James M Snell Reviewed-By: Benjamin Gruenbaum --- lib/internal/webstreams/readablestream.js | 3 +++ test/wpt/status/streams.json | 7 ------- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/lib/internal/webstreams/readablestream.js b/lib/internal/webstreams/readablestream.js index 4dc45edea703db..442b997315c89b 100644 --- a/lib/internal/webstreams/readablestream.js +++ b/lib/internal/webstreams/readablestream.js @@ -642,6 +642,9 @@ ObjectDefineProperties(ReadableStream.prototype, { values: kEnumerableProperty, [SymbolToStringTag]: getNonWritablePropertyDescriptor(ReadableStream.name), }); +ObjectDefineProperties(ReadableStream, { + from: kEnumerableProperty, +}); function TransferredReadableStream() { return makeTransferable(ReflectConstruct( diff --git a/test/wpt/status/streams.json b/test/wpt/status/streams.json index ec2f71d8908460..3b6e0ce6429f9d 100644 --- a/test/wpt/status/streams.json +++ b/test/wpt/status/streams.json @@ -1,11 +1,4 @@ { - "idlharness.any.js": { - "fail": { - "expected": [ - "ReadableStream interface: operation from(any)" - ] - } - }, "idlharness-shadowrealm.window.js": { "skip": "ShadowRealm support is not enabled" }, From 54c46dae9e5a93d5264231e170db0585486a3864 Mon Sep 17 00:00:00 2001 From: "Node.js GitHub Bot" Date: Fri, 24 Nov 2023 19:48:08 +0200 Subject: [PATCH 219/229] deps: update zlib to 1.2.13.1-motley-5daffc7 PR-URL: https://github.com/nodejs/node/pull/50803 Reviewed-By: Luigi Pinca Reviewed-By: James M Snell --- deps/zlib/crc32.c | 4 ++-- deps/zlib/deflate.c | 5 +++++ deps/zlib/gzguts.h | 5 ++--- deps/zlib/gzlib.c | 4 ++-- deps/zlib/zlib.h | 13 ++++++++----- .../maintaining/maintaining-dependencies.md | 6 +++--- src/zlib_version.h | 2 +- 7 files changed, 23 insertions(+), 16 deletions(-) diff --git a/deps/zlib/crc32.c b/deps/zlib/crc32.c index 1e1a57519cbda6..cf8579f30aa707 100644 --- a/deps/zlib/crc32.c +++ b/deps/zlib/crc32.c @@ -792,8 +792,8 @@ unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, words = (z_word_t const *)buf; /* Do endian check at execution time instead of compile time, since ARM - processors can change the endianess at execution time. If the - compiler knows what the endianess will be, it can optimize out the + processors can change the endianness at execution time. If the + compiler knows what the endianness will be, it can optimize out the check and the unused branch. */ endian = 1; if (*(unsigned char *)&endian) { diff --git a/deps/zlib/deflate.c b/deps/zlib/deflate.c index 173ce596be582e..d1611cce058931 100644 --- a/deps/zlib/deflate.c +++ b/deps/zlib/deflate.c @@ -168,6 +168,11 @@ local const config configuration_table[10] = { * bit values at the expense of memory usage). We slide even when level == 0 to * keep the hash table consistent if we switch back to level > 0 later. */ +#if defined(__has_feature) +# if __has_feature(memory_sanitizer) + __attribute__((no_sanitize("memory"))) +# endif +#endif local void slide_hash(deflate_state *s) { #if defined(DEFLATE_SLIDE_HASH_SSE2) || defined(DEFLATE_SLIDE_HASH_NEON) slide_hash_simd(s->head, s->prev, s->w_size, s->hash_size); diff --git a/deps/zlib/gzguts.h b/deps/zlib/gzguts.h index e23f831f531bb1..f9375047e8c58f 100644 --- a/deps/zlib/gzguts.h +++ b/deps/zlib/gzguts.h @@ -7,9 +7,8 @@ # ifndef _LARGEFILE_SOURCE # define _LARGEFILE_SOURCE 1 # endif -# ifdef _FILE_OFFSET_BITS -# undef _FILE_OFFSET_BITS -# endif +# undef _FILE_OFFSET_BITS +# undef _TIME_BITS #endif #ifdef HAVE_HIDDEN diff --git a/deps/zlib/gzlib.c b/deps/zlib/gzlib.c index 9810e3553e0263..0d3ebf8d297e57 100644 --- a/deps/zlib/gzlib.c +++ b/deps/zlib/gzlib.c @@ -311,8 +311,8 @@ int ZEXPORT gzbuffer(gzFile file, unsigned size) { /* check and set requested size */ if ((size << 1) < size) return -1; /* need to be able to double it */ - if (size < 2) - size = 2; /* need two bytes to check magic header */ + if (size < 8) + size = 8; /* needed to behave well with flushing */ state->want = size; return 0; } diff --git a/deps/zlib/zlib.h b/deps/zlib/zlib.h index 014331fc8dc33f..cd7b165cc863e9 100644 --- a/deps/zlib/zlib.h +++ b/deps/zlib/zlib.h @@ -230,7 +230,7 @@ ZEXTERN int ZEXPORT deflateInit(z_streamp strm, int level); Initializes the internal stream state for compression. The fields zalloc, zfree and opaque must be initialized before by the caller. If zalloc and zfree are set to Z_NULL, deflateInit updates them to use default - allocation functions. + allocation functions. total_in, total_out, adler, and msg are initialized. The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: 1 gives best speed, 9 gives best compression, 0 gives no compression at all @@ -382,7 +382,8 @@ ZEXTERN int ZEXPORT inflateInit(z_streamp strm); read or consumed. The allocation of a sliding window will be deferred to the first call of inflate (if the decompression does not complete on the first call). If zalloc and zfree are set to Z_NULL, inflateInit updates - them to use default allocation functions. + them to use default allocation functions. total_in, total_out, adler, and + msg are initialized. inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough memory, Z_VERSION_ERROR if the zlib library version is incompatible with the @@ -695,7 +696,7 @@ ZEXTERN int ZEXPORT deflateReset(z_streamp strm); This function is equivalent to deflateEnd followed by deflateInit, but does not free and reallocate the internal compression state. The stream will leave the compression level and any other attributes that may have been - set unchanged. + set unchanged. total_in, total_out, adler, and msg are initialized. deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source stream state was inconsistent (such as zalloc or state being Z_NULL). @@ -820,8 +821,9 @@ ZEXTERN int ZEXPORT deflateSetHeader(z_streamp strm, gzip file" and give up. If deflateSetHeader is not used, the default gzip header has text false, - the time set to zero, and os set to 255, with no extra, name, or comment - fields. The gzip header is returned to the default state by deflateReset(). + the time set to zero, and os set to the current operating system, with no + extra, name, or comment fields. The gzip header is returned to the default + state by deflateReset(). deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source stream state was inconsistent. @@ -960,6 +962,7 @@ ZEXTERN int ZEXPORT inflateReset(z_streamp strm); This function is equivalent to inflateEnd followed by inflateInit, but does not free and reallocate the internal decompression state. The stream will keep attributes that may have been set by inflateInit2. + total_in, total_out, adler, and msg are initialized. inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source stream state was inconsistent (such as zalloc or state being Z_NULL). diff --git a/doc/contributing/maintaining/maintaining-dependencies.md b/doc/contributing/maintaining/maintaining-dependencies.md index d706d08c55d497..acdd139adc3a5c 100644 --- a/doc/contributing/maintaining/maintaining-dependencies.md +++ b/doc/contributing/maintaining/maintaining-dependencies.md @@ -31,7 +31,7 @@ This a list of all the dependencies: * [undici 5.27.2][] * [uvwasi 0.0.19][] * [V8 11.3.244.8][] -* [zlib 1.2.13.1-motley-dfc48fc][] +* [zlib 1.2.13.1-motley-5daffc7][] Any code which meets one or more of these conditions should be managed as a dependency: @@ -311,7 +311,7 @@ See [maintaining-web-assembly][] for more informations. high-performance JavaScript and WebAssembly engine, written in C++. See [maintaining-V8][] for more informations. -### zlib 1.2.13.1-motley-dfc48fc +### zlib 1.2.13.1-motley-5daffc7 The [zlib](https://chromium.googlesource.com/chromium/src/+/refs/heads/main/third_party/zlib) dependency lossless data-compression library, @@ -349,4 +349,4 @@ performance improvements not currently available in standard zlib. [update-openssl-action]: ../../../.github/workflows/update-openssl.yml [uvwasi 0.0.19]: #uvwasi-0019 [v8 11.3.244.8]: #v8-1132448 -[zlib 1.2.13.1-motley-dfc48fc]: #zlib-12131-motley-dfc48fc +[zlib 1.2.13.1-motley-5daffc7]: #zlib-12131-motley-5daffc7 diff --git a/src/zlib_version.h b/src/zlib_version.h index 02390a4afd0ec5..cb48f4e9a02ad4 100644 --- a/src/zlib_version.h +++ b/src/zlib_version.h @@ -2,5 +2,5 @@ // Refer to tools/dep_updaters/update-zlib.sh #ifndef SRC_ZLIB_VERSION_H_ #define SRC_ZLIB_VERSION_H_ -#define ZLIB_VERSION "1.2.13.1-motley-dfc48fc" +#define ZLIB_VERSION "1.2.13.1-motley-5daffc7" #endif // SRC_ZLIB_VERSION_H_ From 989814093e3f97a9ee1690ed8ee496c930e79444 Mon Sep 17 00:00:00 2001 From: Stefan Stojanovic Date: Fri, 8 Dec 2023 22:58:17 +0100 Subject: [PATCH 220/229] win,tools: upgrade Windows signing to smctl As a part of the new signing requrements for Windows change approach to use the DigiCert cloud HSM service KeyLocker. PR-URL: https://github.com/nodejs/node/pull/50956 Fixes: https://github.com/nodejs/build/issues/3491 Reviewed-By: Richard Lau Reviewed-By: Michael Dawson --- tools/sign.bat | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tools/sign.bat b/tools/sign.bat index fae06583b4923f..f4d18d9f7a8a0d 100644 --- a/tools/sign.bat +++ b/tools/sign.bat @@ -1,15 +1,12 @@ @echo off -set timeservers=(http://timestamp.globalsign.com/scripts/timestamp.dll http://timestamp.comodoca.com/authenticode http://timestamp.verisign.com/scripts/timestamp.dll http://tsa.starfieldtech.com) - -for %%s in %timeservers% do ( - signtool sign /a /d "Node.js" /du "https://nodejs.org" /fd SHA256 /t %%s %1 - if not ERRORLEVEL 1 ( - echo Successfully signed %1 using timeserver %%s - exit /b 0 - ) - echo Signing %1 failed using %%s +@REM From December 2023, new certificates use DigiCert cloud HSM service for EV signing. +@REM They provide a client side app smctl.exe for managing certificates and signing process. +@REM Release CI machines are configured to have it in the PATH so this can be used safely. +smctl sign -k key_nodejs -i %1 +if not ERRORLEVEL 1 ( + echo Successfully signed %1 using smctl + exit /b 0 ) - -echo Could not sign %1 using any available timeserver -exit /b 1 +echo Could not sign %1 using smctl +exit /b 1 \ No newline at end of file From 621c4d66c28498f2160c0d1c26c80a905b2c1a5e Mon Sep 17 00:00:00 2001 From: legendecas Date: Fri, 10 Nov 2023 00:20:59 +0800 Subject: [PATCH 221/229] src: make process binding data weak Avoid the realm being strongly referenced by the process binding data. PR-URL: https://github.com/nodejs/node/pull/48655 Backport-PR-URL: https://github.com/nodejs/node/pull/51239 Reviewed-By: Matteo Collina Reviewed-By: Antoine du Hamel Reviewed-By: James M Snell Reviewed-By: Joyee Cheung --- lib/internal/process/per_thread.js | 5 +-- src/node_process.h | 18 +++++---- src/node_process_methods.cc | 62 +++++++++++++++++++----------- 3 files changed, 52 insertions(+), 33 deletions(-) diff --git a/lib/internal/process/per_thread.js b/lib/internal/process/per_thread.js index ce4822af019d5f..e88a18e75a667e 100644 --- a/lib/internal/process/per_thread.js +++ b/lib/internal/process/per_thread.js @@ -28,7 +28,6 @@ const { StringPrototypeStartsWith, Symbol, SymbolIterator, - Uint32Array, } = primordials; const { @@ -65,10 +64,10 @@ function refreshHrtimeBuffer() { // The 3 entries filled in by the original process.hrtime contains // the upper/lower 32 bits of the second part of the value, // and the remaining nanoseconds of the value. - hrValues = new Uint32Array(binding.hrtimeBuffer); + hrValues = binding.hrtimeBuffer; // Use a BigUint64Array in the closure because this is actually a bit // faster than simply returning a BigInt from C++ in V8 7.1. - hrBigintValues = new BigUint64Array(binding.hrtimeBuffer, 0, 1); + hrBigintValues = new BigUint64Array(binding.hrtimeBuffer.buffer, 0, 1); } // Create the buffers. diff --git a/src/node_process.h b/src/node_process.h index cb8c7962825f46..c911b3ddd7b78f 100644 --- a/src/node_process.h +++ b/src/node_process.h @@ -48,16 +48,20 @@ void PatchProcessObject(const v8::FunctionCallbackInfo& args); namespace process { class BindingData : public SnapshotableObject { public: + struct InternalFieldInfo : public node::InternalFieldInfoBase { + AliasedBufferIndex hrtime_buffer; + }; + static void AddMethods(v8::Isolate* isolate, v8::Local target); static void RegisterExternalReferences(ExternalReferenceRegistry* registry); - using InternalFieldInfo = InternalFieldInfoBase; - SERIALIZABLE_OBJECT_METHODS() SET_BINDING_ID(process_binding_data) - BindingData(Realm* realm, v8::Local object); + BindingData(Realm* realm, + v8::Local object, + InternalFieldInfo* info = nullptr); void MemoryInfo(MemoryTracker* tracker) const override; SET_MEMORY_INFO_NAME(BindingData) @@ -81,10 +85,10 @@ class BindingData : public SnapshotableObject { static void SlowBigInt(const v8::FunctionCallbackInfo& args); private: - static constexpr size_t kBufferSize = - std::max(sizeof(uint64_t), sizeof(uint32_t) * 3); - v8::Global array_buffer_; - std::shared_ptr backing_store_; + // Buffer length in uint32. + static constexpr size_t kHrTimeBufferLength = 3; + AliasedUint32Array hrtime_buffer_; + InternalFieldInfo* internal_field_info_ = nullptr; // These need to be static so that we have their addresses available to // register as external references in the snapshot at environment creation diff --git a/src/node_process_methods.cc b/src/node_process_methods.cc index 34d3c3af4c3e10..0342658c35ebdb 100644 --- a/src/node_process_methods.cc +++ b/src/node_process_methods.cc @@ -465,15 +465,29 @@ static void ReallyExit(const FunctionCallbackInfo& args) { namespace process { -BindingData::BindingData(Realm* realm, v8::Local object) - : SnapshotableObject(realm, object, type_int) { +BindingData::BindingData(Realm* realm, + v8::Local object, + InternalFieldInfo* info) + : SnapshotableObject(realm, object, type_int), + hrtime_buffer_(realm->isolate(), + kHrTimeBufferLength, + MAYBE_FIELD_PTR(info, hrtime_buffer)) { Isolate* isolate = realm->isolate(); Local context = realm->context(); - Local ab = ArrayBuffer::New(isolate, kBufferSize); - array_buffer_.Reset(isolate, ab); - object->Set(context, FIXED_ONE_BYTE_STRING(isolate, "hrtimeBuffer"), ab) - .ToChecked(); - backing_store_ = ab->GetBackingStore(); + + if (info == nullptr) { + object + ->Set(context, + FIXED_ONE_BYTE_STRING(isolate, "hrtimeBuffer"), + hrtime_buffer_.GetJSArray()) + .ToChecked(); + } else { + hrtime_buffer_.Deserialize(realm->context()); + } + + // The hrtime buffer is referenced from the binding data js object. + // Make the native handle weak to avoid keeping the realm alive. + hrtime_buffer_.MakeWeak(); } v8::CFunction BindingData::fast_number_(v8::CFunction::Make(FastNumber)); @@ -503,7 +517,7 @@ BindingData* BindingData::FromV8Value(Local value) { } void BindingData::MemoryInfo(MemoryTracker* tracker) const { - tracker->TrackField("array_buffer", array_buffer_); + tracker->TrackField("hrtime_buffer", hrtime_buffer_); } // This is the legacy version of hrtime before BigInt was introduced in @@ -516,20 +530,19 @@ void BindingData::MemoryInfo(MemoryTracker* tracker) const { // because there is no Uint64Array in JS. // The third entry contains the remaining nanosecond part of the value. void BindingData::NumberImpl(BindingData* receiver) { - // Make sure we don't accidentally access buffers wiped for snapshot. - CHECK(!receiver->array_buffer_.IsEmpty()); uint64_t t = uv_hrtime(); - uint32_t* fields = static_cast(receiver->backing_store_->Data()); - fields[0] = (t / NANOS_PER_SEC) >> 32; - fields[1] = (t / NANOS_PER_SEC) & 0xffffffff; - fields[2] = t % NANOS_PER_SEC; + receiver->hrtime_buffer_[0] = (t / NANOS_PER_SEC) >> 32; + receiver->hrtime_buffer_[1] = (t / NANOS_PER_SEC) & 0xffffffff; + receiver->hrtime_buffer_[2] = t % NANOS_PER_SEC; } void BindingData::BigIntImpl(BindingData* receiver) { - // Make sure we don't accidentally access buffers wiped for snapshot. - CHECK(!receiver->array_buffer_.IsEmpty()); uint64_t t = uv_hrtime(); - uint64_t* fields = static_cast(receiver->backing_store_->Data()); + // The buffer is a Uint32Array, so we need to reinterpret it as a + // Uint64Array to write the value. The buffer is valid at this scope so we + // can safely cast away the constness. + uint64_t* fields = reinterpret_cast( + const_cast(receiver->hrtime_buffer_.GetNativeBuffer())); fields[0] = t; } @@ -543,9 +556,10 @@ void BindingData::SlowNumber(const v8::FunctionCallbackInfo& args) { bool BindingData::PrepareForSerialization(Local context, v8::SnapshotCreator* creator) { - // It's not worth keeping. - // Release it, we will recreate it when the instance is dehydrated. - array_buffer_.Reset(); + DCHECK_NULL(internal_field_info_); + internal_field_info_ = InternalFieldInfoBase::New(type()); + internal_field_info_->hrtime_buffer = + hrtime_buffer_.Serialize(context, creator); // Return true because we need to maintain the reference to the binding from // JS land. return true; @@ -553,8 +567,8 @@ bool BindingData::PrepareForSerialization(Local context, InternalFieldInfoBase* BindingData::Serialize(int index) { DCHECK_IS_SNAPSHOT_SLOT(index); - InternalFieldInfo* info = - InternalFieldInfoBase::New(type()); + InternalFieldInfo* info = internal_field_info_; + internal_field_info_ = nullptr; return info; } @@ -566,7 +580,9 @@ void BindingData::Deserialize(Local context, v8::HandleScope scope(context->GetIsolate()); Realm* realm = Realm::GetCurrent(context); // Recreate the buffer in the constructor. - BindingData* binding = realm->AddBindingData(holder); + InternalFieldInfo* casted_info = static_cast(info); + BindingData* binding = + realm->AddBindingData(holder, casted_info); CHECK_NOT_NULL(binding); } From 14cc3b9b9000f32f43be314270291134ccf0bf0c Mon Sep 17 00:00:00 2001 From: legendecas Date: Fri, 10 Nov 2023 00:21:03 +0800 Subject: [PATCH 222/229] src: create worker per isolate properties PR-URL: https://github.com/nodejs/node/pull/48655 Backport-PR-URL: https://github.com/nodejs/node/pull/51239 Reviewed-By: Matteo Collina Reviewed-By: Antoine du Hamel Reviewed-By: James M Snell Reviewed-By: Joyee Cheung --- src/node_binding.h | 5 ++-- src/node_messaging.cc | 70 ++++++++++++++++++++++++------------------- src/node_messaging.h | 2 +- 3 files changed, 44 insertions(+), 33 deletions(-) diff --git a/src/node_binding.h b/src/node_binding.h index 9f0692ca4e190b..b09b8a5e521b18 100644 --- a/src/node_binding.h +++ b/src/node_binding.h @@ -37,10 +37,11 @@ static_assert(static_cast(NM_F_LINKED) == V(contextify) \ V(encoding_binding) \ V(fs) \ + V(messaging) \ V(mksnapshot) \ - V(timers) \ - V(process_methods) \ V(performance) \ + V(process_methods) \ + V(timers) \ V(url) \ V(worker) \ NODE_BUILTIN_ICU_BINDINGS(V) diff --git a/src/node_messaging.cc b/src/node_messaging.cc index 04b998bb28ce4b..6c9a23e669b29d 100644 --- a/src/node_messaging.cc +++ b/src/node_messaging.cc @@ -30,6 +30,7 @@ using v8::Maybe; using v8::MaybeLocal; using v8::Nothing; using v8::Object; +using v8::ObjectTemplate; using v8::SharedArrayBuffer; using v8::SharedValueConveyor; using v8::String; @@ -707,7 +708,8 @@ MessagePort* MessagePort::New( std::unique_ptr data, std::shared_ptr sibling_group) { Context::Scope context_scope(context); - Local ctor_templ = GetMessagePortConstructorTemplate(env); + Local ctor_templ = + GetMessagePortConstructorTemplate(env->isolate_data()); // Construct a new instance, then assign the listener instance and possibly // the MessagePortData to it. @@ -1107,7 +1109,8 @@ void MessagePort::Stop(const FunctionCallbackInfo& args) { void MessagePort::CheckType(const FunctionCallbackInfo& args) { Environment* env = Environment::GetCurrent(args); args.GetReturnValue().Set( - GetMessagePortConstructorTemplate(env)->HasInstance(args[0])); + GetMessagePortConstructorTemplate(env->isolate_data()) + ->HasInstance(args[0])); } void MessagePort::Drain(const FunctionCallbackInfo& args) { @@ -1184,28 +1187,30 @@ void MessagePort::MemoryInfo(MemoryTracker* tracker) const { tracker->TrackField("emit_message_fn", emit_message_fn_); } -Local GetMessagePortConstructorTemplate(Environment* env) { +Local GetMessagePortConstructorTemplate( + IsolateData* isolate_data) { // Factor generating the MessagePort JS constructor into its own piece // of code, because it is needed early on in the child environment setup. - Local templ = env->message_port_constructor_template(); + Local templ = + isolate_data->message_port_constructor_template(); if (!templ.IsEmpty()) return templ; { - Isolate* isolate = env->isolate(); + Isolate* isolate = isolate_data->isolate(); Local m = NewFunctionTemplate(isolate, MessagePort::New); - m->SetClassName(env->message_port_constructor_string()); + m->SetClassName(isolate_data->message_port_constructor_string()); m->InstanceTemplate()->SetInternalFieldCount( MessagePort::kInternalFieldCount); - m->Inherit(HandleWrap::GetConstructorTemplate(env)); + m->Inherit(HandleWrap::GetConstructorTemplate(isolate_data)); SetProtoMethod(isolate, m, "postMessage", MessagePort::PostMessage); SetProtoMethod(isolate, m, "start", MessagePort::Start); - env->set_message_port_constructor_template(m); + isolate_data->set_message_port_constructor_template(m); } - return GetMessagePortConstructorTemplate(env); + return GetMessagePortConstructorTemplate(isolate_data); } JSTransferable::JSTransferable(Environment* env, Local obj) @@ -1573,15 +1578,12 @@ static void BroadcastChannel(const FunctionCallbackInfo& args) { } } -static void InitMessaging(Local target, - Local unused, - Local context, - void* priv) { - Environment* env = Environment::GetCurrent(context); - Isolate* isolate = env->isolate(); +static void CreatePerIsolateProperties(IsolateData* isolate_data, + Local target) { + Isolate* isolate = isolate_data->isolate(); { - SetConstructorFunction(context, + SetConstructorFunction(isolate, target, "MessageChannel", NewFunctionTemplate(isolate, MessageChannel)); @@ -1594,31 +1596,36 @@ static void InitMessaging(Local target, JSTransferable::kInternalFieldCount); t->SetClassName(OneByteString(isolate, "JSTransferable")); SetConstructorFunction( - context, target, "JSTransferable", t, SetConstructorFunctionFlag::NONE); + isolate, target, "JSTransferable", t, SetConstructorFunctionFlag::NONE); } - SetConstructorFunction(context, + SetConstructorFunction(isolate, target, - env->message_port_constructor_string(), - GetMessagePortConstructorTemplate(env), - SetConstructorFunctionFlag::NONE); + isolate_data->message_port_constructor_string(), + GetMessagePortConstructorTemplate(isolate_data)); // These are not methods on the MessagePort prototype, because // the browser equivalents do not provide them. - SetMethod(context, target, "stopMessagePort", MessagePort::Stop); - SetMethod(context, target, "checkMessagePort", MessagePort::CheckType); - SetMethod(context, target, "drainMessagePort", MessagePort::Drain); + SetMethod(isolate, target, "stopMessagePort", MessagePort::Stop); + SetMethod(isolate, target, "checkMessagePort", MessagePort::CheckType); + SetMethod(isolate, target, "drainMessagePort", MessagePort::Drain); SetMethod( - context, target, "receiveMessageOnPort", MessagePort::ReceiveMessage); + isolate, target, "receiveMessageOnPort", MessagePort::ReceiveMessage); SetMethod( - context, target, "moveMessagePortToContext", MessagePort::MoveToContext); - SetMethod(context, + isolate, target, "moveMessagePortToContext", MessagePort::MoveToContext); + SetMethod(isolate, target, "setDeserializerCreateObjectFunction", SetDeserializerCreateObjectFunction); - SetMethod(context, target, "broadcastChannel", BroadcastChannel); - SetMethod(context, target, "structuredClone", StructuredClone); + SetMethod(isolate, target, "broadcastChannel", BroadcastChannel); + SetMethod(isolate, target, "structuredClone", StructuredClone); +} +static void CreatePerContextProperties(Local target, + Local unused, + Local context, + void* priv) { + Environment* env = Environment::GetCurrent(context); { Local domexception = GetDOMException(context).ToLocalChecked(); target @@ -1650,6 +1657,9 @@ static void RegisterExternalReferences(ExternalReferenceRegistry* registry) { } // namespace worker } // namespace node -NODE_BINDING_CONTEXT_AWARE_INTERNAL(messaging, node::worker::InitMessaging) +NODE_BINDING_CONTEXT_AWARE_INTERNAL(messaging, + node::worker::CreatePerContextProperties) +NODE_BINDING_PER_ISOLATE_INIT(messaging, + node::worker::CreatePerIsolateProperties) NODE_BINDING_EXTERNAL_REFERENCE(messaging, node::worker::RegisterExternalReferences) diff --git a/src/node_messaging.h b/src/node_messaging.h index 1c2a564d8d58e1..2207a7fd2a0ef9 100644 --- a/src/node_messaging.h +++ b/src/node_messaging.h @@ -366,7 +366,7 @@ class JSTransferable : public BaseObject { }; v8::Local GetMessagePortConstructorTemplate( - Environment* env); + IsolateData* isolate_data); } // namespace worker } // namespace node From d00412a0837cd3d1f4562ad51aa2905062d05eaa Mon Sep 17 00:00:00 2001 From: legendecas Date: Fri, 10 Nov 2023 00:21:06 +0800 Subject: [PATCH 223/229] src: create fs_dir per isolate properties PR-URL: https://github.com/nodejs/node/pull/48655 Backport-PR-URL: https://github.com/nodejs/node/pull/51239 Reviewed-By: Matteo Collina Reviewed-By: Antoine du Hamel Reviewed-By: James M Snell Reviewed-By: Joyee Cheung --- src/node_binding.h | 1 + src/node_dir.cc | 28 ++++++++++++++++------------ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/node_binding.h b/src/node_binding.h index b09b8a5e521b18..2901231090cce3 100644 --- a/src/node_binding.h +++ b/src/node_binding.h @@ -37,6 +37,7 @@ static_assert(static_cast(NM_F_LINKED) == V(contextify) \ V(encoding_binding) \ V(fs) \ + V(fs_dir) \ V(messaging) \ V(mksnapshot) \ V(performance) \ diff --git a/src/node_dir.cc b/src/node_dir.cc index 10cde6067899c7..21d5b880ccee98 100644 --- a/src/node_dir.cc +++ b/src/node_dir.cc @@ -423,27 +423,29 @@ static void OpenDirSync(const FunctionCallbackInfo& args) { args.GetReturnValue().Set(handle->object().As()); } -void Initialize(Local target, - Local unused, - Local context, - void* priv) { - Environment* env = Environment::GetCurrent(context); - Isolate* isolate = env->isolate(); +void CreatePerIsolateProperties(IsolateData* isolate_data, + Local target) { + Isolate* isolate = isolate_data->isolate(); - SetMethod(context, target, "opendir", OpenDir); - SetMethod(context, target, "opendirSync", OpenDirSync); + SetMethod(isolate, target, "opendir", OpenDir); + SetMethod(isolate, target, "opendirSync", OpenDirSync); // Create FunctionTemplate for DirHandle Local dir = NewFunctionTemplate(isolate, DirHandle::New); - dir->Inherit(AsyncWrap::GetConstructorTemplate(env)); + dir->Inherit(AsyncWrap::GetConstructorTemplate(isolate_data)); SetProtoMethod(isolate, dir, "read", DirHandle::Read); SetProtoMethod(isolate, dir, "close", DirHandle::Close); Local dirt = dir->InstanceTemplate(); dirt->SetInternalFieldCount(DirHandle::kInternalFieldCount); - SetConstructorFunction(context, target, "DirHandle", dir); - env->set_dir_instance_template(dirt); + SetConstructorFunction(isolate, target, "DirHandle", dir); + isolate_data->set_dir_instance_template(dirt); } +void CreatePerContextProperties(Local target, + Local unused, + Local context, + void* priv) {} + void RegisterExternalReferences(ExternalReferenceRegistry* registry) { registry->Register(OpenDir); registry->Register(OpenDirSync); @@ -456,6 +458,8 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) { } // end namespace node -NODE_BINDING_CONTEXT_AWARE_INTERNAL(fs_dir, node::fs_dir::Initialize) +NODE_BINDING_CONTEXT_AWARE_INTERNAL(fs_dir, + node::fs_dir::CreatePerContextProperties) +NODE_BINDING_PER_ISOLATE_INIT(fs_dir, node::fs_dir::CreatePerIsolateProperties) NODE_BINDING_EXTERNAL_REFERENCE(fs_dir, node::fs_dir::RegisterExternalReferences) From 11b3e470db28b199c4f4fddbeb3de6b0c4246ac4 Mon Sep 17 00:00:00 2001 From: legendecas Date: Fri, 10 Nov 2023 00:21:08 +0800 Subject: [PATCH 224/229] src: create per isolate proxy env template PR-URL: https://github.com/nodejs/node/pull/48655 Backport-PR-URL: https://github.com/nodejs/node/pull/51239 Reviewed-By: Matteo Collina Reviewed-By: Antoine du Hamel Reviewed-By: James M Snell Reviewed-By: Joyee Cheung --- src/env.cc | 1 + src/node_env_var.cc | 3 ++- src/node_process.h | 2 +- src/node_realm.cc | 5 +++-- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/env.cc b/src/env.cc index 6349cd802a6210..d797fcd527f422 100644 --- a/src/env.cc +++ b/src/env.cc @@ -504,6 +504,7 @@ void IsolateData::CreateProperties() { binding::CreateInternalBindingTemplates(this); contextify::ContextifyContext::InitializeGlobalTemplates(this); + CreateEnvProxyTemplate(this); } constexpr uint16_t kDefaultCppGCEmebdderID = 0x90de; diff --git a/src/node_env_var.cc b/src/node_env_var.cc index 94936ea1c2bd22..bce7ae07214ddf 100644 --- a/src/node_env_var.cc +++ b/src/node_env_var.cc @@ -456,7 +456,8 @@ static void EnvDefiner(Local property, } } -void CreateEnvProxyTemplate(Isolate* isolate, IsolateData* isolate_data) { +void CreateEnvProxyTemplate(IsolateData* isolate_data) { + Isolate* isolate = isolate_data->isolate(); HandleScope scope(isolate); if (!isolate_data->env_proxy_template().IsEmpty()) return; Local env_proxy_ctor_template = diff --git a/src/node_process.h b/src/node_process.h index c911b3ddd7b78f..ee6e6a81676bd6 100644 --- a/src/node_process.h +++ b/src/node_process.h @@ -15,7 +15,7 @@ class MemoryTracker; class ExternalReferenceRegistry; class Realm; -void CreateEnvProxyTemplate(v8::Isolate* isolate, IsolateData* isolate_data); +void CreateEnvProxyTemplate(IsolateData* isolate_data); // Most of the time, it's best to use `console.error` to write // to the process.stderr stream. However, in some cases, such as diff --git a/src/node_realm.cc b/src/node_realm.cc index 7101b23e347c7c..23fb6bd55213ee 100644 --- a/src/node_realm.cc +++ b/src/node_realm.cc @@ -344,10 +344,11 @@ MaybeLocal PrincipalRealm::BootstrapRealm() { return MaybeLocal(); } + // Setup process.env proxy. Local env_string = FIXED_ONE_BYTE_STRING(isolate_, "env"); Local env_proxy; - CreateEnvProxyTemplate(isolate_, env_->isolate_data()); - if (!env_->env_proxy_template()->NewInstance(context()).ToLocal(&env_proxy) || + if (!isolate_data()->env_proxy_template()->NewInstance(context()).ToLocal( + &env_proxy) || process_object()->Set(context(), env_string, env_proxy).IsNothing()) { return MaybeLocal(); } From 8e886a2fffa1e5b874458177302a5c35df5d44bc Mon Sep 17 00:00:00 2001 From: legendecas Date: Fri, 10 Nov 2023 00:21:10 +0800 Subject: [PATCH 225/229] module: remove useCustomLoadersIfPresent flag The flag is always true and can be determined by isLoaderWorker solely. PR-URL: https://github.com/nodejs/node/pull/48655 Backport-PR-URL: https://github.com/nodejs/node/pull/51239 Reviewed-By: Matteo Collina Reviewed-By: Antoine du Hamel Reviewed-By: James M Snell Reviewed-By: Joyee Cheung --- lib/internal/modules/esm/loader.js | 10 ++++------ lib/internal/process/esm_loader.js | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/internal/modules/esm/loader.js b/lib/internal/modules/esm/loader.js index 6044765c3709f5..d1ce6f1df1bceb 100644 --- a/lib/internal/modules/esm/loader.js +++ b/lib/internal/modules/esm/loader.js @@ -524,15 +524,13 @@ let emittedLoaderFlagWarning = false; * A loader instance is used as the main entry point for loading ES modules. Currently, this is a singleton; there is * only one used for loading the main module and everything in its dependency graph, though separate instances of this * class might be instantiated as part of bootstrap for other purposes. - * @param {boolean} useCustomLoadersIfPresent If the user has provided loaders via the --loader flag, use them. * @returns {ModuleLoader} */ -function createModuleLoader(useCustomLoadersIfPresent = true) { +function createModuleLoader() { let customizations = null; - if (useCustomLoadersIfPresent && - // Don't spawn a new worker if we're already in a worker thread created by instantiating CustomizedModuleLoader; - // doing so would cause an infinite loop. - !require('internal/modules/esm/utils').isLoaderWorker()) { + // Don't spawn a new worker if we're already in a worker thread created by instantiating CustomizedModuleLoader; + // doing so would cause an infinite loop. + if (!require('internal/modules/esm/utils').isLoaderWorker()) { const userLoaderPaths = getOptionValue('--experimental-loader'); if (userLoaderPaths.length > 0) { if (!emittedLoaderFlagWarning) { diff --git a/lib/internal/process/esm_loader.js b/lib/internal/process/esm_loader.js index 47c909c9e55435..0865d7ceef66b7 100644 --- a/lib/internal/process/esm_loader.js +++ b/lib/internal/process/esm_loader.js @@ -11,10 +11,10 @@ let esmLoader; module.exports = { get esmLoader() { - return esmLoader ??= createModuleLoader(true); + return esmLoader ??= createModuleLoader(); }, async loadESM(callback) { - esmLoader ??= createModuleLoader(true); + esmLoader ??= createModuleLoader(); try { const userImports = getOptionValue('--import'); if (userImports.length > 0) { From 21ab3c0f0ba82cde243b26b51c37a3789b915088 Mon Sep 17 00:00:00 2001 From: Chengzhong Wu Date: Fri, 10 Nov 2023 00:21:15 +0800 Subject: [PATCH 226/229] module: bootstrap module loaders in shadow realm This bootstraps ESM loaders in the ShadowRealm with `ShadowRealm.prototype.importValue` as its entry point and enables loading ESM and CJS modules in the ShadowRealm. The module is imported without a parent URL and resolved with the current process's working directory. PR-URL: https://github.com/nodejs/node/pull/48655 Backport-PR-URL: https://github.com/nodejs/node/pull/51239 Reviewed-By: Matteo Collina Reviewed-By: Antoine du Hamel Reviewed-By: James M Snell Reviewed-By: Joyee Cheung --- lib/internal/bootstrap/realm.js | 13 +- lib/internal/bootstrap/shadow_realm.js | 21 ++ lib/internal/main/worker_thread.js | 1 + lib/internal/modules/esm/loader.js | 7 +- lib/internal/modules/esm/utils.js | 43 ++++- lib/internal/process/pre_execution.js | 38 +++- src/async_wrap.cc | 22 ++- src/env.cc | 11 +- src/module_wrap.cc | 182 ++++++++++-------- src/module_wrap.h | 13 +- src/node_binding.h | 1 + src/node_errors.h | 4 + src/node_shadow_realm.cc | 34 ++++ .../es-module-shadow-realm/custom-loaders.js | 15 ++ .../es-module-shadow-realm/preload-main.js | 9 + .../es-module-shadow-realm/preload.js | 1 + .../re-export-state-counter.mjs | 3 + .../es-module-shadow-realm/state-counter.mjs | 4 + ...st-shadow-realm-allowed-builtin-modules.js | 21 ++ .../test-shadow-realm-custom-loaders.js | 26 +++ test/parallel/test-shadow-realm-gc-module.js | 20 ++ .../test-shadow-realm-import-value-resolve.js | 28 +++ test/parallel/test-shadow-realm-module.js | 29 +++ .../test-shadow-realm-preload-module.js | 20 ++ 24 files changed, 444 insertions(+), 122 deletions(-) create mode 100644 lib/internal/bootstrap/shadow_realm.js create mode 100644 test/fixtures/es-module-shadow-realm/custom-loaders.js create mode 100644 test/fixtures/es-module-shadow-realm/preload-main.js create mode 100644 test/fixtures/es-module-shadow-realm/preload.js create mode 100644 test/fixtures/es-module-shadow-realm/re-export-state-counter.mjs create mode 100644 test/fixtures/es-module-shadow-realm/state-counter.mjs create mode 100644 test/parallel/test-shadow-realm-allowed-builtin-modules.js create mode 100644 test/parallel/test-shadow-realm-custom-loaders.js create mode 100644 test/parallel/test-shadow-realm-gc-module.js create mode 100644 test/parallel/test-shadow-realm-import-value-resolve.js create mode 100644 test/parallel/test-shadow-realm-module.js create mode 100644 test/parallel/test-shadow-realm-preload-module.js diff --git a/lib/internal/bootstrap/realm.js b/lib/internal/bootstrap/realm.js index c6935c6f7775fc..6034af9a36003c 100644 --- a/lib/internal/bootstrap/realm.js +++ b/lib/internal/bootstrap/realm.js @@ -50,6 +50,8 @@ const { ArrayFrom, + ArrayPrototypeFilter, + ArrayPrototypeIncludes, ArrayPrototypeMap, ArrayPrototypePush, ArrayPrototypeSlice, @@ -215,8 +217,8 @@ const internalBuiltinIds = builtinIds .filter((id) => StringPrototypeStartsWith(id, 'internal/') && id !== selfId); // When --expose-internals is on we'll add the internal builtin ids to these. -const canBeRequiredByUsersList = new SafeSet(publicBuiltinIds); -const canBeRequiredByUsersWithoutSchemeList = +let canBeRequiredByUsersList = new SafeSet(publicBuiltinIds); +let canBeRequiredByUsersWithoutSchemeList = new SafeSet(publicBuiltinIds.filter((id) => !schemelessBlockList.has(id))); /** @@ -269,6 +271,13 @@ class BuiltinModule { } } + static setRealmAllowRequireByUsers(ids) { + canBeRequiredByUsersList = + new SafeSet(ArrayPrototypeFilter(ids, (id) => ArrayPrototypeIncludes(publicBuiltinIds, id))); + canBeRequiredByUsersWithoutSchemeList = + new SafeSet(ArrayPrototypeFilter(ids, (id) => !schemelessBlockList.has(id))); + } + // To be called during pre-execution when --expose-internals is on. // Enables the user-land module loader to access internal modules. static exposeInternals() { diff --git a/lib/internal/bootstrap/shadow_realm.js b/lib/internal/bootstrap/shadow_realm.js new file mode 100644 index 00000000000000..b99502355d9818 --- /dev/null +++ b/lib/internal/bootstrap/shadow_realm.js @@ -0,0 +1,21 @@ +'use strict'; + +// This script sets up the context for shadow realms. + +const { + prepareShadowRealmExecution, +} = require('internal/process/pre_execution'); +const { + BuiltinModule, +} = require('internal/bootstrap/realm'); + +BuiltinModule.setRealmAllowRequireByUsers([ + /** + * The built-in modules exposed in the ShadowRealm must each be providing + * platform capabilities with no authority to cause side effects such as + * I/O or mutation of values that are shared across different realms within + * the same Node.js environment. + */ +]); + +prepareShadowRealmExecution(); diff --git a/lib/internal/main/worker_thread.js b/lib/internal/main/worker_thread.js index 12ae4a9b23212d..56697c3b2c2209 100644 --- a/lib/internal/main/worker_thread.js +++ b/lib/internal/main/worker_thread.js @@ -136,6 +136,7 @@ port.on('message', (message) => { const isLoaderWorker = doEval === 'internal' && filename === require('internal/modules/esm/utils').loaderWorkerId; + // Disable custom loaders in loader worker. setupUserModules(isLoaderWorker); if (!hasStdin) diff --git a/lib/internal/modules/esm/loader.js b/lib/internal/modules/esm/loader.js index d1ce6f1df1bceb..0694c7ef2b902d 100644 --- a/lib/internal/modules/esm/loader.js +++ b/lib/internal/modules/esm/loader.js @@ -528,9 +528,10 @@ let emittedLoaderFlagWarning = false; */ function createModuleLoader() { let customizations = null; - // Don't spawn a new worker if we're already in a worker thread created by instantiating CustomizedModuleLoader; - // doing so would cause an infinite loop. - if (!require('internal/modules/esm/utils').isLoaderWorker()) { + // Don't spawn a new worker if custom loaders are disabled. For instance, if + // we're already in a worker thread created by instantiating + // CustomizedModuleLoader; doing so would cause an infinite loop. + if (!require('internal/modules/esm/utils').forceDefaultLoader()) { const userLoaderPaths = getOptionValue('--experimental-loader'); if (userLoaderPaths.length > 0) { if (!emittedLoaderFlagWarning) { diff --git a/lib/internal/modules/esm/utils.js b/lib/internal/modules/esm/utils.js index 8dee1a661b73ac..003ae7eb987f03 100644 --- a/lib/internal/modules/esm/utils.js +++ b/lib/internal/modules/esm/utils.js @@ -4,6 +4,7 @@ const { ArrayIsArray, SafeSet, SafeWeakMap, + Symbol, ObjectFreeze, } = primordials; @@ -157,6 +158,26 @@ function registerModule(referrer, registry) { moduleRegistries.set(idSymbol, registry); } +/** + * Registers the ModuleRegistry for dynamic import() calls with a realm + * as the referrer. Similar to {@link registerModule}, but this function + * generates a new id symbol instead of using the one from the referrer + * object. + * @param {globalThis} globalThis The globalThis object of the realm. + * @param {ModuleRegistry} registry + */ +function registerRealm(globalThis, registry) { + let idSymbol = globalThis[host_defined_option_symbol]; + // If the per-realm host-defined options is already registered, do nothing. + if (idSymbol) { + return; + } + // Otherwise, register the per-realm host-defined options. + idSymbol = Symbol('Realm globalThis'); + globalThis[host_defined_option_symbol] = idSymbol; + moduleRegistries.set(idSymbol, registry); +} + /** * Defines the `import.meta` object for a given module. * @param {symbol} symbol - Reference to the module. @@ -192,28 +213,29 @@ async function importModuleDynamicallyCallback(referrerSymbol, specifier, attrib throw new ERR_VM_DYNAMIC_IMPORT_CALLBACK_MISSING(); } -let _isLoaderWorker = false; +let _forceDefaultLoader = false; /** * Initializes handling of ES modules. * This is configured during pre-execution. Specifically it's set to true for * the loader worker in internal/main/worker_thread.js. - * @param {boolean} [isLoaderWorker=false] - A boolean indicating whether the loader is a worker or not. + * @param {boolean} [forceDefaultLoader=false] - A boolean indicating disabling custom loaders. */ -function initializeESM(isLoaderWorker = false) { - _isLoaderWorker = isLoaderWorker; +function initializeESM(forceDefaultLoader = false) { + _forceDefaultLoader = forceDefaultLoader; initializeDefaultConditions(); - // Setup per-isolate callbacks that locate data or callbacks that we keep + // Setup per-realm callbacks that locate data or callbacks that we keep // track of for different ESM modules. setInitializeImportMetaObjectCallback(initializeImportMetaObject); setImportModuleDynamicallyCallback(importModuleDynamicallyCallback); } /** - * Determine whether the current process is a loader worker. - * @returns {boolean} Whether the current process is a loader worker. + * Determine whether custom loaders are disabled and it is forced to use the + * default loader. + * @returns {boolean} */ -function isLoaderWorker() { - return _isLoaderWorker; +function forceDefaultLoader() { + return _forceDefaultLoader; } /** @@ -253,10 +275,11 @@ async function initializeHooks() { module.exports = { registerModule, + registerRealm, initializeESM, initializeHooks, getDefaultConditions, getConditionsSet, loaderWorkerId: 'internal/modules/esm/worker', - isLoaderWorker, + forceDefaultLoader, }; diff --git a/lib/internal/process/pre_execution.js b/lib/internal/process/pre_execution.js index 917ba90a1c8bbb..9142fed75e9050 100644 --- a/lib/internal/process/pre_execution.js +++ b/lib/internal/process/pre_execution.js @@ -67,6 +67,26 @@ function prepareWorkerThreadExecution() { }); } +function prepareShadowRealmExecution() { + const { registerRealm } = require('internal/modules/esm/utils'); + // Patch the process object with legacy properties and normalizations. + // Do not expand argv1 as it is not available in ShadowRealm. + patchProcessObject(false); + setupDebugEnv(); + + // Disable custom loaders in ShadowRealm. + setupUserModules(true); + registerRealm(globalThis, { + __proto__: null, + importModuleDynamically: (specifier, _referrer, attributes) => { + // The handler for `ShadowRealm.prototype.importValue`. + const { esmLoader } = require('internal/process/esm_loader'); + // `parentURL` is not set in the case of a ShadowRealm top-level import. + return esmLoader.import(specifier, undefined, attributes); + }, + }); +} + function prepareExecution(options) { const { expandArgv1, initializeModules, isMainThread } = options; @@ -160,16 +180,17 @@ function setupSymbolDisposePolyfill() { } } -function setupUserModules(isLoaderWorker = false) { +function setupUserModules(forceDefaultLoader = false) { initializeCJSLoader(); - initializeESMLoader(isLoaderWorker); + initializeESMLoader(forceDefaultLoader); const CJSLoader = require('internal/modules/cjs/loader'); assert(!CJSLoader.hasLoadedAnyUserCJSModule); - // Loader workers are responsible for doing this themselves. - if (isLoaderWorker) { - return; + // Do not enable preload modules if custom loaders are disabled. + // For example, loader workers are responsible for doing this themselves. + // And preload modules are not supported in ShadowRealm as well. + if (!forceDefaultLoader) { + loadPreloadModules(); } - loadPreloadModules(); // Need to be done after --require setup. initializeFrozenIntrinsics(); } @@ -687,9 +708,9 @@ function initializeCJSLoader() { initializeCJS(); } -function initializeESMLoader(isLoaderWorker) { +function initializeESMLoader(forceDefaultLoader) { const { initializeESM } = require('internal/modules/esm/utils'); - initializeESM(isLoaderWorker); + initializeESM(forceDefaultLoader); // Patch the vm module when --experimental-vm-modules is on. // Please update the comments in vm.js when this block changes. @@ -765,6 +786,7 @@ module.exports = { setupUserModules, prepareMainThreadExecution, prepareWorkerThreadExecution, + prepareShadowRealmExecution, markBootstrapComplete, loadPreloadModules, initializeFrozenIntrinsics, diff --git a/src/async_wrap.cc b/src/async_wrap.cc index 42cddc52aed285..8b784cddf41603 100644 --- a/src/async_wrap.cc +++ b/src/async_wrap.cc @@ -372,8 +372,9 @@ void AsyncWrap::CreatePerContextProperties(Local target, Local unused, Local context, void* priv) { - Environment* env = Environment::GetCurrent(context); - Isolate* isolate = env->isolate(); + Realm* realm = Realm::GetCurrent(context); + Environment* env = realm->env(); + Isolate* isolate = realm->isolate(); HandleScope scope(isolate); PropertyAttribute ReadOnlyDontDelete = @@ -446,13 +447,16 @@ void AsyncWrap::CreatePerContextProperties(Local target, #undef FORCE_SET_TARGET_FIELD - env->set_async_hooks_init_function(Local()); - env->set_async_hooks_before_function(Local()); - env->set_async_hooks_after_function(Local()); - env->set_async_hooks_destroy_function(Local()); - env->set_async_hooks_promise_resolve_function(Local()); - env->set_async_hooks_callback_trampoline(Local()); - env->set_async_hooks_binding(target); + // TODO(legendecas): async hook functions are not realm-aware yet. + // This simply avoid overriding principal realm's functions when a + // ShadowRealm initializes the binding. + realm->set_async_hooks_init_function(Local()); + realm->set_async_hooks_before_function(Local()); + realm->set_async_hooks_after_function(Local()); + realm->set_async_hooks_destroy_function(Local()); + realm->set_async_hooks_promise_resolve_function(Local()); + realm->set_async_hooks_callback_trampoline(Local()); + realm->set_async_hooks_binding(target); } void AsyncWrap::RegisterExternalReferences( diff --git a/src/env.cc b/src/env.cc index d797fcd527f422..a429d5526d0af6 100644 --- a/src/env.cc +++ b/src/env.cc @@ -1654,10 +1654,13 @@ void AsyncHooks::MemoryInfo(MemoryTracker* tracker) const { void AsyncHooks::grow_async_ids_stack() { async_ids_stack_.reserve(async_ids_stack_.Length() * 3); - env()->async_hooks_binding()->Set( - env()->context(), - env()->async_ids_stack_string(), - async_ids_stack_.GetJSArray()).Check(); + env() + ->principal_realm() + ->async_hooks_binding() + ->Set(env()->context(), + env()->async_ids_stack_string(), + async_ids_stack_.GetJSArray()) + .Check(); } void AsyncHooks::FailWithCorruptedAsyncStack(double expected_async_id) { diff --git a/src/module_wrap.cc b/src/module_wrap.cc index 46241164bf8bd6..00ee4627f0f05a 100644 --- a/src/module_wrap.cc +++ b/src/module_wrap.cc @@ -39,6 +39,7 @@ using v8::MicrotaskQueue; using v8::Module; using v8::ModuleRequest; using v8::Object; +using v8::ObjectTemplate; using v8::PrimitiveArray; using v8::Promise; using v8::ScriptCompiler; @@ -49,15 +50,16 @@ using v8::UnboundModuleScript; using v8::Undefined; using v8::Value; -ModuleWrap::ModuleWrap(Environment* env, +ModuleWrap::ModuleWrap(Realm* realm, Local object, Local module, Local url, Local context_object, Local synthetic_evaluation_step) - : BaseObject(env, object), - module_(env->isolate(), module), + : BaseObject(realm, object), + module_(realm->isolate(), module), module_hash_(module->GetIdentityHash()) { + realm->env()->hash_to_module_map.emplace(module_hash_, this); object->SetInternalFieldForNodeCore(kModuleSlot, module); object->SetInternalField(kURLSlot, url); object->SetInternalField(kSyntheticEvaluationStepsSlot, @@ -72,7 +74,6 @@ ModuleWrap::ModuleWrap(Environment* env, } ModuleWrap::~ModuleWrap() { - HandleScope scope(env()->isolate()); auto range = env()->hash_to_module_map.equal_range(module_hash_); for (auto it = range.first; it != range.second; ++it) { if (it->second == this) { @@ -107,8 +108,8 @@ void ModuleWrap::New(const FunctionCallbackInfo& args) { CHECK(args.IsConstructCall()); CHECK_GE(args.Length(), 3); - Environment* env = Environment::GetCurrent(args); - Isolate* isolate = env->isolate(); + Realm* realm = Realm::GetCurrent(args); + Isolate* isolate = realm->isolate(); Local that = args.This(); @@ -122,7 +123,7 @@ void ModuleWrap::New(const FunctionCallbackInfo& args) { } else { CHECK(args[1]->IsObject()); contextify_context = ContextifyContext::ContextFromContextifiedSandbox( - env, args[1].As()); + realm->env(), args[1].As()); CHECK_NOT_NULL(contextify_context); context = contextify_context->context(); } @@ -148,8 +149,8 @@ void ModuleWrap::New(const FunctionCallbackInfo& args) { Local id_symbol = Symbol::New(isolate, url); host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol); - ShouldNotAbortOnUncaughtScope no_abort_scope(env); - TryCatchScope try_catch(env); + ShouldNotAbortOnUncaughtScope no_abort_scope(realm->env()); + TryCatchScope try_catch(realm->env()); Local module; @@ -206,7 +207,9 @@ void ModuleWrap::New(const FunctionCallbackInfo& args) { if (try_catch.HasCaught() && !try_catch.HasTerminated()) { CHECK(!try_catch.Message().IsEmpty()); CHECK(!try_catch.Exception().IsEmpty()); - AppendExceptionLine(env, try_catch.Exception(), try_catch.Message(), + AppendExceptionLine(realm->env(), + try_catch.Exception(), + try_catch.Message(), ErrorHandlingMode::MODULE_ERROR); try_catch.ReThrow(); } @@ -215,18 +218,21 @@ void ModuleWrap::New(const FunctionCallbackInfo& args) { if (options == ScriptCompiler::kConsumeCodeCache && source.GetCachedData()->rejected) { THROW_ERR_VM_MODULE_CACHED_DATA_REJECTED( - env, "cachedData buffer was rejected"); + realm, "cachedData buffer was rejected"); try_catch.ReThrow(); return; } } } - if (!that->Set(context, env->url_string(), url).FromMaybe(false)) { + if (!that->Set(context, realm->isolate_data()->url_string(), url) + .FromMaybe(false)) { return; } - if (that->SetPrivate(context, env->host_defined_option_symbol(), id_symbol) + if (that->SetPrivate(context, + realm->isolate_data()->host_defined_option_symbol(), + id_symbol) .IsNothing()) { return; } @@ -236,28 +242,26 @@ void ModuleWrap::New(const FunctionCallbackInfo& args) { // be stored in an internal field. Local context_object = context->GetExtrasBindingObject(); Local synthetic_evaluation_step = - synthetic ? args[3] : Undefined(env->isolate()).As(); + synthetic ? args[3] : Undefined(realm->isolate()).As(); ModuleWrap* obj = new ModuleWrap( - env, that, module, url, context_object, synthetic_evaluation_step); + realm, that, module, url, context_object, synthetic_evaluation_step); obj->contextify_context_ = contextify_context; - env->hash_to_module_map.emplace(module->GetIdentityHash(), obj); - that->SetIntegrityLevel(context, IntegrityLevel::kFrozen); args.GetReturnValue().Set(that); } static Local createImportAttributesContainer( - Environment* env, Isolate* isolate, Local raw_attributes) { + Realm* realm, Isolate* isolate, Local raw_attributes) { Local attributes = - Object::New(isolate, v8::Null(env->isolate()), nullptr, nullptr, 0); + Object::New(isolate, v8::Null(isolate), nullptr, nullptr, 0); for (int i = 0; i < raw_attributes->Length(); i += 3) { attributes - ->Set(env->context(), - raw_attributes->Get(env->context(), i).As(), - raw_attributes->Get(env->context(), i + 1).As()) + ->Set(realm->context(), + raw_attributes->Get(realm->context(), i).As(), + raw_attributes->Get(realm->context(), i + 1).As()) .ToChecked(); } @@ -265,7 +269,7 @@ static Local createImportAttributesContainer( } void ModuleWrap::Link(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); + Realm* realm = Realm::GetCurrent(args); Isolate* isolate = args.GetIsolate(); CHECK_EQ(args.Length(), 1); @@ -292,14 +296,14 @@ void ModuleWrap::Link(const FunctionCallbackInfo& args) { // call the dependency resolve callbacks for (int i = 0; i < module_requests_length; i++) { Local module_request = - module_requests->Get(env->context(), i).As(); + module_requests->Get(realm->context(), i).As(); Local specifier = module_request->GetSpecifier(); - Utf8Value specifier_utf8(env->isolate(), specifier); + Utf8Value specifier_utf8(realm->isolate(), specifier); std::string specifier_std(*specifier_utf8, specifier_utf8.length()); Local raw_attributes = module_request->GetImportAssertions(); Local attributes = - createImportAttributesContainer(env, isolate, raw_attributes); + createImportAttributesContainer(realm, isolate, raw_attributes); Local argv[] = { specifier, @@ -315,11 +319,11 @@ void ModuleWrap::Link(const FunctionCallbackInfo& args) { maybe_resolve_return_value.ToLocalChecked(); if (!resolve_return_value->IsPromise()) { THROW_ERR_VM_MODULE_LINK_FAILURE( - env, "request for '%s' did not return promise", specifier_std); + realm, "request for '%s' did not return promise", specifier_std); return; } Local resolve_promise = resolve_return_value.As(); - obj->resolve_cache_[specifier_std].Reset(env->isolate(), resolve_promise); + obj->resolve_cache_[specifier_std].Reset(isolate, resolve_promise); promises[i] = resolve_promise; } @@ -329,13 +333,13 @@ void ModuleWrap::Link(const FunctionCallbackInfo& args) { } void ModuleWrap::Instantiate(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); + Realm* realm = Realm::GetCurrent(args); Isolate* isolate = args.GetIsolate(); ModuleWrap* obj; ASSIGN_OR_RETURN_UNWRAP(&obj, args.This()); Local context = obj->context(); Local module = obj->module_.Get(isolate); - TryCatchScope try_catch(env); + TryCatchScope try_catch(realm->env()); USE(module->InstantiateModule(context, ResolveModuleCallback)); // clear resolve cache on instantiate @@ -344,7 +348,9 @@ void ModuleWrap::Instantiate(const FunctionCallbackInfo& args) { if (try_catch.HasCaught() && !try_catch.HasTerminated()) { CHECK(!try_catch.Message().IsEmpty()); CHECK(!try_catch.Exception().IsEmpty()); - AppendExceptionLine(env, try_catch.Exception(), try_catch.Message(), + AppendExceptionLine(realm->env(), + try_catch.Exception(), + try_catch.Message(), ErrorHandlingMode::MODULE_ERROR); try_catch.ReThrow(); return; @@ -352,8 +358,8 @@ void ModuleWrap::Instantiate(const FunctionCallbackInfo& args) { } void ModuleWrap::Evaluate(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - Isolate* isolate = env->isolate(); + Realm* realm = Realm::GetCurrent(args); + Isolate* isolate = realm->isolate(); ModuleWrap* obj; ASSIGN_OR_RETURN_UNWRAP(&obj, args.This()); Local context = obj->context(); @@ -368,14 +374,14 @@ void ModuleWrap::Evaluate(const FunctionCallbackInfo& args) { CHECK_EQ(args.Length(), 2); CHECK(args[0]->IsNumber()); - int64_t timeout = args[0]->IntegerValue(env->context()).FromJust(); + int64_t timeout = args[0]->IntegerValue(realm->context()).FromJust(); CHECK(args[1]->IsBoolean()); bool break_on_sigint = args[1]->IsTrue(); - ShouldNotAbortOnUncaughtScope no_abort_scope(env); - TryCatchScope try_catch(env); - Isolate::SafeForTerminationScope safe_for_termination(env->isolate()); + ShouldNotAbortOnUncaughtScope no_abort_scope(realm->env()); + TryCatchScope try_catch(realm->env()); + Isolate::SafeForTerminationScope safe_for_termination(isolate); bool timed_out = false; bool received_signal = false; @@ -406,16 +412,15 @@ void ModuleWrap::Evaluate(const FunctionCallbackInfo& args) { // Convert the termination exception into a regular exception. if (timed_out || received_signal) { - if (!env->is_main_thread() && env->is_stopping()) - return; - env->isolate()->CancelTerminateExecution(); + if (!realm->env()->is_main_thread() && realm->env()->is_stopping()) return; + isolate->CancelTerminateExecution(); // It is possible that execution was terminated by another timeout in // which this timeout is nested, so check whether one of the watchdogs // from this invocation is responsible for termination. if (timed_out) { - THROW_ERR_SCRIPT_EXECUTION_TIMEOUT(env, timeout); + THROW_ERR_SCRIPT_EXECUTION_TIMEOUT(realm->env(), timeout); } else if (received_signal) { - THROW_ERR_SCRIPT_EXECUTION_INTERRUPTED(env); + THROW_ERR_SCRIPT_EXECUTION_INTERRUPTED(realm->env()); } } @@ -429,7 +434,7 @@ void ModuleWrap::Evaluate(const FunctionCallbackInfo& args) { } void ModuleWrap::GetNamespace(const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); + Realm* realm = Realm::GetCurrent(args); Isolate* isolate = args.GetIsolate(); ModuleWrap* obj; ASSIGN_OR_RETURN_UNWRAP(&obj, args.This()); @@ -439,7 +444,7 @@ void ModuleWrap::GetNamespace(const FunctionCallbackInfo& args) { switch (module->GetStatus()) { case v8::Module::Status::kUninstantiated: case v8::Module::Status::kInstantiating: - return env->ThrowError( + return realm->env()->ThrowError( "cannot get namespace, module has not been instantiated"); case v8::Module::Status::kInstantiated: case v8::Module::Status::kEvaluating: @@ -466,11 +471,11 @@ void ModuleWrap::GetStatus(const FunctionCallbackInfo& args) { void ModuleWrap::GetStaticDependencySpecifiers( const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); + Realm* realm = Realm::GetCurrent(args); ModuleWrap* obj; ASSIGN_OR_RETURN_UNWRAP(&obj, args.This()); - Local module = obj->module_.Get(env->isolate()); + Local module = obj->module_.Get(realm->isolate()); Local module_requests = module->GetModuleRequests(); int count = module_requests->Length(); @@ -479,12 +484,12 @@ void ModuleWrap::GetStaticDependencySpecifiers( for (int i = 0; i < count; i++) { Local module_request = - module_requests->Get(env->context(), i).As(); + module_requests->Get(realm->context(), i).As(); specifiers[i] = module_request->GetSpecifier(); } args.GetReturnValue().Set( - Array::New(env->isolate(), specifiers.out(), count)); + Array::New(realm->isolate(), specifiers.out(), count)); } void ModuleWrap::GetError(const FunctionCallbackInfo& args) { @@ -501,15 +506,13 @@ MaybeLocal ModuleWrap::ResolveModuleCallback( Local specifier, Local import_attributes, Local referrer) { + Isolate* isolate = context->GetIsolate(); Environment* env = Environment::GetCurrent(context); if (env == nullptr) { - Isolate* isolate = context->GetIsolate(); THROW_ERR_EXECUTION_ENVIRONMENT_NOT_AVAILABLE(isolate); return MaybeLocal(); } - Isolate* isolate = env->isolate(); - Utf8Value specifier_utf8(isolate, specifier); std::string specifier_std(*specifier_utf8, specifier_utf8.length()); @@ -559,11 +562,16 @@ static MaybeLocal ImportModuleDynamically( THROW_ERR_EXECUTION_ENVIRONMENT_NOT_AVAILABLE(isolate); return MaybeLocal(); } + Realm* realm = Realm::GetCurrent(context); + if (realm == nullptr) { + // Fallback to the principal realm if it's in a vm context. + realm = env->principal_realm(); + } EscapableHandleScope handle_scope(isolate); Local import_callback = - env->host_import_module_dynamically_callback(); + realm->host_import_module_dynamically_callback(); Local id; Local options = host_defined_options.As(); @@ -579,7 +587,7 @@ static MaybeLocal ImportModuleDynamically( } Local attributes = - createImportAttributesContainer(env, isolate, import_attributes); + createImportAttributesContainer(realm, isolate, import_attributes); Local import_args[] = { id, @@ -603,13 +611,13 @@ static MaybeLocal ImportModuleDynamically( void ModuleWrap::SetImportModuleDynamicallyCallback( const FunctionCallbackInfo& args) { Isolate* isolate = args.GetIsolate(); - Environment* env = Environment::GetCurrent(args); + Realm* realm = Realm::GetCurrent(args); HandleScope handle_scope(isolate); CHECK_EQ(args.Length(), 1); CHECK(args[0]->IsFunction()); Local import_callback = args[0].As(); - env->set_host_import_module_dynamically_callback(import_callback); + realm->set_host_import_module_dynamically_callback(import_callback); isolate->SetHostImportModuleDynamicallyCallback(ImportModuleDynamically); } @@ -624,10 +632,15 @@ void ModuleWrap::HostInitializeImportMetaObjectCallback( if (module_wrap == nullptr) { return; } + Realm* realm = Realm::GetCurrent(context); + if (realm == nullptr) { + // Fallback to the principal realm if it's in a vm context. + realm = env->principal_realm(); + } Local wrap = module_wrap->object(); Local callback = - env->host_initialize_import_meta_object_callback(); + realm->host_initialize_import_meta_object_callback(); Local id; if (!wrap->GetPrivate(context, env->host_defined_option_symbol()) .ToLocal(&id)) { @@ -637,7 +650,7 @@ void ModuleWrap::HostInitializeImportMetaObjectCallback( Local args[] = {id, meta}; TryCatchScope try_catch(env); USE(callback->Call( - context, Undefined(env->isolate()), arraysize(args), args)); + context, Undefined(realm->isolate()), arraysize(args), args)); if (try_catch.HasCaught() && !try_catch.HasTerminated()) { try_catch.ReThrow(); } @@ -645,13 +658,13 @@ void ModuleWrap::HostInitializeImportMetaObjectCallback( void ModuleWrap::SetInitializeImportMetaObjectCallback( const FunctionCallbackInfo& args) { - Environment* env = Environment::GetCurrent(args); - Isolate* isolate = env->isolate(); + Realm* realm = Realm::GetCurrent(args); + Isolate* isolate = realm->isolate(); CHECK_EQ(args.Length(), 1); CHECK(args[0]->IsFunction()); Local import_meta_callback = args[0].As(); - env->set_host_initialize_import_meta_object_callback(import_meta_callback); + realm->set_host_initialize_import_meta_object_callback(import_meta_callback); isolate->SetHostInitializeImportMetaObjectCallback( HostInitializeImportMetaObjectCallback); @@ -742,12 +755,9 @@ void ModuleWrap::CreateCachedData(const FunctionCallbackInfo& args) { } } -void ModuleWrap::Initialize(Local target, - Local unused, - Local context, - void* priv) { - Environment* env = Environment::GetCurrent(context); - Isolate* isolate = env->isolate(); +void ModuleWrap::CreatePerIsolateProperties(IsolateData* isolate_data, + Local target) { + Isolate* isolate = isolate_data->isolate(); Local tpl = NewFunctionTemplate(isolate, New); tpl->InstanceTemplate()->SetInternalFieldCount( @@ -767,28 +777,36 @@ void ModuleWrap::Initialize(Local target, "getStaticDependencySpecifiers", GetStaticDependencySpecifiers); - SetConstructorFunction(context, target, "ModuleWrap", tpl); + SetConstructorFunction(isolate, target, "ModuleWrap", tpl); - SetMethod(context, + SetMethod(isolate, target, "setImportModuleDynamicallyCallback", SetImportModuleDynamicallyCallback); - SetMethod(context, + SetMethod(isolate, target, "setInitializeImportMetaObjectCallback", SetInitializeImportMetaObjectCallback); +} +void ModuleWrap::CreatePerContextProperties(Local target, + Local unused, + Local context, + void* priv) { + Realm* realm = Realm::GetCurrent(context); + Isolate* isolate = realm->isolate(); #define V(name) \ - target->Set(context, \ - FIXED_ONE_BYTE_STRING(env->isolate(), #name), \ - Integer::New(env->isolate(), Module::Status::name)) \ - .FromJust() - V(kUninstantiated); - V(kInstantiating); - V(kInstantiated); - V(kEvaluating); - V(kEvaluated); - V(kErrored); + target \ + ->Set(context, \ + FIXED_ONE_BYTE_STRING(isolate, #name), \ + Integer::New(isolate, Module::Status::name)) \ + .FromJust() + V(kUninstantiated); + V(kInstantiating); + V(kInstantiated); + V(kEvaluating); + V(kEvaluated); + V(kErrored); #undef V } @@ -812,7 +830,9 @@ void ModuleWrap::RegisterExternalReferences( } // namespace loader } // namespace node -NODE_BINDING_CONTEXT_AWARE_INTERNAL(module_wrap, - node::loader::ModuleWrap::Initialize) +NODE_BINDING_CONTEXT_AWARE_INTERNAL( + module_wrap, node::loader::ModuleWrap::CreatePerContextProperties) +NODE_BINDING_PER_ISOLATE_INIT( + module_wrap, node::loader::ModuleWrap::CreatePerIsolateProperties) NODE_BINDING_EXTERNAL_REFERENCE( module_wrap, node::loader::ModuleWrap::RegisterExternalReferences) diff --git a/src/module_wrap.h b/src/module_wrap.h index 1fc801edced9c5..e17048357feca2 100644 --- a/src/module_wrap.h +++ b/src/module_wrap.h @@ -10,6 +10,7 @@ namespace node { +class IsolateData; class Environment; class ExternalReferenceRegistry; @@ -40,10 +41,12 @@ class ModuleWrap : public BaseObject { kInternalFieldCount }; - static void Initialize(v8::Local target, - v8::Local unused, - v8::Local context, - void* priv); + static void CreatePerIsolateProperties(IsolateData* isolate_data, + v8::Local target); + static void CreatePerContextProperties(v8::Local target, + v8::Local unused, + v8::Local context, + void* priv); static void RegisterExternalReferences(ExternalReferenceRegistry* registry); static void HostInitializeImportMetaObjectCallback( v8::Local context, @@ -66,7 +69,7 @@ class ModuleWrap : public BaseObject { } private: - ModuleWrap(Environment* env, + ModuleWrap(Realm* realm, v8::Local object, v8::Local module, v8::Local url, diff --git a/src/node_binding.h b/src/node_binding.h index 2901231090cce3..7256bf2bbcf732 100644 --- a/src/node_binding.h +++ b/src/node_binding.h @@ -40,6 +40,7 @@ static_assert(static_cast(NM_F_LINKED) == V(fs_dir) \ V(messaging) \ V(mksnapshot) \ + V(module_wrap) \ V(performance) \ V(process_methods) \ V(timers) \ diff --git a/src/node_errors.h b/src/node_errors.h index 569dafe82df83d..8591faab8eaf6f 100644 --- a/src/node_errors.h +++ b/src/node_errors.h @@ -125,6 +125,10 @@ void AppendExceptionLine(Environment* env, inline void THROW_##code( \ Environment* env, const char* format, Args&&... args) { \ THROW_##code(env->isolate(), format, std::forward(args)...); \ + } \ + template \ + inline void THROW_##code(Realm* realm, const char* format, Args&&... args) { \ + THROW_##code(realm->isolate(), format, std::forward(args)...); \ } ERRORS_WITH_CODE(V) #undef V diff --git a/src/node_shadow_realm.cc b/src/node_shadow_realm.cc index 1cf0a57617b8b2..e7199e18756521 100644 --- a/src/node_shadow_realm.cc +++ b/src/node_shadow_realm.cc @@ -1,6 +1,7 @@ #include "node_shadow_realm.h" #include "env-inl.h" #include "node_errors.h" +#include "node_process.h" namespace node { namespace shadow_realm { @@ -9,6 +10,8 @@ using v8::EscapableHandleScope; using v8::HandleScope; using v8::Local; using v8::MaybeLocal; +using v8::Object; +using v8::String; using v8::Value; using TryCatchScope = node::errors::TryCatchScope; @@ -16,6 +19,11 @@ using TryCatchScope = node::errors::TryCatchScope; // static ShadowRealm* ShadowRealm::New(Environment* env) { ShadowRealm* realm = new ShadowRealm(env); + // TODO(legendecas): required by node::PromiseRejectCallback. + // Remove this once promise rejection doesn't need to be handled across + // realms. + realm->context()->SetSecurityToken( + env->principal_realm()->context()->GetSecurityToken()); // We do not expect the realm bootstrapping to throw any // exceptions. If it does, exit the current Node.js instance. @@ -32,6 +40,10 @@ MaybeLocal HostCreateShadowRealmContextCallback( Local initiator_context) { Environment* env = Environment::GetCurrent(initiator_context); EscapableHandleScope scope(env->isolate()); + + // We do not expect the realm bootstrapping to throw any + // exceptions. If it does, exit the current Node.js instance. + TryCatchScope try_catch(env, TryCatchScope::CatchMode::kFatal); ShadowRealm* realm = ShadowRealm::New(env); if (realm != nullptr) { return scope.Escape(realm->context()); @@ -137,6 +149,28 @@ v8::MaybeLocal ShadowRealm::BootstrapRealm() { } } + // The process object is not exposed globally in ShadowRealm yet. + // However, the process properties need to be setup for built-in modules. + // Specifically, process.cwd() is needed by the ESM loader. + if (ExecuteBootstrapper( + "internal/bootstrap/switches/does_not_own_process_state") + .IsEmpty()) { + return MaybeLocal(); + } + + // Setup process.env proxy. + Local env_string = FIXED_ONE_BYTE_STRING(isolate_, "env"); + Local env_proxy; + if (!isolate_data()->env_proxy_template()->NewInstance(context()).ToLocal( + &env_proxy) || + process_object()->Set(context(), env_string, env_proxy).IsNothing()) { + return MaybeLocal(); + } + + if (ExecuteBootstrapper("internal/bootstrap/shadow_realm").IsEmpty()) { + return MaybeLocal(); + } + return v8::True(isolate_); } diff --git a/test/fixtures/es-module-shadow-realm/custom-loaders.js b/test/fixtures/es-module-shadow-realm/custom-loaders.js new file mode 100644 index 00000000000000..bf4402250cc4f8 --- /dev/null +++ b/test/fixtures/es-module-shadow-realm/custom-loaders.js @@ -0,0 +1,15 @@ +// This fixture is used to test that custom loaders are not enabled in the ShadowRealm. + +'use strict'; +const assert = require('assert'); + +async function workInChildProcess() { + // Assert that the process is running with a custom loader. + const moduleNamespace = await import('file:///42.mjs'); + assert.strictEqual(moduleNamespace.default, 42); + + const realm = new ShadowRealm(); + await assert.rejects(realm.importValue('file:///42.mjs', 'default'), TypeError); +} + +workInChildProcess(); diff --git a/test/fixtures/es-module-shadow-realm/preload-main.js b/test/fixtures/es-module-shadow-realm/preload-main.js new file mode 100644 index 00000000000000..4258b012ad6139 --- /dev/null +++ b/test/fixtures/es-module-shadow-realm/preload-main.js @@ -0,0 +1,9 @@ +// This fixture is used to test that --require preload modules are not enabled in the ShadowRealm. + +'use strict'; +const assert = require('assert'); + +assert.strictEqual(globalThis.preload, 42); +const realm = new ShadowRealm(); +const value = realm.evaluate(`globalThis.preload`); +assert.strictEqual(value, undefined); diff --git a/test/fixtures/es-module-shadow-realm/preload.js b/test/fixtures/es-module-shadow-realm/preload.js new file mode 100644 index 00000000000000..dbbcb65e5a17e7 --- /dev/null +++ b/test/fixtures/es-module-shadow-realm/preload.js @@ -0,0 +1 @@ +globalThis.preload = 42; diff --git a/test/fixtures/es-module-shadow-realm/re-export-state-counter.mjs b/test/fixtures/es-module-shadow-realm/re-export-state-counter.mjs new file mode 100644 index 00000000000000..50a6aa3fe1e5b1 --- /dev/null +++ b/test/fixtures/es-module-shadow-realm/re-export-state-counter.mjs @@ -0,0 +1,3 @@ +// This module verifies that the module specifier is resolved relative to the +// current module and not the current working directory in the ShadowRealm. +export { getCounter } from "./state-counter.mjs"; diff --git a/test/fixtures/es-module-shadow-realm/state-counter.mjs b/test/fixtures/es-module-shadow-realm/state-counter.mjs new file mode 100644 index 00000000000000..c547658bf455ba --- /dev/null +++ b/test/fixtures/es-module-shadow-realm/state-counter.mjs @@ -0,0 +1,4 @@ +let counter = 0; +export const getCounter = () => { + return counter++; +}; diff --git a/test/parallel/test-shadow-realm-allowed-builtin-modules.js b/test/parallel/test-shadow-realm-allowed-builtin-modules.js new file mode 100644 index 00000000000000..2aa550ac7bb55b --- /dev/null +++ b/test/parallel/test-shadow-realm-allowed-builtin-modules.js @@ -0,0 +1,21 @@ +// Flags: --experimental-shadow-realm +'use strict'; +const common = require('../common'); +const assert = require('assert'); + +async function main() { + // Verifies that builtin modules can not be imported in the ShadowRealm. + const realm = new ShadowRealm(); + // The error object created inside the ShadowRealm with the error code + // property is not copied on the realm boundary. Only the error message + // is copied. Simply check the error message here. + await assert.rejects(realm.importValue('fs', 'readFileSync'), { + message: /Cannot find package 'fs'/, + }); + // As above, we can only validate the error message, not the error code. + await assert.rejects(realm.importValue('node:fs', 'readFileSync'), { + message: /No such built-in module: node:fs/, + }); +} + +main().then(common.mustCall()); diff --git a/test/parallel/test-shadow-realm-custom-loaders.js b/test/parallel/test-shadow-realm-custom-loaders.js new file mode 100644 index 00000000000000..80cda74bb88940 --- /dev/null +++ b/test/parallel/test-shadow-realm-custom-loaders.js @@ -0,0 +1,26 @@ +'use strict'; +const common = require('../common'); +const fixtures = require('../common/fixtures'); +const assert = require('assert'); + +const commonArgs = [ + '--experimental-shadow-realm', + '--no-warnings', +]; + +async function main() { + // Verifies that custom loaders are not enabled in the ShadowRealm. + const child = await common.spawnPromisified(process.execPath, [ + ...commonArgs, + '--experimental-loader', + fixtures.fileURL('es-module-loaders', 'loader-resolve-shortcircuit.mjs'), + '--experimental-loader', + fixtures.fileURL('es-module-loaders', 'loader-load-foo-or-42.mjs'), + fixtures.path('es-module-shadow-realm', 'custom-loaders.js'), + ]); + + assert.strictEqual(child.stderr, ''); + assert.strictEqual(child.code, 0); +} + +main().then(common.mustCall()); diff --git a/test/parallel/test-shadow-realm-gc-module.js b/test/parallel/test-shadow-realm-gc-module.js new file mode 100644 index 00000000000000..7f822bdd52fe1d --- /dev/null +++ b/test/parallel/test-shadow-realm-gc-module.js @@ -0,0 +1,20 @@ +// Flags: --experimental-shadow-realm --max-old-space-size=20 +'use strict'; + +/** + * Verifying modules imported by ShadowRealm instances can be correctly + * garbage collected. + */ + +const common = require('../common'); +const fixtures = require('../common/fixtures'); + +async function main() { + const mod = fixtures.fileURL('es-module-shadow-realm', 'state-counter.mjs'); + for (let i = 0; i < 100; i++) { + const realm = new ShadowRealm(); + await realm.importValue(mod, 'getCounter'); + } +} + +main().then(common.mustCall()); diff --git a/test/parallel/test-shadow-realm-import-value-resolve.js b/test/parallel/test-shadow-realm-import-value-resolve.js new file mode 100644 index 00000000000000..ee1c17d67c12f1 --- /dev/null +++ b/test/parallel/test-shadow-realm-import-value-resolve.js @@ -0,0 +1,28 @@ +// Flags: --experimental-shadow-realm +'use strict'; +const common = require('../common'); +const assert = require('assert'); +const path = require('path'); + +common.skipIfWorker('process.chdir is not supported in workers.'); + +async function main() { + const realm = new ShadowRealm(); + + const dirname = __dirname; + // Set process cwd to the parent directory of __dirname. + const cwd = path.dirname(dirname); + process.chdir(cwd); + // Hardcode the relative path to ensure the string is still a valid relative + // URL string. + const relativePath = './fixtures/es-module-shadow-realm/re-export-state-counter.mjs'; + + // Make sure that the module can not be resolved relative to __filename. + assert.throws(() => require.resolve(relativePath), { code: 'MODULE_NOT_FOUND' }); + + // Resolve relative to the current working directory. + const getCounter = await realm.importValue(relativePath, 'getCounter'); + assert.strictEqual(typeof getCounter, 'function'); +} + +main().then(common.mustCall()); diff --git a/test/parallel/test-shadow-realm-module.js b/test/parallel/test-shadow-realm-module.js new file mode 100644 index 00000000000000..bc0c2c04f69f2b --- /dev/null +++ b/test/parallel/test-shadow-realm-module.js @@ -0,0 +1,29 @@ +// Flags: --experimental-shadow-realm +'use strict'; +const common = require('../common'); +const fixtures = require('../common/fixtures'); +const assert = require('assert'); + +async function main() { + const realm = new ShadowRealm(); + const mod = fixtures.fileURL('es-module-shadow-realm', 'state-counter.mjs'); + const getCounter = await realm.importValue(mod, 'getCounter'); + assert.strictEqual(getCounter(), 0); + const getCounter1 = await realm.importValue(mod, 'getCounter'); + // Returned value is a newly wrapped function. + assert.notStrictEqual(getCounter, getCounter1); + // Verify that the module state is shared between two `importValue` calls. + assert.strictEqual(getCounter1(), 1); + assert.strictEqual(getCounter(), 2); + + const { getCounter: getCounterThisRealm } = await import(mod); + assert.notStrictEqual(getCounterThisRealm, getCounter); + // Verify that the module state is not shared between two realms. + assert.strictEqual(getCounterThisRealm(), 0); + assert.strictEqual(getCounter(), 3); + + // Verify that shadow realm rejects to import a non-existing module. + await assert.rejects(realm.importValue('non-exists', 'exports'), TypeError); +} + +main().then(common.mustCall()); diff --git a/test/parallel/test-shadow-realm-preload-module.js b/test/parallel/test-shadow-realm-preload-module.js new file mode 100644 index 00000000000000..ebd29c1c4a8b80 --- /dev/null +++ b/test/parallel/test-shadow-realm-preload-module.js @@ -0,0 +1,20 @@ +'use strict'; +const common = require('../common'); +const fixtures = require('../common/fixtures'); +const { spawnSyncAndExitWithoutError } = require('../common/child_process'); + +const commonArgs = [ + '--experimental-shadow-realm', +]; + +async function main() { + // Verifies that --require preload modules are not enabled in the ShadowRealm. + spawnSyncAndExitWithoutError(process.execPath, [ + ...commonArgs, + '--require', + fixtures.path('es-module-shadow-realm', 'preload.js'), + fixtures.path('es-module-shadow-realm', 'preload-main.js'), + ]); +} + +main().then(common.mustCall()); From b2b4132c3eff62cc64409426d3780cfb55056e6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Zasso?= Date: Sun, 19 Nov 2023 22:22:49 +0100 Subject: [PATCH 227/229] src: iterate on import attributes array correctly The array's length is supposed to be a multiple of two for dynamic import callbacks. Fixes: https://github.com/nodejs/node/issues/50700 PR-URL: https://github.com/nodejs/node/pull/50703 Reviewed-By: Antoine du Hamel Reviewed-By: Shelley Vohr Reviewed-By: Joyee Cheung Reviewed-By: James M Snell --- src/module_wrap.cc | 12 ++++++++---- test/es-module/test-esm-import-attributes-errors.js | 5 +++++ test/es-module/test-esm-import-attributes-errors.mjs | 5 +++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/module_wrap.cc b/src/module_wrap.cc index 00ee4627f0f05a..895ff3a5948add 100644 --- a/src/module_wrap.cc +++ b/src/module_wrap.cc @@ -254,10 +254,14 @@ void ModuleWrap::New(const FunctionCallbackInfo& args) { } static Local createImportAttributesContainer( - Realm* realm, Isolate* isolate, Local raw_attributes) { + Realm* realm, + Isolate* isolate, + Local raw_attributes, + const int elements_per_attribute) { + CHECK_EQ(raw_attributes->Length() % elements_per_attribute, 0); Local attributes = Object::New(isolate, v8::Null(isolate), nullptr, nullptr, 0); - for (int i = 0; i < raw_attributes->Length(); i += 3) { + for (int i = 0; i < raw_attributes->Length(); i += elements_per_attribute) { attributes ->Set(realm->context(), raw_attributes->Get(realm->context(), i).As(), @@ -303,7 +307,7 @@ void ModuleWrap::Link(const FunctionCallbackInfo& args) { Local raw_attributes = module_request->GetImportAssertions(); Local attributes = - createImportAttributesContainer(realm, isolate, raw_attributes); + createImportAttributesContainer(realm, isolate, raw_attributes, 3); Local argv[] = { specifier, @@ -587,7 +591,7 @@ static MaybeLocal ImportModuleDynamically( } Local attributes = - createImportAttributesContainer(realm, isolate, import_attributes); + createImportAttributesContainer(realm, isolate, import_attributes, 2); Local import_args[] = { id, diff --git a/test/es-module/test-esm-import-attributes-errors.js b/test/es-module/test-esm-import-attributes-errors.js index 4521248db176e5..976d4a3f67f778 100644 --- a/test/es-module/test-esm-import-attributes-errors.js +++ b/test/es-module/test-esm-import-attributes-errors.js @@ -26,6 +26,11 @@ async function test() { { code: 'ERR_IMPORT_ASSERTION_TYPE_FAILED' } ); + await rejects( + import(jsModuleDataUrl, { with: { type: 'json', other: 'unsupported' } }), + { code: 'ERR_IMPORT_ASSERTION_TYPE_FAILED' } + ); + await rejects( import(jsModuleDataUrl, { with: { type: 'unsupported' } }), { code: 'ERR_IMPORT_ASSERTION_TYPE_UNSUPPORTED' } diff --git a/test/es-module/test-esm-import-attributes-errors.mjs b/test/es-module/test-esm-import-attributes-errors.mjs index ff932636e39a5f..072b94b1b0fc55 100644 --- a/test/es-module/test-esm-import-attributes-errors.mjs +++ b/test/es-module/test-esm-import-attributes-errors.mjs @@ -21,6 +21,11 @@ await rejects( { code: 'ERR_IMPORT_ASSERTION_TYPE_FAILED' } ); +await rejects( + import(jsModuleDataUrl, { with: { type: 'json', other: 'unsupported' } }), + { code: 'ERR_IMPORT_ASSERTION_TYPE_FAILED' } +); + await rejects( import(import.meta.url, { with: { type: 'unsupported' } }), { code: 'ERR_IMPORT_ASSERTION_TYPE_UNSUPPORTED' } From 435f9c92767534bf60dfc506670feef59e451cbb Mon Sep 17 00:00:00 2001 From: Murilo Kakazu Date: Sun, 3 Dec 2023 10:49:50 -0300 Subject: [PATCH 228/229] fs: use default w flag for writeFileSync with utf8 encoding PR-URL: https://github.com/nodejs/node/pull/50990 Reviewed-By: Luigi Pinca Reviewed-By: Yagiz Nizipli --- lib/fs.js | 6 +++--- test/parallel/test-fs-write-file-sync.js | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lib/fs.js b/lib/fs.js index a65bd0274e6f82..a3a9f3386de2b4 100644 --- a/lib/fs.js +++ b/lib/fs.js @@ -2341,6 +2341,8 @@ function writeFileSync(path, data, options) { validateBoolean(flush, 'options.flush'); + const flag = options.flag || 'w'; + // C++ fast path for string data and UTF8 encoding if (typeof data === 'string' && (options.encoding === 'utf8' || options.encoding === 'utf-8')) { if (!isInt32(path)) { @@ -2349,7 +2351,7 @@ function writeFileSync(path, data, options) { return binding.writeFileUtf8( path, data, - stringToFlags(options.flag), + stringToFlags(flag), parseFileMode(options.mode, 'mode', 0o666), ); } @@ -2359,8 +2361,6 @@ function writeFileSync(path, data, options) { data = Buffer.from(data, options.encoding || 'utf8'); } - const flag = options.flag || 'w'; - const isUserFd = isFd(path); // File descriptor ownership const fd = isUserFd ? path : fs.openSync(path, flag, options.mode); diff --git a/test/parallel/test-fs-write-file-sync.js b/test/parallel/test-fs-write-file-sync.js index 26aa819a8107f0..4ead91530bb748 100644 --- a/test/parallel/test-fs-write-file-sync.js +++ b/test/parallel/test-fs-write-file-sync.js @@ -101,6 +101,22 @@ tmpdir.refresh(); assert.strictEqual(content, 'hello world!'); } +// Test writeFileSync with no flags +{ + const utf8Data = 'hello world!'; + for (const test of [ + { data: utf8Data }, + { data: utf8Data, options: { encoding: 'utf8' } }, + { data: Buffer.from(utf8Data, 'utf8').toString('hex'), options: { encoding: 'hex' } }, + ]) { + const file = tmpdir.resolve(`testWriteFileSyncNewFile_${Math.random()}.txt`); + fs.writeFileSync(file, test.data, test.options); + + const content = fs.readFileSync(file, { encoding: 'utf-8' }); + assert.strictEqual(content, utf8Data); + } +} + // Test writeFileSync with an invalid input { const file = tmpdir.resolve('testWriteFileSyncInvalid.txt'); From 8d4fbd7bb11a300157aeff8e13221c093628dd4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulises=20Gasc=C3=B3n?= Date: Tue, 12 Dec 2023 00:10:33 +0000 Subject: [PATCH 229/229] 2024-01-09, Version 20.11.0 'Iron' (LTS) Notable changes: crypto: * update root certificates to NSS 3.95 (Node.js GitHub Bot) https://github.com/nodejs/node/pull/50805 doc: * add MrJithil to collaborators (Jithil P Ponnan) https://github.com/nodejs/node/pull/50666 * add Ethan-Arrowood as a collaborator (Ethan Arrowood) https://github.com/nodejs/node/pull/50393 esm: * (SEMVER-MINOR) add import.meta.dirname and import.meta.filename (James Sumners) https://github.com/nodejs/node/pull/48740 fs: * add c++ fast path for writeFileSync utf8 (CanadaHonk) https://github.com/nodejs/node/pull/49884 module: * (SEMVER-MINOR) remove useCustomLoadersIfPresent flag (Chengzhong Wu) https://github.com/nodejs/node/pull/48655 * (SEMVER-MINOR) bootstrap module loaders in shadow realm (Chengzhong Wu) https://github.com/nodejs/node/pull/48655 src: * (SEMVER-MINOR) add `--disable-warning` option (Ethan Arrowood) https://github.com/nodejs/node/pull/50661 * (SEMVER-MINOR) create per isolate proxy env template (Chengzhong Wu) https://github.com/nodejs/node/pull/48655 * (SEMVER-MINOR) make process binding data weak (Chengzhong Wu) https://github.com/nodejs/node/pull/48655 stream: * use Array for Readable buffer (Robert Nagy) https://github.com/nodejs/node/pull/50341 * optimize creation (Robert Nagy) https://github.com/nodejs/node/pull/50337 test_runner: * (SEMVER-MINOR) adds built in lcov reporter (Phil Nash) https://github.com/nodejs/node/pull/50018 * (SEMVER-MINOR) add Date to the supported mock APIs (Lucas Santos) https://github.com/nodejs/node/pull/48638 test_runner, cli: * (SEMVER-MINOR) add --test-timeout flag (Shubham Pandey) https://github.com/nodejs/node/pull/50443 PR-URL: https://github.com/nodejs/node/pull/51124 --- CHANGELOG.md | 3 +- doc/api/cli.md | 4 +- doc/api/deprecations.md | 2 +- doc/api/esm.md | 4 +- doc/api/test.md | 4 +- doc/api/vm.md | 2 +- doc/api/wasi.md | 4 +- doc/changelogs/CHANGELOG_V20.md | 254 ++++++++++++++++++++++++++++++++ src/node_version.h | 6 +- 9 files changed, 269 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b71f71e187aa1d..dc39073b06aba1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,7 +35,8 @@ release. -20.10.0
+20.11.0
+20.10.0
20.9.0
20.8.1
20.8.0
diff --git a/doc/api/cli.md b/doc/api/cli.md index 77f23492f65493..7e67abca53b118 100644 --- a/doc/api/cli.md +++ b/doc/api/cli.md @@ -448,7 +448,7 @@ Affects the default output directory of: > Stability: 1.1 - Active development Disable specific process warnings by `code` or `type`. @@ -1797,7 +1797,7 @@ node --test --test-shard=3/3 ### `--test-timeout` A number of milliseconds the test execution will fail after. If unspecified, diff --git a/doc/api/deprecations.md b/doc/api/deprecations.md index 7b03294eb3dd9a..5bf71dd424e70c 100644 --- a/doc/api/deprecations.md +++ b/doc/api/deprecations.md @@ -3424,7 +3424,7 @@ deprecated. Get them from `fs.constants` or `fs.promises.constants` instead. > Stability: 1.2 - Release candidate @@ -348,7 +348,7 @@ added: REPLACEME ### `import.meta.filename` > Stability: 1.2 - Release candidate diff --git a/doc/api/test.md b/doc/api/test.md index 0d3dd96482ae72..a35c33c598bf73 100644 --- a/doc/api/test.md +++ b/doc/api/test.md @@ -1837,7 +1837,7 @@ which is a `MockTimers` instance. added: - v20.4.0 changes: - - version: REPLACEME + - version: v20.11.0 pr-url: https://github.com/nodejs/node/pull/48638 description: Updated parameters to be an option object with available APIs and the default initial epoch. @@ -2323,7 +2323,7 @@ clocks or actual timers outside of the mocking environment. Sets the current Unix timestamp that will be used as reference for any mocked diff --git a/doc/api/vm.md b/doc/api/vm.md index 0e2de8a40ecec7..0d487e434c1e37 100644 --- a/doc/api/vm.md +++ b/doc/api/vm.md @@ -1052,7 +1052,7 @@ function with the given `params`. diff --git a/doc/changelogs/CHANGELOG_V20.md b/doc/changelogs/CHANGELOG_V20.md index 74734b8668c882..b0eb2ee4710126 100644 --- a/doc/changelogs/CHANGELOG_V20.md +++ b/doc/changelogs/CHANGELOG_V20.md @@ -9,6 +9,7 @@ +20.11.0
20.10.0
20.9.0
@@ -52,6 +53,259 @@ * [io.js](CHANGELOG_IOJS.md) * [Archive](CHANGELOG_ARCHIVE.md) + + +## 2024-01-09, Version 20.11.0 'Iron' (LTS), @UlisesGascon + +### Notable Changes + +* \[[`833190fe7c`](https://github.com/nodejs/node/commit/833190fe7c)] - **crypto**: update root certificates to NSS 3.95 (Node.js GitHub Bot) [#50805](https://github.com/nodejs/node/pull/50805) +* \[[`a541b78bdb`](https://github.com/nodejs/node/commit/a541b78bdb)] - **doc**: add MrJithil to collaborators (Jithil P Ponnan) [#50666](https://github.com/nodejs/node/pull/50666) +* \[[`d4be8fad83`](https://github.com/nodejs/node/commit/d4be8fad83)] - **doc**: add Ethan-Arrowood as a collaborator (Ethan Arrowood) [#50393](https://github.com/nodejs/node/pull/50393) +* \[[`c1a196c897`](https://github.com/nodejs/node/commit/c1a196c897)] - **(SEMVER-MINOR)** **esm**: add import.meta.dirname and import.meta.filename (James Sumners) [#48740](https://github.com/nodejs/node/pull/48740) +* \[[`aa3209b880`](https://github.com/nodejs/node/commit/aa3209b880)] - **fs**: add c++ fast path for writeFileSync utf8 (CanadaHonk) [#49884](https://github.com/nodejs/node/pull/49884) +* \[[`8e886a2fff`](https://github.com/nodejs/node/commit/8e886a2fff)] - **(SEMVER-MINOR)** **module**: remove useCustomLoadersIfPresent flag (Chengzhong Wu) [#48655](https://github.com/nodejs/node/pull/48655) +* \[[`21ab3c0f0b`](https://github.com/nodejs/node/commit/21ab3c0f0b)] - **(SEMVER-MINOR)** **module**: bootstrap module loaders in shadow realm (Chengzhong Wu) [#48655](https://github.com/nodejs/node/pull/48655) +* \[[`29d91b13e3`](https://github.com/nodejs/node/commit/29d91b13e3)] - **(SEMVER-MINOR)** **src**: add `--disable-warning` option (Ethan Arrowood) [#50661](https://github.com/nodejs/node/pull/50661) +* \[[`11b3e470db`](https://github.com/nodejs/node/commit/11b3e470db)] - **(SEMVER-MINOR)** **src**: create per isolate proxy env template (Chengzhong Wu) [#48655](https://github.com/nodejs/node/pull/48655) +* \[[`621c4d66c2`](https://github.com/nodejs/node/commit/621c4d66c2)] - **(SEMVER-MINOR)** **src**: make process binding data weak (Chengzhong Wu) [#48655](https://github.com/nodejs/node/pull/48655) +* \[[`139d6c8d3b`](https://github.com/nodejs/node/commit/139d6c8d3b)] - **stream**: use Array for Readable buffer (Robert Nagy) [#50341](https://github.com/nodejs/node/pull/50341) +* \[[`6206957e8d`](https://github.com/nodejs/node/commit/6206957e8d)] - **stream**: optimize creation (Robert Nagy) [#50337](https://github.com/nodejs/node/pull/50337) +* \[[`e64378643d`](https://github.com/nodejs/node/commit/e64378643d)] - **(SEMVER-MINOR)** **test\_runner**: adds built in lcov reporter (Phil Nash) [#50018](https://github.com/nodejs/node/pull/50018) +* \[[`4a830c2d9d`](https://github.com/nodejs/node/commit/4a830c2d9d)] - **(SEMVER-MINOR)** **test\_runner**: add Date to the supported mock APIs (Lucas Santos) [#48638](https://github.com/nodejs/node/pull/48638) +* \[[`842dc01def`](https://github.com/nodejs/node/commit/842dc01def)] - **(SEMVER-MINOR)** **test\_runner, cli**: add --test-timeout flag (Shubham Pandey) [#50443](https://github.com/nodejs/node/pull/50443) + +### Commits + +* \[[`e40a559ab1`](https://github.com/nodejs/node/commit/e40a559ab1)] - **benchmark**: update iterations in benchmark/util/splice-one.js (Liu Jia) [#50698](https://github.com/nodejs/node/pull/50698) +* \[[`00f7a5d26f`](https://github.com/nodejs/node/commit/00f7a5d26f)] - **benchmark**: increase the iteration number to an appropriate value (Lei Shi) [#50766](https://github.com/nodejs/node/pull/50766) +* \[[`be6ad3f375`](https://github.com/nodejs/node/commit/be6ad3f375)] - **benchmark**: rewrite import.meta benchmark (Joyee Cheung) [#50683](https://github.com/nodejs/node/pull/50683) +* \[[`9857364129`](https://github.com/nodejs/node/commit/9857364129)] - **benchmark**: add misc/startup-cli-version benchmark (Joyee Cheung) [#50684](https://github.com/nodejs/node/pull/50684) +* \[[`22d729e7f5`](https://github.com/nodejs/node/commit/22d729e7f5)] - **benchmark**: remove punycode from require-builtins fixture (Joyee Cheung) [#50689](https://github.com/nodejs/node/pull/50689) +* \[[`4cf10a149a`](https://github.com/nodejs/node/commit/4cf10a149a)] - **benchmark**: change iterations in benchmark/es/string-concatenations.js (Liu Jia) [#50585](https://github.com/nodejs/node/pull/50585) +* \[[`15c2ed93a8`](https://github.com/nodejs/node/commit/15c2ed93a8)] - **benchmark**: add benchmarks for encodings (Aras Abbasi) [#50348](https://github.com/nodejs/node/pull/50348) +* \[[`8a896428ca`](https://github.com/nodejs/node/commit/8a896428ca)] - **benchmark**: add more cases to Readable.from (Raz Luvaton) [#50351](https://github.com/nodejs/node/pull/50351) +* \[[`dbe6c5f354`](https://github.com/nodejs/node/commit/dbe6c5f354)] - **benchmark**: skip test-benchmark-os on IBMi (Michael Dawson) [#50286](https://github.com/nodejs/node/pull/50286) +* \[[`179b4b6e62`](https://github.com/nodejs/node/commit/179b4b6e62)] - **benchmark**: move permission-fs-read to permission-processhas-fs-read (Aki Hasegawa-Johnson) [#49770](https://github.com/nodejs/node/pull/49770) +* \[[`32d65c001d`](https://github.com/nodejs/node/commit/32d65c001d)] - **buffer**: improve Buffer.equals performance (kylo5aby) [#50621](https://github.com/nodejs/node/pull/50621) +* \[[`80ea83757e`](https://github.com/nodejs/node/commit/80ea83757e)] - **build**: add GN configurations for simdjson (Cheng Zhao) [#50831](https://github.com/nodejs/node/pull/50831) +* \[[`904e645bcd`](https://github.com/nodejs/node/commit/904e645bcd)] - **build**: add configuration flag to enable Maglev (Keyhan Vakil) [#50692](https://github.com/nodejs/node/pull/50692) +* \[[`019efa8a5a`](https://github.com/nodejs/node/commit/019efa8a5a)] - **build**: fix GN configuration for deps/base64 (Cheng Zhao) [#50696](https://github.com/nodejs/node/pull/50696) +* \[[`a645d5ac54`](https://github.com/nodejs/node/commit/a645d5ac54)] - **build**: disable flag v8\_scriptormodule\_legacy\_lifetime (Chengzhong Wu) [#50616](https://github.com/nodejs/node/pull/50616) +* \[[`8705058b09`](https://github.com/nodejs/node/commit/8705058b09)] - **build**: add GN build files (Cheng Zhao) [#47637](https://github.com/nodejs/node/pull/47637) +* \[[`0a5e9c12cf`](https://github.com/nodejs/node/commit/0a5e9c12cf)] - **build**: fix build with Python 3.12 (Luigi Pinca) [#50582](https://github.com/nodejs/node/pull/50582) +* \[[`ff5713dd43`](https://github.com/nodejs/node/commit/ff5713dd43)] - **build**: support Python 3.12 (Shi Pujin) [#50209](https://github.com/nodejs/node/pull/50209) +* \[[`cfd50f229a`](https://github.com/nodejs/node/commit/cfd50f229a)] - **build**: fix building when there is only python3 (Cheng Zhao) [#48462](https://github.com/nodejs/node/pull/48462) +* \[[`833190fe7c`](https://github.com/nodejs/node/commit/833190fe7c)] - **crypto**: update root certificates to NSS 3.95 (Node.js GitHub Bot) [#50805](https://github.com/nodejs/node/pull/50805) +* \[[`54c46dae9e`](https://github.com/nodejs/node/commit/54c46dae9e)] - **deps**: update zlib to 1.2.13.1-motley-5daffc7 (Node.js GitHub Bot) [#50803](https://github.com/nodejs/node/pull/50803) +* \[[`0be84e5a28`](https://github.com/nodejs/node/commit/0be84e5a28)] - **deps**: update undici to 5.27.2 (Node.js GitHub Bot) [#50813](https://github.com/nodejs/node/pull/50813) +* \[[`ec67890824`](https://github.com/nodejs/node/commit/ec67890824)] - **deps**: V8: cherry-pick 0f9ebbc672c7 (Chengzhong Wu) [#50867](https://github.com/nodejs/node/pull/50867) +* \[[`bc2ebb972b`](https://github.com/nodejs/node/commit/bc2ebb972b)] - **deps**: V8: cherry-pick 13192d6e10fa (Levi Zim) [#50552](https://github.com/nodejs/node/pull/50552) +* \[[`656135d70a`](https://github.com/nodejs/node/commit/656135d70a)] - **deps**: update zlib to 1.2.13.1-motley-dfc48fc (Node.js GitHub Bot) [#50456](https://github.com/nodejs/node/pull/50456) +* \[[`41ee4bcc5d`](https://github.com/nodejs/node/commit/41ee4bcc5d)] - **deps**: update ada to 2.7.4 (Node.js GitHub Bot) [#50815](https://github.com/nodejs/node/pull/50815) +* \[[`a40948b5c5`](https://github.com/nodejs/node/commit/a40948b5c5)] - **deps**: update minimatch to 9.0.3 (Node.js GitHub Bot) [#50806](https://github.com/nodejs/node/pull/50806) +* \[[`7be1222c4a`](https://github.com/nodejs/node/commit/7be1222c4a)] - **deps**: update simdutf to 4.0.4 (Node.js GitHub Bot) [#50772](https://github.com/nodejs/node/pull/50772) +* \[[`68e7d49db6`](https://github.com/nodejs/node/commit/68e7d49db6)] - **deps**: upgrade npm to 10.2.4 (npm team) [#50751](https://github.com/nodejs/node/pull/50751) +* \[[`3d82d38336`](https://github.com/nodejs/node/commit/3d82d38336)] - **deps**: escape Python strings correctly (Michaël Zasso) [#50695](https://github.com/nodejs/node/pull/50695) +* \[[`d3870ac957`](https://github.com/nodejs/node/commit/d3870ac957)] - **deps**: update base64 to 0.5.1 (Node.js GitHub Bot) [#50629](https://github.com/nodejs/node/pull/50629) +* \[[`4b219b6ece`](https://github.com/nodejs/node/commit/4b219b6ece)] - **deps**: update corepack to 0.23.0 (Node.js GitHub Bot) [#50563](https://github.com/nodejs/node/pull/50563) +* \[[`6c41b50922`](https://github.com/nodejs/node/commit/6c41b50922)] - **deps**: update nghttp2 to 1.58.0 (Node.js GitHub Bot) [#50441](https://github.com/nodejs/node/pull/50441) +* \[[`3beee0ae8f`](https://github.com/nodejs/node/commit/3beee0ae8f)] - **deps**: update acorn to 8.11.2 (Node.js GitHub Bot) [#50460](https://github.com/nodejs/node/pull/50460) +* \[[`220916fa93`](https://github.com/nodejs/node/commit/220916fa93)] - **deps**: update undici to 5.27.0 (Node.js GitHub Bot) [#50463](https://github.com/nodejs/node/pull/50463) +* \[[`f9960b3545`](https://github.com/nodejs/node/commit/f9960b3545)] - **deps**: update googletest to 116b7e5 (Node.js GitHub Bot) [#50324](https://github.com/nodejs/node/pull/50324) +* \[[`d5c16f897a`](https://github.com/nodejs/node/commit/d5c16f897a)] - **dns**: call handle.setServers() with a valid array (Luigi Pinca) [#50811](https://github.com/nodejs/node/pull/50811) +* \[[`1bd6537c97`](https://github.com/nodejs/node/commit/1bd6537c97)] - **doc**: recommend supported Python versions (Luigi Pinca) [#50407](https://github.com/nodejs/node/pull/50407) +* \[[`402e257520`](https://github.com/nodejs/node/commit/402e257520)] - **doc**: update notable changes in v21.1.0 (Joyee Cheung) [#50388](https://github.com/nodejs/node/pull/50388) +* \[[`032535e270`](https://github.com/nodejs/node/commit/032535e270)] - **doc**: make theme consistent across api and other docs (Dima Demakov) [#50877](https://github.com/nodejs/node/pull/50877) +* \[[`d53842683f`](https://github.com/nodejs/node/commit/d53842683f)] - **doc**: add a section regarding `instanceof` in `primordials.md` (Antoine du Hamel) [#50874](https://github.com/nodejs/node/pull/50874) +* \[[`fe315055a7`](https://github.com/nodejs/node/commit/fe315055a7)] - **doc**: update email to reflect affiliation (Yagiz Nizipli) [#50856](https://github.com/nodejs/node/pull/50856) +* \[[`e14f661950`](https://github.com/nodejs/node/commit/e14f661950)] - **doc**: shard not supported with watch mode (Pulkit Gupta) [#50640](https://github.com/nodejs/node/pull/50640) +* \[[`b3d015de71`](https://github.com/nodejs/node/commit/b3d015de71)] - **doc**: get rid of unnecessary `eslint-skip` comments (Antoine du Hamel) [#50829](https://github.com/nodejs/node/pull/50829) +* \[[`168cbf9cb9`](https://github.com/nodejs/node/commit/168cbf9cb9)] - **doc**: create deprecation code for isWebAssemblyCompiledModule (Marco Ippolito) [#50486](https://github.com/nodejs/node/pull/50486) +* \[[`30baacba41`](https://github.com/nodejs/node/commit/30baacba41)] - **doc**: add CanadaHonk to triagers (CanadaHonk) [#50848](https://github.com/nodejs/node/pull/50848) +* \[[`e6e7cbceac`](https://github.com/nodejs/node/commit/e6e7cbceac)] - **doc**: fix typos in --allow-fs-\* (Tobias Nießen) [#50845](https://github.com/nodejs/node/pull/50845) +* \[[`e22ce9586f`](https://github.com/nodejs/node/commit/e22ce9586f)] - **doc**: update Crypto API doc for x509.keyUsage (Daniel Meechan) [#50603](https://github.com/nodejs/node/pull/50603) +* \[[`549d4422b7`](https://github.com/nodejs/node/commit/549d4422b7)] - **doc**: fix fs.writeFileSync return value documentation (Ryan Zimmerman) [#50760](https://github.com/nodejs/node/pull/50760) +* \[[`3c79e3cdba`](https://github.com/nodejs/node/commit/3c79e3cdba)] - **doc**: update print results(detail) in `PerformanceEntry` (Jungku Lee) [#50723](https://github.com/nodejs/node/pull/50723) +* \[[`aeaf96d06e`](https://github.com/nodejs/node/commit/aeaf96d06e)] - **doc**: fix `Buffer.allocUnsafe` documentation (Mert Can Altın) [#50686](https://github.com/nodejs/node/pull/50686) +* \[[`347e1dd06a`](https://github.com/nodejs/node/commit/347e1dd06a)] - **doc**: run license-builder (github-actions\[bot]) [#50691](https://github.com/nodejs/node/pull/50691) +* \[[`a541b78bdb`](https://github.com/nodejs/node/commit/a541b78bdb)] - **doc**: add MrJithil to collaborators (Jithil P Ponnan) [#50666](https://github.com/nodejs/node/pull/50666) +* \[[`90f415dd61`](https://github.com/nodejs/node/commit/90f415dd61)] - **doc**: fix typo in fs.md (fwio) [#50570](https://github.com/nodejs/node/pull/50570) +* \[[`e2388151ba`](https://github.com/nodejs/node/commit/e2388151ba)] - **doc**: add missing description of argument in `subtle.encrypt` (Deokjin Kim) [#50578](https://github.com/nodejs/node/pull/50578) +* \[[`39cc013465`](https://github.com/nodejs/node/commit/39cc013465)] - **doc**: update pm documentation to include resource (Ranieri Innocenti Spada) [#50601](https://github.com/nodejs/node/pull/50601) +* \[[`ba6d427c23`](https://github.com/nodejs/node/commit/ba6d427c23)] - **doc**: correct attribution in v20.6.0 changelog (Jacob Smith) [#50564](https://github.com/nodejs/node/pull/50564) +* \[[`1b2dab8254`](https://github.com/nodejs/node/commit/1b2dab8254)] - **doc**: update to align `console.table` row to the left (Jungku Lee) [#50553](https://github.com/nodejs/node/pull/50553) +* \[[`5d48ef7778`](https://github.com/nodejs/node/commit/5d48ef7778)] - **doc**: underline links (Rich Trott) [#50481](https://github.com/nodejs/node/pull/50481) +* \[[`5e6057c9d2`](https://github.com/nodejs/node/commit/5e6057c9d2)] - **doc**: remove duplicate word (Gerhard Stöbich) [#50475](https://github.com/nodejs/node/pull/50475) +* \[[`64bf2fd4ee`](https://github.com/nodejs/node/commit/64bf2fd4ee)] - **doc**: fix typo in `webstreams.md` (André Santos) [#50426](https://github.com/nodejs/node/pull/50426) +* \[[`cca55b8414`](https://github.com/nodejs/node/commit/cca55b8414)] - **doc**: add information about Node-API versions >=9 (Michael Dawson) [#50168](https://github.com/nodejs/node/pull/50168) +* \[[`d4be8fad83`](https://github.com/nodejs/node/commit/d4be8fad83)] - **doc**: add Ethan-Arrowood as a collaborator (Ethan Arrowood) [#50393](https://github.com/nodejs/node/pull/50393) +* \[[`0b311838f6`](https://github.com/nodejs/node/commit/0b311838f6)] - **doc**: fix TOC in `releases.md` (Bryce Seefieldt) [#50372](https://github.com/nodejs/node/pull/50372) +* \[[`843d5f84ca`](https://github.com/nodejs/node/commit/843d5f84ca)] - **esm**: fallback to `getSource` when `load` returns nullish `source` (Antoine du Hamel) [#50825](https://github.com/nodejs/node/pull/50825) +* \[[`8d5469c84b`](https://github.com/nodejs/node/commit/8d5469c84b)] - **esm**: do not call `getSource` when format is `commonjs` (Francesco Trotta) [#50465](https://github.com/nodejs/node/pull/50465) +* \[[`b48cf314d3`](https://github.com/nodejs/node/commit/b48cf314d3)] - **esm**: bypass CJS loader in default load under `--default-type=module` (Antoine du Hamel) [#50004](https://github.com/nodejs/node/pull/50004) +* \[[`c1a196c897`](https://github.com/nodejs/node/commit/c1a196c897)] - **(SEMVER-MINOR)** **esm**: add import.meta.dirname and import.meta.filename (James Sumners) [#48740](https://github.com/nodejs/node/pull/48740) +* \[[`435f9c9276`](https://github.com/nodejs/node/commit/435f9c9276)] - **fs**: use default w flag for writeFileSync with utf8 encoding (Murilo Kakazu) [#50990](https://github.com/nodejs/node/pull/50990) +* \[[`aa3209b880`](https://github.com/nodejs/node/commit/aa3209b880)] - **fs**: add c++ fast path for writeFileSync utf8 (CanadaHonk) [#49884](https://github.com/nodejs/node/pull/49884) +* \[[`05e25e0230`](https://github.com/nodejs/node/commit/05e25e0230)] - **fs**: improve error perf of sync `lstat`+`fstat` (CanadaHonk) [#49868](https://github.com/nodejs/node/pull/49868) +* \[[`f94a24cb4b`](https://github.com/nodejs/node/commit/f94a24cb4b)] - **fs**: improve error performance for `rmdirSync` (CanadaHonk) [#49846](https://github.com/nodejs/node/pull/49846) +* \[[`cada22e2a4`](https://github.com/nodejs/node/commit/cada22e2a4)] - **fs**: fix to not return for void function (Jungku Lee) [#50769](https://github.com/nodejs/node/pull/50769) +* \[[`ba40b2e33e`](https://github.com/nodejs/node/commit/ba40b2e33e)] - **fs**: replace deprecated `path._makeLong` in copyFile (CanadaHonk) [#50844](https://github.com/nodejs/node/pull/50844) +* \[[`d1b6bd660a`](https://github.com/nodejs/node/commit/d1b6bd660a)] - **fs**: update param in jsdoc for `readdir` (Jungku Lee) [#50448](https://github.com/nodejs/node/pull/50448) +* \[[`11412e863a`](https://github.com/nodejs/node/commit/11412e863a)] - **fs**: do not throw error on cpSync internals (Yagiz Nizipli) [#50185](https://github.com/nodejs/node/pull/50185) +* \[[`868a464c15`](https://github.com/nodejs/node/commit/868a464c15)] - **fs,url**: move `FromNamespacedPath` to `node_url` (Yagiz Nizipli) [#50090](https://github.com/nodejs/node/pull/50090) +* \[[`de7fe08c7b`](https://github.com/nodejs/node/commit/de7fe08c7b)] - **fs,url**: refactor `FileURLToPath` method (Yagiz Nizipli) [#50090](https://github.com/nodejs/node/pull/50090) +* \[[`186e6e0395`](https://github.com/nodejs/node/commit/186e6e0395)] - **fs,url**: move `FileURLToPath` to node\_url (Yagiz Nizipli) [#50090](https://github.com/nodejs/node/pull/50090) +* \[[`aea7fe54af`](https://github.com/nodejs/node/commit/aea7fe54af)] - **inspector**: use private fields instead of symbols (Yagiz Nizipli) [#50776](https://github.com/nodejs/node/pull/50776) +* \[[`48dbde71d8`](https://github.com/nodejs/node/commit/48dbde71d8)] - **lib**: use primordials for navigator.userAgent (Aras Abbasi) [#50467](https://github.com/nodejs/node/pull/50467) +* \[[`fa220cac87`](https://github.com/nodejs/node/commit/fa220cac87)] - **lib**: remove deprecated string methods (Jithil P Ponnan) [#50592](https://github.com/nodejs/node/pull/50592) +* \[[`f1cf1c385f`](https://github.com/nodejs/node/commit/f1cf1c385f)] - **lib**: fix assert shows diff messages in ESM and CJS (Jithil P Ponnan) [#50634](https://github.com/nodejs/node/pull/50634) +* \[[`3844af288f`](https://github.com/nodejs/node/commit/3844af288f)] - **lib**: make event static properties non writable and configurable (Muthukumar) [#50425](https://github.com/nodejs/node/pull/50425) +* \[[`0a0b416d6c`](https://github.com/nodejs/node/commit/0a0b416d6c)] - **lib**: avoid memory allocation on nodeprecation flag (Vinicius Lourenço) [#50231](https://github.com/nodejs/node/pull/50231) +* \[[`e7551d5770`](https://github.com/nodejs/node/commit/e7551d5770)] - **lib**: align console.table row to the left (Jithil P Ponnan) [#50135](https://github.com/nodejs/node/pull/50135) +* \[[`0c85cebdf2`](https://github.com/nodejs/node/commit/0c85cebdf2)] - **meta**: clarify nomination process according to Node.js charter (Matteo Collina) [#50834](https://github.com/nodejs/node/pull/50834) +* \[[`f4070dd8d4`](https://github.com/nodejs/node/commit/f4070dd8d4)] - **meta**: clarify recommendation for bug reproductions (Antoine du Hamel) [#50882](https://github.com/nodejs/node/pull/50882) +* \[[`2ddeead436`](https://github.com/nodejs/node/commit/2ddeead436)] - **meta**: move cjihrig to TSC regular member (Colin Ihrig) [#50816](https://github.com/nodejs/node/pull/50816) +* \[[`34a789d9be`](https://github.com/nodejs/node/commit/34a789d9be)] - **meta**: add web-standards as WPTs owner (Filip Skokan) [#50636](https://github.com/nodejs/node/pull/50636) +* \[[`40bbffa266`](https://github.com/nodejs/node/commit/40bbffa266)] - **meta**: bump github/codeql-action from 2.21.9 to 2.22.5 (dependabot\[bot]) [#50513](https://github.com/nodejs/node/pull/50513) +* \[[`c49553631d`](https://github.com/nodejs/node/commit/c49553631d)] - **meta**: bump step-security/harden-runner from 2.5.1 to 2.6.0 (dependabot\[bot]) [#50512](https://github.com/nodejs/node/pull/50512) +* \[[`99df0138b0`](https://github.com/nodejs/node/commit/99df0138b0)] - **meta**: bump ossf/scorecard-action from 2.2.0 to 2.3.1 (dependabot\[bot]) [#50509](https://github.com/nodejs/node/pull/50509) +* \[[`9db6227ac6`](https://github.com/nodejs/node/commit/9db6227ac6)] - **meta**: fix spacing in collaborator list (Antoine du Hamel) [#50641](https://github.com/nodejs/node/pull/50641) +* \[[`2589a5a566`](https://github.com/nodejs/node/commit/2589a5a566)] - **meta**: bump actions/setup-python from 4.7.0 to 4.7.1 (dependabot\[bot]) [#50510](https://github.com/nodejs/node/pull/50510) +* \[[`5a86661a95`](https://github.com/nodejs/node/commit/5a86661a95)] - **meta**: add crypto as crypto and webcrypto docs owner (Filip Skokan) [#50579](https://github.com/nodejs/node/pull/50579) +* \[[`ac8d2b9cc2`](https://github.com/nodejs/node/commit/ac8d2b9cc2)] - **meta**: bump actions/setup-node from 3.8.1 to 4.0.0 (dependabot\[bot]) [#50514](https://github.com/nodejs/node/pull/50514) +* \[[`bee2c0cf11`](https://github.com/nodejs/node/commit/bee2c0cf11)] - **meta**: bump actions/checkout from 4.1.0 to 4.1.1 (dependabot\[bot]) [#50511](https://github.com/nodejs/node/pull/50511) +* \[[`91a0944e5f`](https://github.com/nodejs/node/commit/91a0944e5f)] - **meta**: add to mailmap (Ethan Arrowood) [#50491](https://github.com/nodejs/node/pull/50491) +* \[[`8d3cf8c4ee`](https://github.com/nodejs/node/commit/8d3cf8c4ee)] - **meta**: add web-standards as web api visibility owner (Chengzhong Wu) [#50418](https://github.com/nodejs/node/pull/50418) +* \[[`807c12de36`](https://github.com/nodejs/node/commit/807c12de36)] - **meta**: mention other notable changes section (Rafael Gonzaga) [#50309](https://github.com/nodejs/node/pull/50309) +* \[[`21ab3c0f0b`](https://github.com/nodejs/node/commit/21ab3c0f0b)] - **(SEMVER-MINOR)** **module**: bootstrap module loaders in shadow realm (Chengzhong Wu) [#48655](https://github.com/nodejs/node/pull/48655) +* \[[`8e886a2fff`](https://github.com/nodejs/node/commit/8e886a2fff)] - **(SEMVER-MINOR)** **module**: remove useCustomLoadersIfPresent flag (Chengzhong Wu) [#48655](https://github.com/nodejs/node/pull/48655) +* \[[`77e8361213`](https://github.com/nodejs/node/commit/77e8361213)] - **module**: execute `--import` sequentially (Antoine du Hamel) [#50474](https://github.com/nodejs/node/pull/50474) +* \[[`fffc4951ac`](https://github.com/nodejs/node/commit/fffc4951ac)] - **module**: add application/json in accept header when fetching json module (Marco Ippolito) [#50119](https://github.com/nodejs/node/pull/50119) +* \[[`f808e7a650`](https://github.com/nodejs/node/commit/f808e7a650)] - **net**: check pipe mode and path (theanarkh) [#50770](https://github.com/nodejs/node/pull/50770) +* \[[`cf3a4c5b84`](https://github.com/nodejs/node/commit/cf3a4c5b84)] - **node-api**: factor out common code into macros (Gabriel Schulhof) [#50664](https://github.com/nodejs/node/pull/50664) +* \[[`a7d8f6b529`](https://github.com/nodejs/node/commit/a7d8f6b529)] - **perf\_hooks**: implement performance.now() with fast API calls (Joyee Cheung) [#50492](https://github.com/nodejs/node/pull/50492) +* \[[`076dc7540b`](https://github.com/nodejs/node/commit/076dc7540b)] - **permission**: do not create symlinks if target is relative (Tobias Nießen) [#49156](https://github.com/nodejs/node/pull/49156) +* \[[`43160dcd2d`](https://github.com/nodejs/node/commit/43160dcd2d)] - **permission**: mark const functions as such (Tobias Nießen) [#50705](https://github.com/nodejs/node/pull/50705) +* \[[`7a661d7ad9`](https://github.com/nodejs/node/commit/7a661d7ad9)] - **permission**: address coverity warning (Michael Dawson) [#50215](https://github.com/nodejs/node/pull/50215) +* \[[`b2b4132c3e`](https://github.com/nodejs/node/commit/b2b4132c3e)] - **src**: iterate on import attributes array correctly (Michaël Zasso) [#50703](https://github.com/nodejs/node/pull/50703) +* \[[`11b3e470db`](https://github.com/nodejs/node/commit/11b3e470db)] - **(SEMVER-MINOR)** **src**: create per isolate proxy env template (Chengzhong Wu) [#48655](https://github.com/nodejs/node/pull/48655) +* \[[`d00412a083`](https://github.com/nodejs/node/commit/d00412a083)] - **(SEMVER-MINOR)** **src**: create fs\_dir per isolate properties (Chengzhong Wu) [#48655](https://github.com/nodejs/node/pull/48655) +* \[[`14cc3b9b90`](https://github.com/nodejs/node/commit/14cc3b9b90)] - **(SEMVER-MINOR)** **src**: create worker per isolate properties (Chengzhong Wu) [#48655](https://github.com/nodejs/node/pull/48655) +* \[[`621c4d66c2`](https://github.com/nodejs/node/commit/621c4d66c2)] - **(SEMVER-MINOR)** **src**: make process binding data weak (Chengzhong Wu) [#48655](https://github.com/nodejs/node/pull/48655) +* \[[`07a4e94e84`](https://github.com/nodejs/node/commit/07a4e94e84)] - **src**: assert return value of BN\_bn2binpad (Tobias Nießen) [#50860](https://github.com/nodejs/node/pull/50860) +* \[[`158db2d61e`](https://github.com/nodejs/node/commit/158db2d61e)] - **src**: fix coverity warning (Michael Dawson) [#50846](https://github.com/nodejs/node/pull/50846) +* \[[`94363bb3fd`](https://github.com/nodejs/node/commit/94363bb3fd)] - **src**: fix compatility with upcoming V8 12.1 APIs (Cheng Zhao) [#50709](https://github.com/nodejs/node/pull/50709) +* \[[`29d91b13e3`](https://github.com/nodejs/node/commit/29d91b13e3)] - **(SEMVER-MINOR)** **src**: add `--disable-warning` option (Ethan Arrowood) [#50661](https://github.com/nodejs/node/pull/50661) +* \[[`f054c337f8`](https://github.com/nodejs/node/commit/f054c337f8)] - **src**: add IsolateScopes before using isolates (Keyhan Vakil) [#50680](https://github.com/nodejs/node/pull/50680) +* \[[`d08eb382cd`](https://github.com/nodejs/node/commit/d08eb382cd)] - **src**: avoid copying strings in FSPermission::Apply (Tobias Nießen) [#50662](https://github.com/nodejs/node/pull/50662) +* \[[`6620df1c05`](https://github.com/nodejs/node/commit/6620df1c05)] - **src**: remove erroneous default argument in RadixTree (Tobias Nießen) [#50736](https://github.com/nodejs/node/pull/50736) +* \[[`436c3aef15`](https://github.com/nodejs/node/commit/436c3aef15)] - **src**: fix JSONParser leaking internal V8 scopes (Keyhan Vakil) [#50688](https://github.com/nodejs/node/pull/50688) +* \[[`6f46d31018`](https://github.com/nodejs/node/commit/6f46d31018)] - **src**: return error --env-file if file is not found (Ardi Nugraha) [#50588](https://github.com/nodejs/node/pull/50588) +* \[[`3d43fd359c`](https://github.com/nodejs/node/commit/3d43fd359c)] - **src**: avoid silent coercion to signed/unsigned int (Tobias Nießen) [#50663](https://github.com/nodejs/node/pull/50663) +* \[[`c253e39b56`](https://github.com/nodejs/node/commit/c253e39b56)] - **src**: handle errors from uv\_pipe\_connect2() (Deokjin Kim) [#50657](https://github.com/nodejs/node/pull/50657) +* \[[`3a9713bb5a`](https://github.com/nodejs/node/commit/3a9713bb5a)] - **src**: use v8::Isolate::TryGetCurrent() in DumpJavaScriptBacktrace() (Joyee Cheung) [#50518](https://github.com/nodejs/node/pull/50518) +* \[[`94f8a925a8`](https://github.com/nodejs/node/commit/94f8a925a8)] - **src**: print more information in C++ assertions (Joyee Cheung) [#50242](https://github.com/nodejs/node/pull/50242) +* \[[`23f830616b`](https://github.com/nodejs/node/commit/23f830616b)] - **src**: hide node::credentials::HasOnly outside unit (Tobias Nießen) [#50450](https://github.com/nodejs/node/pull/50450) +* \[[`b7ecb0a390`](https://github.com/nodejs/node/commit/b7ecb0a390)] - **src**: readiterable entries may be empty (Matthew Aitken) [#50398](https://github.com/nodejs/node/pull/50398) +* \[[`4ef1d68715`](https://github.com/nodejs/node/commit/4ef1d68715)] - **src**: implement structuredClone in native (Joyee Cheung) [#50330](https://github.com/nodejs/node/pull/50330) +* \[[`9346f15138`](https://github.com/nodejs/node/commit/9346f15138)] - **src**: use find instead of char-by-char in FromFilePath() (Daniel Lemire) [#50288](https://github.com/nodejs/node/pull/50288) +* \[[`8414fb4d2a`](https://github.com/nodejs/node/commit/8414fb4d2a)] - **src**: add commit hash shorthand in zlib version (Jithil P Ponnan) [#50158](https://github.com/nodejs/node/pull/50158) +* \[[`a878e3abb0`](https://github.com/nodejs/node/commit/a878e3abb0)] - **stream**: fix enumerability of ReadableStream.from (Mattias Buelens) [#50779](https://github.com/nodejs/node/pull/50779) +* \[[`95ed4ffc1e`](https://github.com/nodejs/node/commit/95ed4ffc1e)] - **stream**: fix enumerability of ReadableStream.prototype.values (Mattias Buelens) [#50779](https://github.com/nodejs/node/pull/50779) +* \[[`4cf155ca0c`](https://github.com/nodejs/node/commit/4cf155ca0c)] - **stream**: add Symbol.toStringTag to Compression Streams (Filip Skokan) [#50712](https://github.com/nodejs/node/pull/50712) +* \[[`6012e3e781`](https://github.com/nodejs/node/commit/6012e3e781)] - **stream**: fix Writable.destroy performance regression (Robert Nagy) [#50478](https://github.com/nodejs/node/pull/50478) +* \[[`dd5206820c`](https://github.com/nodejs/node/commit/dd5206820c)] - **stream**: pre-allocate \_events (Robert Nagy) [#50428](https://github.com/nodejs/node/pull/50428) +* \[[`829b82ed0f`](https://github.com/nodejs/node/commit/829b82ed0f)] - **stream**: remove no longer relevant comment (Robert Nagy) [#50446](https://github.com/nodejs/node/pull/50446) +* \[[`98ae1b4132`](https://github.com/nodejs/node/commit/98ae1b4132)] - **stream**: use bit fields for construct/destroy (Robert Nagy) [#50408](https://github.com/nodejs/node/pull/50408) +* \[[`08a0c6c56c`](https://github.com/nodejs/node/commit/08a0c6c56c)] - **stream**: improve from perf (Raz Luvaton) [#50359](https://github.com/nodejs/node/pull/50359) +* \[[`59f7316b8f`](https://github.com/nodejs/node/commit/59f7316b8f)] - **stream**: avoid calls to listenerCount (Robert Nagy) [#50357](https://github.com/nodejs/node/pull/50357) +* \[[`9d52430eb9`](https://github.com/nodejs/node/commit/9d52430eb9)] - **stream**: readable use bitmap accessors (Robert Nagy) [#50350](https://github.com/nodejs/node/pull/50350) +* \[[`139d6c8d3b`](https://github.com/nodejs/node/commit/139d6c8d3b)] - **stream**: use Array for Readable buffer (Robert Nagy) [#50341](https://github.com/nodejs/node/pull/50341) +* \[[`6206957e8d`](https://github.com/nodejs/node/commit/6206957e8d)] - **stream**: optimize creation (Robert Nagy) [#50337](https://github.com/nodejs/node/pull/50337) +* \[[`f87921de3b`](https://github.com/nodejs/node/commit/f87921de3b)] - **stream**: refactor writable \_write (Robert Nagy) [#50198](https://github.com/nodejs/node/pull/50198) +* \[[`b338f3d3c2`](https://github.com/nodejs/node/commit/b338f3d3c2)] - **stream**: avoid getter for defaultEncoding (Robert Nagy) [#50203](https://github.com/nodejs/node/pull/50203) +* \[[`1862235a26`](https://github.com/nodejs/node/commit/1862235a26)] - **test**: fix message v8 not normalising alphanumeric paths (Jithil P Ponnan) [#50730](https://github.com/nodejs/node/pull/50730) +* \[[`7c28a4ca8f`](https://github.com/nodejs/node/commit/7c28a4ca8f)] - **test**: fix dns test case failures after c-ares update to 1.21.0+ (Brad House) [#50743](https://github.com/nodejs/node/pull/50743) +* \[[`4544593d31`](https://github.com/nodejs/node/commit/4544593d31)] - **test**: replace forEach with for of (Conor Watson) [#50594](https://github.com/nodejs/node/pull/50594) +* \[[`96143a3293`](https://github.com/nodejs/node/commit/96143a3293)] - **test**: replace forEach to for at test-webcrypto-sign-verify-ecdsa.js (Alessandro Di Nisio) [#50795](https://github.com/nodejs/node/pull/50795) +* \[[`107b5e63c5`](https://github.com/nodejs/node/commit/107b5e63c5)] - **test**: replace foreach with for in test-https-simple.js (Shikha Mehta) [#49793](https://github.com/nodejs/node/pull/49793) +* \[[`9b2e5e9db4`](https://github.com/nodejs/node/commit/9b2e5e9db4)] - **test**: add note about unresolved spec issue (Mattias Buelens) [#50779](https://github.com/nodejs/node/pull/50779) +* \[[`edce637c1a`](https://github.com/nodejs/node/commit/edce637c1a)] - **test**: add note about readable streams with type owning (Mattias Buelens) [#50779](https://github.com/nodejs/node/pull/50779) +* \[[`641044670b`](https://github.com/nodejs/node/commit/641044670b)] - **test**: replace forEach with for-of in test-url-relative (vitosorriso) [#50788](https://github.com/nodejs/node/pull/50788) +* \[[`75ee78438c`](https://github.com/nodejs/node/commit/75ee78438c)] - **test**: replace forEach() with for ... of in test-tls-getprotocol.js (Steve Goode) [#50600](https://github.com/nodejs/node/pull/50600) +* \[[`24f9d3fbeb`](https://github.com/nodejs/node/commit/24f9d3fbeb)] - **test**: enable idlharness tests for encoding (Mattias Buelens) [#50778](https://github.com/nodejs/node/pull/50778) +* \[[`a9d290956e`](https://github.com/nodejs/node/commit/a9d290956e)] - **test**: replace forEach in whatwg-encoding-custom-interop (Honza Machala) [#50607](https://github.com/nodejs/node/pull/50607) +* \[[`6584dd80f7`](https://github.com/nodejs/node/commit/6584dd80f7)] - **test**: replace forEach() with for-loop (Jan) [#50596](https://github.com/nodejs/node/pull/50596) +* \[[`be54a22869`](https://github.com/nodejs/node/commit/be54a22869)] - **test**: improve test-bootstrap-modules.js (Joyee Cheung) [#50708](https://github.com/nodejs/node/pull/50708) +* \[[`660e70e73b`](https://github.com/nodejs/node/commit/660e70e73b)] - **test**: skip parallel/test-macos-app-sandbox if disk space < 120MB (Joyee Cheung) [#50764](https://github.com/nodejs/node/pull/50764) +* \[[`5712c41122`](https://github.com/nodejs/node/commit/5712c41122)] - **test**: replace foreach with for (Markus Muschol) [#50599](https://github.com/nodejs/node/pull/50599) +* \[[`49e5f47b1c`](https://github.com/nodejs/node/commit/49e5f47b1c)] - **test**: test streambase has already has a consumer (Jithil P Ponnan) [#48059](https://github.com/nodejs/node/pull/48059) +* \[[`bb7d764c8e`](https://github.com/nodejs/node/commit/bb7d764c8e)] - **test**: change forEach to for...of in path extname (Kyriakos Markakis) [#50667](https://github.com/nodejs/node/pull/50667) +* \[[`4d28ced079`](https://github.com/nodejs/node/commit/4d28ced079)] - **test**: replace forEach with for...of (Ryan Williams) [#50611](https://github.com/nodejs/node/pull/50611) +* \[[`92a153ecde`](https://github.com/nodejs/node/commit/92a153ecde)] - **test**: migrate message v8 tests from Python to JS (Joshua LeMay) [#50421](https://github.com/nodejs/node/pull/50421) +* \[[`a376284d8a`](https://github.com/nodejs/node/commit/a376284d8a)] - **test**: use destructuring for accessing setting values (Honza Jedlička) [#50609](https://github.com/nodejs/node/pull/50609) +* \[[`7b9b1fba27`](https://github.com/nodejs/node/commit/7b9b1fba27)] - **test**: replace forEach() with for .. of (Evgenia Blajer) [#50605](https://github.com/nodejs/node/pull/50605) +* \[[`9397b2da7e`](https://github.com/nodejs/node/commit/9397b2da7e)] - **test**: replace forEach() with for ... of in test-readline-keys.js (William Liang) [#50604](https://github.com/nodejs/node/pull/50604) +* \[[`9043ba4cfb`](https://github.com/nodejs/node/commit/9043ba4cfb)] - **test**: replace forEach() with for ... of in test-http2-single-headers.js (spiritualized) [#50606](https://github.com/nodejs/node/pull/50606) +* \[[`9f911d31f6`](https://github.com/nodejs/node/commit/9f911d31f6)] - **test**: replace forEach with for of (john-mcinall) [#50602](https://github.com/nodejs/node/pull/50602) +* \[[`8a5f36fe74`](https://github.com/nodejs/node/commit/8a5f36fe74)] - **test**: remove unused file (James Sumners) [#50528](https://github.com/nodejs/node/pull/50528) +* \[[`9950203340`](https://github.com/nodejs/node/commit/9950203340)] - **test**: replace forEach with for of (Kevin Kühnemund) [#50597](https://github.com/nodejs/node/pull/50597) +* \[[`03ba28f102`](https://github.com/nodejs/node/commit/03ba28f102)] - **test**: replace forEach with for of (CorrWu) [#49785](https://github.com/nodejs/node/pull/49785) +* \[[`ea61261b54`](https://github.com/nodejs/node/commit/ea61261b54)] - **test**: replace forEach with for \[...] of (Gabriel Bota) [#50615](https://github.com/nodejs/node/pull/50615) +* \[[`4349790913`](https://github.com/nodejs/node/commit/4349790913)] - **test**: add WPT report test duration (Filip Skokan) [#50574](https://github.com/nodejs/node/pull/50574) +* \[[`7cacddfcc1`](https://github.com/nodejs/node/commit/7cacddfcc1)] - **test**: replace forEach() with for ... of loop in test-global.js (Kajol) [#49772](https://github.com/nodejs/node/pull/49772) +* \[[`889f58d07f`](https://github.com/nodejs/node/commit/889f58d07f)] - **test**: skip test-diagnostics-channel-memory-leak.js (Joyee Cheung) [#50327](https://github.com/nodejs/node/pull/50327) +* \[[`41644ee071`](https://github.com/nodejs/node/commit/41644ee071)] - **test**: improve `UV_THREADPOOL_SIZE` tests on `.env` (Yagiz Nizipli) [#49213](https://github.com/nodejs/node/pull/49213) +* \[[`1db44b9a53`](https://github.com/nodejs/node/commit/1db44b9a53)] - **test**: recognize wpt completion error (Chengzhong Wu) [#50429](https://github.com/nodejs/node/pull/50429) +* \[[`ecfc951ddc`](https://github.com/nodejs/node/commit/ecfc951ddc)] - **test**: report error wpt test results (Chengzhong Wu) [#50429](https://github.com/nodejs/node/pull/50429) +* \[[`deb0351d95`](https://github.com/nodejs/node/commit/deb0351d95)] - **test**: replace forEach() with for...of (Ram) [#49794](https://github.com/nodejs/node/pull/49794) +* \[[`f885dfe5e3`](https://github.com/nodejs/node/commit/f885dfe5e3)] - **test**: replace forEach() with for...of in test-trace-events-http (Chand) [#49795](https://github.com/nodejs/node/pull/49795) +* \[[`9dc63c56db`](https://github.com/nodejs/node/commit/9dc63c56db)] - **test**: replace forEach with for...of in test-fs-realpath-buffer-encoding (Niya Shiyas) [#49804](https://github.com/nodejs/node/pull/49804) +* \[[`600d1260da`](https://github.com/nodejs/node/commit/600d1260da)] - **test**: fix timeout of test-cpu-prof-dir-worker.js in LoongArch devices (Shi Pujin) [#50363](https://github.com/nodejs/node/pull/50363) +* \[[`099f5cfa0a`](https://github.com/nodejs/node/commit/099f5cfa0a)] - **test**: fix vm assertion actual and expected order (Chengzhong Wu) [#50371](https://github.com/nodejs/node/pull/50371) +* \[[`a31f9bfe01`](https://github.com/nodejs/node/commit/a31f9bfe01)] - **test**: v8: Add test-linux-perf-logger test suite (Luke Albao) [#50352](https://github.com/nodejs/node/pull/50352) +* \[[`6c59114947`](https://github.com/nodejs/node/commit/6c59114947)] - **test**: ensure never settling promises are detected (Antoine du Hamel) [#50318](https://github.com/nodejs/node/pull/50318) +* \[[`9830ae4bf7`](https://github.com/nodejs/node/commit/9830ae4bf7)] - **test\_runner**: add tests for various mock timer issues (Mika Fischer) [#50384](https://github.com/nodejs/node/pull/50384) +* \[[`2c72ed85fb`](https://github.com/nodejs/node/commit/2c72ed85fb)] - **test\_runner**: pass abortSignal to test files (Moshe Atlow) [#50630](https://github.com/nodejs/node/pull/50630) +* \[[`c33a84af11`](https://github.com/nodejs/node/commit/c33a84af11)] - **test\_runner**: replace forEach with for of (Tom Haddad) [#50595](https://github.com/nodejs/node/pull/50595) +* \[[`29c68a22bb`](https://github.com/nodejs/node/commit/29c68a22bb)] - **test\_runner**: output errors of suites (Moshe Atlow) [#50361](https://github.com/nodejs/node/pull/50361) +* \[[`e64378643d`](https://github.com/nodejs/node/commit/e64378643d)] - **(SEMVER-MINOR)** **test\_runner**: adds built in lcov reporter (Phil Nash) [#50018](https://github.com/nodejs/node/pull/50018) +* \[[`4aaaff413b`](https://github.com/nodejs/node/commit/4aaaff413b)] - **test\_runner**: test return value of mocked promisified timers (Mika Fischer) [#50331](https://github.com/nodejs/node/pull/50331) +* \[[`4a830c2d9d`](https://github.com/nodejs/node/commit/4a830c2d9d)] - **(SEMVER-MINOR)** **test\_runner**: add Date to the supported mock APIs (Lucas Santos) [#48638](https://github.com/nodejs/node/pull/48638) +* \[[`842dc01def`](https://github.com/nodejs/node/commit/842dc01def)] - **(SEMVER-MINOR)** **test\_runner, cli**: add --test-timeout flag (Shubham Pandey) [#50443](https://github.com/nodejs/node/pull/50443) +* \[[`613a9072b7`](https://github.com/nodejs/node/commit/613a9072b7)] - **tls**: fix order of setting cipher before setting cert and key (Kumar Rishav) [#50186](https://github.com/nodejs/node/pull/50186) +* \[[`d905c61e16`](https://github.com/nodejs/node/commit/d905c61e16)] - **tls**: use `validateFunction` for `options.SNICallback` (Deokjin Kim) [#50530](https://github.com/nodejs/node/pull/50530) +* \[[`c8d6dd58e7`](https://github.com/nodejs/node/commit/c8d6dd58e7)] - **tools**: add macOS notarization verification step (Ulises Gascón) [#50833](https://github.com/nodejs/node/pull/50833) +* \[[`c9bd0b0c0f`](https://github.com/nodejs/node/commit/c9bd0b0c0f)] - **tools**: use macOS keychain to notarize the releases (Ulises Gascón) [#50715](https://github.com/nodejs/node/pull/50715) +* \[[`932a5d7b2c`](https://github.com/nodejs/node/commit/932a5d7b2c)] - **tools**: update eslint to 8.54.0 (Node.js GitHub Bot) [#50809](https://github.com/nodejs/node/pull/50809) +* \[[`d7114d97be`](https://github.com/nodejs/node/commit/d7114d97be)] - **tools**: update lint-md-dependencies to rollup\@4.5.0 (Node.js GitHub Bot) [#50807](https://github.com/nodejs/node/pull/50807) +* \[[`93085cf844`](https://github.com/nodejs/node/commit/93085cf844)] - **tools**: add workflow to update release links (Michaël Zasso) [#50710](https://github.com/nodejs/node/pull/50710) +* \[[`66764c5d04`](https://github.com/nodejs/node/commit/66764c5d04)] - **tools**: recognize GN files in dep\_updaters (Cheng Zhao) [#50693](https://github.com/nodejs/node/pull/50693) +* \[[`2a451e176a`](https://github.com/nodejs/node/commit/2a451e176a)] - **tools**: remove unused file (Ulises Gascon) [#50622](https://github.com/nodejs/node/pull/50622) +* \[[`8ce6403230`](https://github.com/nodejs/node/commit/8ce6403230)] - **tools**: change minimatch install strategy (Marco Ippolito) [#50476](https://github.com/nodejs/node/pull/50476) +* \[[`97778e2e77`](https://github.com/nodejs/node/commit/97778e2e77)] - **tools**: update lint-md-dependencies to rollup\@4.3.1 (Node.js GitHub Bot) [#50675](https://github.com/nodejs/node/pull/50675) +* \[[`797f6a9ba8`](https://github.com/nodejs/node/commit/797f6a9ba8)] - **tools**: add macOS notarization stapler (Ulises Gascón) [#50625](https://github.com/nodejs/node/pull/50625) +* \[[`8fa1319352`](https://github.com/nodejs/node/commit/8fa1319352)] - **tools**: update eslint to 8.53.0 (Node.js GitHub Bot) [#50559](https://github.com/nodejs/node/pull/50559) +* \[[`592f57970f`](https://github.com/nodejs/node/commit/592f57970f)] - **tools**: update lint-md-dependencies to rollup\@4.3.0 (Node.js GitHub Bot) [#50556](https://github.com/nodejs/node/pull/50556) +* \[[`2fd78fc39e`](https://github.com/nodejs/node/commit/2fd78fc39e)] - **tools**: compare ICU checksums before file changes (Michaël Zasso) [#50522](https://github.com/nodejs/node/pull/50522) +* \[[`631d710fc4`](https://github.com/nodejs/node/commit/631d710fc4)] - **tools**: improve update acorn-walk script (Marco Ippolito) [#50473](https://github.com/nodejs/node/pull/50473) +* \[[`33fd2af2ab`](https://github.com/nodejs/node/commit/33fd2af2ab)] - **tools**: update lint-md-dependencies to rollup\@4.2.0 (Node.js GitHub Bot) [#50496](https://github.com/nodejs/node/pull/50496) +* \[[`22b7a74838`](https://github.com/nodejs/node/commit/22b7a74838)] - **tools**: update gyp-next to v0.16.1 (Michaël Zasso) [#50380](https://github.com/nodejs/node/pull/50380) +* \[[`f5ccab5005`](https://github.com/nodejs/node/commit/f5ccab5005)] - **tools**: skip ruff on tools/gyp (Michaël Zasso) [#50380](https://github.com/nodejs/node/pull/50380) +* \[[`408fd90508`](https://github.com/nodejs/node/commit/408fd90508)] - **tools**: update lint-md-dependencies to rollup\@4.1.5 unified\@11.0.4 (Node.js GitHub Bot) [#50461](https://github.com/nodejs/node/pull/50461) +* \[[`685f936ccd`](https://github.com/nodejs/node/commit/685f936ccd)] - **tools**: avoid npm install in deps installation (Marco Ippolito) [#50413](https://github.com/nodejs/node/pull/50413) +* \[[`7d43c5a094`](https://github.com/nodejs/node/commit/7d43c5a094)] - _**Revert**_ "**tools**: update doc dependencies" (Richard Lau) [#50414](https://github.com/nodejs/node/pull/50414) +* \[[`8fd67c2e3e`](https://github.com/nodejs/node/commit/8fd67c2e3e)] - **tools**: update doc dependencies (Node.js GitHub Bot) [#49988](https://github.com/nodejs/node/pull/49988) +* \[[`586becb507`](https://github.com/nodejs/node/commit/586becb507)] - **tools**: run coverage CI only on relevant files (Antoine du Hamel) [#50349](https://github.com/nodejs/node/pull/50349) +* \[[`2d06eea6c5`](https://github.com/nodejs/node/commit/2d06eea6c5)] - **tools**: update eslint to 8.52.0 (Node.js GitHub Bot) [#50326](https://github.com/nodejs/node/pull/50326) +* \[[`6a897baf16`](https://github.com/nodejs/node/commit/6a897baf16)] - **tools**: update lint-md-dependencies (Node.js GitHub Bot) [#50190](https://github.com/nodejs/node/pull/50190) +* \[[`e6e7f39b9e`](https://github.com/nodejs/node/commit/e6e7f39b9e)] - **util**: improve performance of normalizeEncoding (kylo5aby) [#50721](https://github.com/nodejs/node/pull/50721) +* \[[`3b6b1afa47`](https://github.com/nodejs/node/commit/3b6b1afa47)] - **v8,tools**: expose necessary V8 defines (Cheng Zhao) [#50820](https://github.com/nodejs/node/pull/50820) +* \[[`2664012617`](https://github.com/nodejs/node/commit/2664012617)] - **vm**: allow dynamic import with a referrer realm (Chengzhong Wu) [#50360](https://github.com/nodejs/node/pull/50360) +* \[[`c6c0a74b54`](https://github.com/nodejs/node/commit/c6c0a74b54)] - **wasi**: document security sandboxing status (Guy Bedford) [#50396](https://github.com/nodejs/node/pull/50396) +* \[[`989814093e`](https://github.com/nodejs/node/commit/989814093e)] - **win,tools**: upgrade Windows signing to smctl (Stefan Stojanovic) [#50956](https://github.com/nodejs/node/pull/50956) + ## 2023-11-22, Version 20.10.0 'Iron' (LTS), @targos diff --git a/src/node_version.h b/src/node_version.h index 02d28f2150a7fa..9cf6a1a88b9563 100644 --- a/src/node_version.h +++ b/src/node_version.h @@ -23,13 +23,13 @@ #define SRC_NODE_VERSION_H_ #define NODE_MAJOR_VERSION 20 -#define NODE_MINOR_VERSION 10 -#define NODE_PATCH_VERSION 1 +#define NODE_MINOR_VERSION 11 +#define NODE_PATCH_VERSION 0 #define NODE_VERSION_IS_LTS 1 #define NODE_VERSION_LTS_CODENAME "Iron" -#define NODE_VERSION_IS_RELEASE 0 +#define NODE_VERSION_IS_RELEASE 1 #ifndef NODE_STRINGIFY #define NODE_STRINGIFY(n) NODE_STRINGIFY_HELPER(n)