From 5bf989bf7dde0a6124fb1605f3caf899273c6d4e Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Mon, 11 Mar 2024 12:13:47 +1100 Subject: [PATCH] docs: add byte strings example using tags to retain typedarray types Ref: https://github.com/rvagg/cborg/issues/69 --- README.md | 5 ++ example-bytestrings.js | 180 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 example-bytestrings.js diff --git a/README.md b/README.md index c1d1d82..7218d99 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ * [Round-trip consistency](#round-trip-consistency) * [JSON mode](#json-mode) * [Example](#example-1) +* [Advanced types and tags](#advanced-types-and-tags) * [License and Copyright](#license-and-copyright) ## Example @@ -501,6 +502,10 @@ encoded: Uint8Array(34) [ encoded (string): {"this":{"is":"JSON!","yay":true}} ``` +## Advanced types and tags + +As demonstrated above, the ability to provide custom `typeEncoders` to `encode()`, and `tags` and even a custom `tokenizer` to `decode()`, allows for quite a bit of flexibility in manipulating both the encode and decode processes. An advanced example that uses all of these features can be found in [example-bytestrings.js](./example-bytestrings.js), which demonstrates how one might implement [RFC 8746](https://www.rfc-editor.org/rfc/rfc8746.html) to allow typed arrays to round-trip through CBOR and retain their original types. Since cborg is designed to speak purely in terms of `Uint8Array`s, its default behaviour will squash all typed arrays down to their byte array forms and materialise them as plain `Uint8Array`s. Where round-trip fidelity is important and CBOR tags are an option, this form of usage may be appropriate. + ## License and Copyright Copyright 2020 Rod Vagg diff --git a/example-bytestrings.js b/example-bytestrings.js new file mode 100644 index 0000000..1b12527 --- /dev/null +++ b/example-bytestrings.js @@ -0,0 +1,180 @@ +/* +RFC 8746 defines a set of tags to use for typed arrays. 
Out of the box, cborg doesn't care about
+tags and just squashes all concerns around byte arrays to Uint8Array with major type 2. This is
+fine for most use cases, but it is lossy: you can't round-trip and retain your original type.
+
+This example shows how to use cborg to round-trip a typed array with tags.
+
+https://www.rfc-editor.org/rfc/rfc8746.html
+*/
+
+import { encode, decode, Token, Tokenizer, Type } from 'cborg.js'
+
+const tagUint8Array = 64 // RFC 8746: uint8 Typed Array
+const tagUint64Array = 71 // RFC 8746: uint64, little endian, Typed Array
+// etc... see https://www.rfc-editor.org/rfc/rfc8746.html#name-iana-considerations
+
+/* ENCODERS */
+
+/**
+ * @param {any} obj - value to encode; anything other than a Uint8Array throws an Error
+ * @returns {Token[]} a tag token (64) followed by a bytes token holding `obj`
+ */
+function uint8ArrayEncoder (obj) {
+  if (!(obj instanceof Uint8Array)) {
+    throw new Error('expected Uint8Array')
+  }
+  return [
+    new Token(Type.tag, tagUint8Array),
+    new Token(Type.bytes, obj)
+  ]
+}
+
+/**
+ * @param {any} obj - value to encode; anything other than a BigUint64Array throws an Error
+ * @returns {Token[]} a tag token (71) followed by a bytes token viewing the array's memory
+ */
+function uint64ArrayEncoder (obj) {
+  if (!(obj instanceof BigUint64Array)) {
+    throw new Error('expected BigUint64Array')
+  }
+  return [
+    new Token(Type.tag, tagUint64Array),
+    // View the BigUint64Array's memory as a Uint8Array (no copy); it may be a view of a larger
+    // ArrayBuffer, hence the explicit offset/length. Bytes are in host order (tag 71 means LE).
+    new Token(Type.bytes, new Uint8Array(obj.buffer, obj.byteOffset, obj.byteLength))
+  ]
+}
+
+// etc...
+
+const typeEncoders = { // handed to encode() via { typeEncoders }
+  Uint8Array: uint8ArrayEncoder,
+  BigUint64Array: uint64ArrayEncoder
+}
+
+/* DECODERS */
+
+/**
+ * @param {ArrayBuffer} bytes - content of the tagged byte string; a non-ArrayBuffer throws an Error
+ * @returns {Uint8Array} a view over `bytes`
+ */
+function uint8ArrayDecoder (bytes) {
+  if (!(bytes instanceof ArrayBuffer)) {
+    throw new Error('expected ArrayBuffer')
+  }
+  return new Uint8Array(bytes)
+}
+
+/**
+ * @param {ArrayBuffer} bytes - content of the tagged byte string; a non-ArrayBuffer throws an Error
+ * @returns {BigUint64Array} a view over `bytes`, whose byte length must be a multiple of 8
+ */
+function uint64ArrayDecoder (bytes) {
+  if (!(bytes instanceof ArrayBuffer)) {
+    throw new Error('expected ArrayBuffer')
+  }
+  return new BigUint64Array(bytes)
+}
+
+// etc...
+
+const tags = [] // sparse array: index is the CBOR tag number, value is that tag's decoder
+tags[tagUint8Array] = uint8ArrayDecoder
+tags[tagUint64Array] = uint64ArrayDecoder
+
+/* TOKENIZER */
+
+// cborg talks in Uint8Arrays, but we want major type 2 (byte strings) to surface as ArrayBuffers.
+// This Tokenizer subclass rewrites the token stream: next() returns each token from the parent
+// unchanged, except that a bytes token has its Uint8Array value replaced with an ArrayBuffer.
+
+class ArrayBufferTransformingTokeniser extends Tokenizer {
+  next () {
+    const nextToken = super.next()
+    if (nextToken.type === Type.bytes) {
+      // Swap the (assumed) Uint8Array value for an ArrayBuffer holding exactly the same bytes. The
+      // value may be a view onto a larger buffer, so copy out only its own range. All tags we care
+      // about are <tag><bytes>, so our tag decoders also receive ArrayBuffers now. An alternative:
+      // watch for <tag> and skip the <bytes> after it, but that's too complicated for a demo.
+      const { buffer, byteOffset, byteLength } = nextToken.value
+      nextToken.value = buffer.slice(byteOffset, byteOffset + byteLength)
+    }
+    return nextToken
+  }
+}
+
+// Optional: a new decode() wrapper, mainly so we don't have to deal with the complications of
+// instantiating a Tokenizer which needs both data and the options.
+function byteStringDecoder (data, options) { // data: encoded bytes; options: optional decode() options
+  options = Object.assign({}, options, { // fresh copy, so the caller's object is not mutated
+    tags, // replaces any caller-supplied tags
+    tokenizer: new ArrayBufferTransformingTokeniser(data, options) // built with the caller's original options
+  })
+  return decode(data, options)
+}
+
+/* ROUND-TRIP */
+
+const original = {
+  u8: new Uint8Array([1, 2, 3, 4, 5]),
+  u64: new BigUint64Array([10000000000000000n, 20000000000000000n, 30000000000000000n, 40000000000000000n, 50000000000000000n]),
+  ab: new Uint8Array([6, 7, 8, 9, 10]).buffer // a plain ArrayBuffer: no entry in typeEncoders, so it is written untagged
+}
+
+const encoded = encode(original, { typeEncoders })
+
+const decoded = byteStringDecoder(encoded)
+
+console.log('Original:', original)
+console.log('Encoded:', Buffer.from(encoded).toString('hex')) // excuse the Buffer, sorry browser peeps
+console.log('Decoded:', decoded)
+
+/* Output:
+
+Original: {
+  u8: Uint8Array(5) [ 1, 2, 3, 4, 5 ],
+  u64: BigUint64Array(5) [
+    10000000000000000n,
+    20000000000000000n,
+    30000000000000000n,
+    40000000000000000n,
+    50000000000000000n
+  ],
+  ab: ArrayBuffer { [Uint8Contents]: <06 07 08 09 0a>, byteLength: 5 }
+}
+Encoded: a362616245060708090a627538d84045010203040563753634d84758280000c16ff2862300000082dfe40d47000000434fd7946a00000004bfc91b8e000000c52ebca2b100
+Decoded: {
+  ab: ArrayBuffer { [Uint8Contents]: <06 07 08 09 0a>, byteLength: 5 },
+  u8: Uint8Array(5) [ 1, 2, 3, 4, 5 ],
+  u64: BigUint64Array(5) [
+    10000000000000000n,
+    20000000000000000n,
+    30000000000000000n,
+    40000000000000000n,
+    50000000000000000n
+  ]
+}
+
+*/
+
+/* Diagnostic:
+
+$ cborg hex2diag a362616245060708090a627538d84045010203040563753634d84758280000c16ff2862300000082dfe40d47000000434fd7946a00000004bfc91b8e000000c52ebca2b100
+a3                                                # map(3)
+  62                                              #   string(2)
+    6162                                          #     "ab"
+  45                                              #   bytes(5)
+    060708090a                                    #     "\x06\x07\x08\x09\x0a"
+  62                                              #   string(2)
+    7538                                          #     "u8"
+  d8 40                                           #   tag(64)
+    45                                            #     bytes(5)
+      0102030405                                  #       "\x01\x02\x03\x04\x05"
+  63                                              #   string(3)
+    753634                                        #     "u64"
+  d8 47                                           #   tag(71)
+    58 28                                         #     bytes(40)
+      0000c16ff2862300000082dfe40d47000000434fd7  #       "\x00\x00Áoò\x86#\x00\x00\x00\x82ßä\x0dG\x00\x00\x00CO×"
+      946a00000004bfc91b8e000000c52ebca2b100      #       "\x94j\x00\x00\x00\x04¿É\x1b\x8e\x00\x00\x00Å.¼¢±\x00
+*/