From 3d235dc3836543d7e4ddaf8545090564a266a983 Mon Sep 17 00:00:00 2001 From: achingbrain Date: Sun, 12 Sep 2021 18:22:40 +0100 Subject: [PATCH 1/3] feat: auto-shard based on node size js counterpart to https://github.com/ipfs/go-ipfs/pull/8114 Changes the `shardSplitThreshold` parameter to mean the size of the final DAGNode (including link names, sizes, etc) instead of the number of entries in the directory. Fixes: #149 BREAKING CHANGE: `shardSplitThreshold` now refers to node size, not number of entries --- packages/ipfs-unixfs-importer/README.md | 2 +- packages/ipfs-unixfs-importer/src/dir-flat.js | 40 +++++++- .../ipfs-unixfs-importer/src/dir-sharded.js | 91 ++++++++++++++++++- packages/ipfs-unixfs-importer/src/dir.js | 21 ++++- .../ipfs-unixfs-importer/src/flat-to-shard.js | 4 +- packages/ipfs-unixfs-importer/src/options.js | 3 +- .../ipfs-unixfs-importer/src/tree-builder.js | 2 +- packages/ipfs-unixfs/src/index.js | 4 +- 8 files changed, 154 insertions(+), 13 deletions(-) diff --git a/packages/ipfs-unixfs-importer/README.md b/packages/ipfs-unixfs-importer/README.md index 525cd8ab..4dfd95d3 100644 --- a/packages/ipfs-unixfs-importer/README.md +++ b/packages/ipfs-unixfs-importer/README.md @@ -116,7 +116,7 @@ The input's file paths and directory structure will be preserved in the [`dag-pb `options` is an JavaScript option that might include the following keys: - `wrapWithDirectory` (boolean, defaults to false): if true, a wrapping node will be created -- `shardSplitThreshold` (positive integer, defaults to 1000): the number of directory entries above which we decide to use a sharding directory builder (instead of the default flat one) +- `shardSplitThreshold` (positive integer, defaults to 256KiB): if the serialized node is larger than this it will be converted to a HAMT sharded directory - `chunker` (string, defaults to `"fixed"`): the chunking strategy. 
Supports: - `fixed` - `rabin` diff --git a/packages/ipfs-unixfs-importer/src/dir-flat.js b/packages/ipfs-unixfs-importer/src/dir-flat.js index ad1d12c0..c76495c6 100644 --- a/packages/ipfs-unixfs-importer/src/dir-flat.js +++ b/packages/ipfs-unixfs-importer/src/dir-flat.js @@ -1,6 +1,6 @@ import { encode, prepare } from '@ipld/dag-pb' import { UnixFS } from 'ipfs-unixfs' -import Dir from './dir.js' +import { Dir, CID_V0, CID_V1 } from './dir.js' import persist from './utils/persist.js' /** @@ -32,6 +32,7 @@ class DirFlat extends Dir { async put (name, value) { this.cid = undefined this.size = undefined + this.nodeSize = undefined this._children[name] = value } @@ -68,6 +69,43 @@ class DirFlat extends Dir { } } + calculateNodeSize () { + if (this.nodeSize !== undefined) { + return this.nodeSize + } + + const links = [] + + for (const name of Object.keys(this._children)) { + const child = this._children[name] + let size + + if (child instanceof Dir) { + size = child.calculateNodeSize() + } else { + size = child.size + } + + if (child.size != null && child.cid) { + links.push({ + Name: name, + Tsize: size, + Hash: this.options.cidVersion === 0 ? 
CID_V0 : CID_V1 + }) + } + } + + const unixfs = new UnixFS({ + type: 'directory', + mtime: this.mtime, + mode: this.mode + }) + + this.nodeSize = encode(prepare({ Data: unixfs.marshal(), Links: links })).length + + return this.nodeSize + } + /** * @param {Blockstore} block * @returns {AsyncIterable} diff --git a/packages/ipfs-unixfs-importer/src/dir-sharded.js b/packages/ipfs-unixfs-importer/src/dir-sharded.js index b3bc8671..07839e29 100644 --- a/packages/ipfs-unixfs-importer/src/dir-sharded.js +++ b/packages/ipfs-unixfs-importer/src/dir-sharded.js @@ -1,6 +1,6 @@ import { encode, prepare } from '@ipld/dag-pb' import { UnixFS } from 'ipfs-unixfs' -import Dir from './dir.js' +import { Dir, CID_V0, CID_V1 } from './dir.js' import persist from './utils/persist.js' import { createHAMT, Bucket } from 'hamt-sharding' @@ -35,6 +35,10 @@ class DirSharded extends Dir { * @param {InProgressImportResult | Dir} value */ async put (name, value) { + this.cid = undefined + this.size = undefined + this.nodeSize = undefined + await this._bucket.put(name, value) } @@ -66,6 +70,16 @@ class DirSharded extends Dir { } } + calculateNodeSize () { + if (this.nodeSize !== undefined) { + return this.nodeSize + } + + this.nodeSize = calculateSize(this._bucket, this, this.options) + + return this.nodeSize + } + /** * @param {Blockstore} blockstore * @returns {AsyncIterable} @@ -85,7 +99,7 @@ export default DirSharded /** * @param {Bucket} bucket * @param {Blockstore} blockstore - * @param {*} shardRoot + * @param {DirSharded | null} shardRoot * @param {ImporterOptions} options * @returns {AsyncIterable} */ @@ -183,3 +197,76 @@ async function * flush (bucket, blockstore, shardRoot, options) { size } } + +/** + * @param {Bucket} bucket + * @param {DirSharded | null} shardRoot + * @param {ImporterOptions} options + */ +function calculateSize (bucket, shardRoot, options) { + const children = bucket._children + const links = [] + + for (let i = 0; i < children.length; i++) { + const child = 
children.get(i) + + if (!child) { + continue + } + + const labelPrefix = i.toString(16).toUpperCase().padStart(2, '0') + + if (child instanceof Bucket) { + const size = calculateSize(child, null, options) + + links.push({ + Name: labelPrefix, + Tsize: size, + Hash: options.cidVersion === 0 ? CID_V0 : CID_V1 + }) + } else if (typeof child.value.flush === 'function') { + const dir = child.value + const size = dir.nodeSize() + + links.push({ + Name: labelPrefix + child.key, + Tsize: size, + Hash: options.cidVersion === 0 ? CID_V0 : CID_V1 + }) + } else { + const value = child.value + + if (!value.cid) { + continue + } + + const label = labelPrefix + child.key + const size = value.size + + links.push({ + Name: label, + Tsize: size, + Hash: value.cid + }) + } + } + + // go-ipfs uses little endian, that's why we have to + // reverse the bit field before storing it + const data = Uint8Array.from(children.bitField().reverse()) + const dir = new UnixFS({ + type: 'hamt-sharded-directory', + data, + fanout: bucket.tableSize(), + hashType: options.hamtHashCode, + mtime: shardRoot && shardRoot.mtime, + mode: shardRoot && shardRoot.mode + }) + + const buffer = encode(prepare({ + Data: dir.marshal(), + Links: links + })) + + return buffer.length +} diff --git a/packages/ipfs-unixfs-importer/src/dir.js b/packages/ipfs-unixfs-importer/src/dir.js index 19fdb281..e0021ad1 100644 --- a/packages/ipfs-unixfs-importer/src/dir.js +++ b/packages/ipfs-unixfs-importer/src/dir.js @@ -1,9 +1,10 @@ +import { CID } from 'multiformats/cid' + /** * @typedef {import('./types').ImporterOptions} ImporterOptions * @typedef {import('./types').ImportResult} ImportResult * @typedef {import('./types').InProgressImportResult} InProgressImportResult * @typedef {import('interface-blockstore').Blockstore} Blockstore - * @typedef {import('multiformats/cid').CID} CID * * @typedef {object} DirProps * @property {boolean} root @@ -17,7 +18,7 @@ * @property {number} [mode] * @property {import('ipfs-unixfs').Mtime} 
[mtime] */ -class Dir { +export class Dir { /** * @param {DirProps} props * @param {ImporterOptions} options @@ -40,6 +41,8 @@ class Dir { this.cid = undefined /** @type {number | undefined} */ this.size = undefined + /** @type {number | undefined} */ + this.nodeSize = undefined } /** @@ -66,6 +69,18 @@ class Dir { * @returns {AsyncIterable} */ async * flush (blockstore) { } + + /** + * @returns {number} + */ + calculateNodeSize () { + return 0 + } } -export default Dir +// we use these to calculate the node size to use as a check for whether a directory +// should be sharded or not. Since CIDs have a constant length and we're only +// interested in the data length and not the actual content identifier we can use +// any old CID instead of having to hash the data which is expensive. +export const CID_V0 = CID.parse('QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn') +export const CID_V1 = CID.parse('zdj7WbTaiJT1fgatdet9Ei9iDB5hdCxkbVyhyh8YTUnXMiwYi') diff --git a/packages/ipfs-unixfs-importer/src/flat-to-shard.js b/packages/ipfs-unixfs-importer/src/flat-to-shard.js index c46f3d3a..87cb6411 100644 --- a/packages/ipfs-unixfs-importer/src/flat-to-shard.js +++ b/packages/ipfs-unixfs-importer/src/flat-to-shard.js @@ -2,7 +2,7 @@ import DirSharded from './dir-sharded.js' import DirFlat from './dir-flat.js' /** - * @typedef {import('./dir').default} Dir + * @typedef {import('./dir').Dir} Dir * @typedef {import('./types').ImporterOptions} ImporterOptions */ @@ -16,7 +16,7 @@ import DirFlat from './dir-flat.js' async function flatToShard (child, dir, threshold, options) { let newDir = dir - if (dir instanceof DirFlat && dir.directChildrenCount() >= threshold) { + if (dir instanceof DirFlat && dir.calculateNodeSize() > threshold) { newDir = await convertToShard(dir, options) } diff --git a/packages/ipfs-unixfs-importer/src/options.js b/packages/ipfs-unixfs-importer/src/options.js index 39efd91e..8cfc794b 100644 --- a/packages/ipfs-unixfs-importer/src/options.js +++
b/packages/ipfs-unixfs-importer/src/options.js @@ -35,7 +35,8 @@ const defaultOptions = { leafType: 'file', // 'raw' cidVersion: 0, progress: () => () => {}, - shardSplitThreshold: 1000, + // https://github.com/ipfs/go-ipfs/pull/8114/files#diff-eec963b47a6e1080d9d8023b4e438e6e3591b4154f7379a7e728401d2055374aR319 + shardSplitThreshold: 262144, fileImportConcurrency: 50, blockWriteConcurrency: 10, minChunkSize: 262144, diff --git a/packages/ipfs-unixfs-importer/src/tree-builder.js b/packages/ipfs-unixfs-importer/src/tree-builder.js index 301ecad0..7b3a4978 100644 --- a/packages/ipfs-unixfs-importer/src/tree-builder.js +++ b/packages/ipfs-unixfs-importer/src/tree-builder.js @@ -1,6 +1,6 @@ import DirFlat from './dir-flat.js' import flatToShard from './flat-to-shard.js' -import Dir from './dir.js' +import { Dir } from './dir.js' import toPathComponents from './utils/to-path-components.js' /** diff --git a/packages/ipfs-unixfs/src/index.js b/packages/ipfs-unixfs/src/index.js index c50a27fd..3774cf19 100644 --- a/packages/ipfs-unixfs/src/index.js +++ b/packages/ipfs-unixfs/src/index.js @@ -25,7 +25,7 @@ const DEFAULT_FILE_MODE = parseInt('0644', 8) const DEFAULT_DIRECTORY_MODE = parseInt('0755', 8) /** - * @param {string | number | undefined} [mode] + * @param {string | number | null | undefined} [mode] */ export function parseMode (mode) { if (mode == null) { @@ -161,7 +161,7 @@ class UnixFS { * @param {number} [options.hashType] * @param {number} [options.fanout] * @param {MtimeLike | null} [options.mtime] - * @param {number | string} [options.mode] + * @param {number | string | null} [options.mode] */ constructor (options = { type: 'file' From 1727073a95b24892d396a46eb61250ba10699448 Mon Sep 17 00:00:00 2001 From: achingbrain Date: Thu, 9 Feb 2023 10:44:20 +0100 Subject: [PATCH 2/3] chore: add interop tests --- packages/ipfs-unixfs-exporter/package.json | 3 - .../test/exporter-sharded.spec.js | 7 ++- .../test/exporter.spec.js | 1 - 
.../test/import-export-dir-sharding.spec.js | 8 +-- .../test/importer.spec.js | 4 +- packages/ipfs-unixfs-importer/README.md | 2 +- packages/ipfs-unixfs-importer/package.json | 2 +- packages/ipfs-unixfs-importer/src/dir-flat.js | 32 ++++------- packages/ipfs-unixfs-importer/src/options.js | 2 +- .../ipfs-unixfs-importer/src/tree-builder.js | 2 +- packages/ipfs-unixfs-importer/src/types.ts | 4 +- .../test/hash-parity-with-go-ipfs.spec.js | 55 +++++++++++++++++++ 12 files changed, 83 insertions(+), 39 deletions(-) diff --git a/packages/ipfs-unixfs-exporter/package.json b/packages/ipfs-unixfs-exporter/package.json index bd7269b6..9a09a499 100644 --- a/packages/ipfs-unixfs-exporter/package.json +++ b/packages/ipfs-unixfs-exporter/package.json @@ -169,17 +169,14 @@ }, "devDependencies": { "@types/sinon": "^10.0.0", - "abort-controller": "^3.0.0", "aegir": "^38.1.2", "blockstore-core": "^3.0.0", - "crypto-browserify": "^3.12.0", "delay": "^5.0.0", "ipfs-unixfs-importer": "^12.0.0", "it-all": "^2.0.0", "it-buffer-stream": "^3.0.0", "it-first": "^2.0.0", "merge-options": "^3.0.4", - "native-abort-controller": "^1.0.3", "sinon": "^15.0.0" }, "browser": { diff --git a/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.js b/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.js index 6b2738dd..c744ddd1 100644 --- a/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.js +++ b/packages/ipfs-unixfs-exporter/test/exporter-sharded.spec.js @@ -45,7 +45,7 @@ describe('exporter sharded', function () { */ const createShardWithFiles = async (files) => { const result = await last(importer(files, block, { - shardSplitThreshold: SHARD_SPLIT_THRESHOLD, + shardSplitThresholdBytes: SHARD_SPLIT_THRESHOLD, wrapWithDirectory: true })) @@ -60,7 +60,8 @@ describe('exporter sharded', function () { /** @type {{ [key: string]: { content: Uint8Array, cid?: CID }}} */ const files = {} - for (let i = 0; i < (SHARD_SPLIT_THRESHOLD + 1); i++) { + // needs to result in a block that is 
larger than SHARD_SPLIT_THRESHOLD bytes + for (let i = 0; i < 100; i++) { files[`file-${Math.random()}.txt`] = { content: uint8ArrayConcat(await all(randomBytes(100))) } @@ -71,7 +72,7 @@ describe('exporter sharded', function () { content: asAsyncIterable(files[path].content) })), block, { wrapWithDirectory: true, - shardSplitThreshold: SHARD_SPLIT_THRESHOLD + shardSplitThresholdBytes: SHARD_SPLIT_THRESHOLD })) const dirCid = imported.pop()?.cid diff --git a/packages/ipfs-unixfs-exporter/test/exporter.spec.js b/packages/ipfs-unixfs-exporter/test/exporter.spec.js index e54cbcd4..426384e9 100644 --- a/packages/ipfs-unixfs-exporter/test/exporter.spec.js +++ b/packages/ipfs-unixfs-exporter/test/exporter.spec.js @@ -14,7 +14,6 @@ import all from 'it-all' import last from 'it-last' import first from 'it-first' import randomBytes from 'it-buffer-stream' -import { AbortController } from 'native-abort-controller' import blockApi from './helpers/block.js' import { concat as uint8ArrayConcat } from 'uint8arrays/concat' import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' diff --git a/packages/ipfs-unixfs-exporter/test/import-export-dir-sharding.spec.js b/packages/ipfs-unixfs-exporter/test/import-export-dir-sharding.spec.js index c0b5e9c8..09f499eb 100644 --- a/packages/ipfs-unixfs-exporter/test/import-export-dir-sharding.spec.js +++ b/packages/ipfs-unixfs-exporter/test/import-export-dir-sharding.spec.js @@ -26,7 +26,7 @@ describe('builder: directory sharding', () => { path: 'a/b', content: asAsyncIterable(content) }], block, { - shardSplitThreshold: Infinity // never shard + shardSplitThresholdBytes: Infinity // never shard })) expect(nodes.length).to.equal(2) @@ -62,7 +62,7 @@ describe('builder: directory sharding', () => { path: 'a/b', content: asAsyncIterable(uint8ArrayFromString('i have the best bytes')) }], block, { - shardSplitThreshold: 0 // always shard + shardSplitThresholdBytes: 0 // always shard })) expect(nodes.length).to.equal(2) @@ -84,7 
+84,7 @@ describe('builder: directory sharding', () => { path: 'a/b', content: asAsyncIterable(uint8ArrayFromString(content)) }], block, { - shardSplitThreshold: Infinity // never shard + shardSplitThresholdBytes: Infinity // never shard })) const nonShardedHash = nodes[1].cid @@ -121,7 +121,7 @@ describe('builder: directory sharding', () => { path: 'a/b', content: asAsyncIterable(uint8ArrayFromString(content)) }], block, { - shardSplitThreshold: 0 // always shard + shardSplitThresholdBytes: 0 // always shard })) const shardedHash = nodes[1].cid diff --git a/packages/ipfs-unixfs-exporter/test/importer.spec.js b/packages/ipfs-unixfs-exporter/test/importer.spec.js index 66def5d5..84abcdad 100644 --- a/packages/ipfs-unixfs-exporter/test/importer.spec.js +++ b/packages/ipfs-unixfs-exporter/test/importer.spec.js @@ -724,7 +724,7 @@ strategies.forEach((strategy) => { const options = { cidVersion: 1, // Ensures we use DirSharded for the data below - shardSplitThreshold: 3 + shardSplitThresholdBytes: 3 } const files = await all(importer(inputFiles.map(file => ({ @@ -941,7 +941,7 @@ strategies.forEach((strategy) => { }, { path: '/foo/qux' }], block, { - shardSplitThreshold: 0 + shardSplitThresholdBytes: 0 })) const nodes = await all(recursive(entries[entries.length - 1].cid, block)) diff --git a/packages/ipfs-unixfs-importer/README.md b/packages/ipfs-unixfs-importer/README.md index 40e5e8c4..c2f1f1a2 100644 --- a/packages/ipfs-unixfs-importer/README.md +++ b/packages/ipfs-unixfs-importer/README.md @@ -120,7 +120,7 @@ The input's file paths and directory structure will be preserved in the [`dag-pb `options` is an JavaScript option that might include the following keys: - `wrapWithDirectory` (boolean, defaults to false): if true, a wrapping node will be created -- `shardSplitThreshold` (positive integer, defaults to 256KiB): if the serialized node is larger than this it will be converted to a HAMT sharded directory +- `shardSplitThresholdBytes` (positive integer, defaults to 
256KiB): if the serialized node is larger than this it will be converted to a HAMT sharded directory - `chunker` (string, defaults to `"fixed"`): the chunking strategy. Supports: - `fixed` - `rabin` diff --git a/packages/ipfs-unixfs-importer/package.json b/packages/ipfs-unixfs-importer/package.json index 0bbf354d..a97452d5 100644 --- a/packages/ipfs-unixfs-importer/package.json +++ b/packages/ipfs-unixfs-importer/package.json @@ -169,9 +169,9 @@ }, "devDependencies": { "aegir": "^38.1.2", - "assert": "^2.0.0", "blockstore-core": "^3.0.0", "it-buffer-stream": "^3.0.0", + "it-last": "^2.0.0", "wherearewe": "^2.0.1" }, "browser": { diff --git a/packages/ipfs-unixfs-importer/src/dir-flat.js b/packages/ipfs-unixfs-importer/src/dir-flat.js index c76495c6..bc097e81 100644 --- a/packages/ipfs-unixfs-importer/src/dir-flat.js +++ b/packages/ipfs-unixfs-importer/src/dir-flat.js @@ -21,8 +21,8 @@ class DirFlat extends Dir { constructor (props, options) { super(props, options) - /** @type {{ [key: string]: InProgressImportResult | Dir }} */ - this._children = {} + /** @type {Map} */ + this._children = new Map() } /** @@ -34,18 +34,18 @@ class DirFlat extends Dir { this.size = undefined this.nodeSize = undefined - this._children[name] = value + this._children.set(name, value) } /** * @param {string} name */ get (name) { - return Promise.resolve(this._children[name]) + return Promise.resolve(this._children.get(name)) } childCount () { - return Object.keys(this._children).length + return this._children.size } directChildrenCount () { @@ -53,18 +53,14 @@ class DirFlat extends Dir { } onlyChild () { - return this._children[Object.keys(this._children)[0]] + return this._children.values().next().value } async * eachChildSeries () { - const keys = Object.keys(this._children) - - for (let i = 0; i < keys.length; i++) { - const key = keys[i] - + for (const [key, child] of this._children.entries()) { yield { - key: key, - child: this._children[key] + key, + child } } } @@ -76,8 +72,7 @@ 
class DirFlat extends Dir { const links = [] - for (const name of Object.keys(this._children)) { - const child = this._children[name] + for (const [name, child] of this._children.entries()) { let size if (child instanceof Dir) { @@ -111,12 +106,9 @@ class DirFlat extends Dir { * @returns {AsyncIterable} */ async * flush (block) { - const children = Object.keys(this._children) const links = [] - for (let i = 0; i < children.length; i++) { - let child = this._children[children[i]] - + for (let [name, child] of this._children.entries()) { if (child instanceof Dir) { for await (const entry of child.flush(block)) { child = entry @@ -127,7 +119,7 @@ class DirFlat extends Dir { if (child.size != null && child.cid) { links.push({ - Name: children[i], + Name: name, Tsize: child.size, Hash: child.cid }) diff --git a/packages/ipfs-unixfs-importer/src/options.js b/packages/ipfs-unixfs-importer/src/options.js index 783d46ff..9359368f 100644 --- a/packages/ipfs-unixfs-importer/src/options.js +++ b/packages/ipfs-unixfs-importer/src/options.js @@ -34,7 +34,7 @@ const defaultOptions = { cidVersion: 0, progress: () => () => {}, // https://github.com/ipfs/go-ipfs/pull/8114/files#diff-eec963b47a6e1080d9d8023b4e438e6e3591b4154f7379a7e728401d2055374aR319 - shardSplitThreshold: 262144, + shardSplitThresholdBytes: 262144, fileImportConcurrency: 50, blockWriteConcurrency: 10, minChunkSize: 262144, diff --git a/packages/ipfs-unixfs-importer/src/tree-builder.js b/packages/ipfs-unixfs-importer/src/tree-builder.js index 7b3a4978..7a2df8cd 100644 --- a/packages/ipfs-unixfs-importer/src/tree-builder.js +++ b/packages/ipfs-unixfs-importer/src/tree-builder.js @@ -34,7 +34,7 @@ async function addToTree (elem, tree, options) { if (last) { await parent.put(pathElem, elem) - tree = await flatToShard(null, parent, options.shardSplitThreshold, options) + tree = await flatToShard(null, parent, options.shardSplitThresholdBytes, options) } else { let dir = await parent.get(pathElem) diff --git 
a/packages/ipfs-unixfs-importer/src/types.ts b/packages/ipfs-unixfs-importer/src/types.ts index 84779d52..e0f41133 100644 --- a/packages/ipfs-unixfs-importer/src/types.ts +++ b/packages/ipfs-unixfs-importer/src/types.ts @@ -57,7 +57,7 @@ export interface UserImporterOptions { leafType?: 'file' | 'raw' cidVersion?: CIDVersion progress?: ProgressHandler - shardSplitThreshold?: number + shardSplitThresholdBytes?: number fileImportConcurrency?: number blockWriteConcurrency?: number minChunkSize?: number @@ -90,7 +90,7 @@ export interface ImporterOptions { leafType: 'file' | 'raw' cidVersion: CIDVersion progress: ProgressHandler - shardSplitThreshold: number + shardSplitThresholdBytes: number fileImportConcurrency: number blockWriteConcurrency: number minChunkSize: number diff --git a/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js b/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js index e8eb1ec2..465e087e 100644 --- a/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js +++ b/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js @@ -4,8 +4,10 @@ import { importer } from '../src/index.js' import { expect } from 'aegir/chai' import randomByteStream from './helpers/finite-pseudorandom-byte-stream.js' import first from 'it-first' +import last from 'it-last' import blockApi from './helpers/block.js' import defaultOptions from '../src/options.js' +import { fromString as uint8ArrayFromString } from 'uint8arrays/from-string' /** @type {('flat' | 'trickle' | 'balanced')[]} */ const strategies = [ @@ -53,3 +55,56 @@ strategies.forEach(strategy => { }) }) }) + +describe('go-ipfs auto-sharding interop', function () { + this.timeout(100 * 1000) + + /** + * @param {number} count + */ + function buildSource (count) { + return new Array(count).fill(0).map((_, index) => { + const string = `long name to fill out bytes to make the sharded directory test flip over the sharded directory limit because link names are 
included in the directory entry ${index}` + + return { + path: `rootDir/${string}`, + content: uint8ArrayFromString(string) + } + }) + } + + const block = blockApi() + const threshold = 1343 + + it('uses the same shard threshold as go-unixfsnode (under threshold)', async function () { + const result = await last(importer(buildSource(threshold), block, { + cidVersion: 1, + rawLeaves: true, + // TODO: https://github.com/ipfs/js-ipfs-unixfs/pull/171#issuecomment-1423893967 + shardSplitThresholdBytes: 512000 + })) + + if (!result) { + throw new Error('Nothing imported') + } + + expect(result).to.have.property('size', 490665) + expect(result).to.have.nested.property('unixfs.type', 'directory') + expect(result.cid.toString()).to.be.equal('bafybeihecq4rpl4nw3cgfb2uiwltgsmw5sutouvuldv5fxn4gfbihvnalq') + }) + + it('uses the same shard threshold as go-unixfsnode (over threshold)', async function () { + const result = await last(importer(buildSource(threshold + 1), block, { + cidVersion: 1, + rawLeaves: true + })) + + if (!result) { + throw new Error('Nothing imported') + } + + expect(result).to.have.property('size', 515735) + expect(result).to.have.nested.property('unixfs.type', 'hamt-sharded-directory') + expect(result.cid.toString()).to.be.equal('bafybeigyvxs6og5jbmpaa43qbhhd5swklqcfzqdrtjgfh53qjon6hpjaye') + }) +}) From 288910ffe735ba5eee9a5007491b9b7ffa4567ab Mon Sep 17 00:00:00 2001 From: achingbrain Date: Thu, 9 Feb 2023 12:51:32 +0100 Subject: [PATCH 3/3] chore: estimate size, do not calculate size --- packages/ipfs-unixfs-importer/README.md | 2 +- packages/ipfs-unixfs-importer/src/dir-flat.js | 28 ++++--------------- .../ipfs-unixfs-importer/src/dir-sharded.js | 2 +- packages/ipfs-unixfs-importer/src/dir.js | 2 +- .../ipfs-unixfs-importer/src/flat-to-shard.js | 2 +- .../test/hash-parity-with-go-ipfs.spec.js | 4 +-- 6 files changed, 10 insertions(+), 30 deletions(-) diff --git a/packages/ipfs-unixfs-importer/README.md b/packages/ipfs-unixfs-importer/README.md index 
c2f1f1a2..e64e3aa3 100644 --- a/packages/ipfs-unixfs-importer/README.md +++ b/packages/ipfs-unixfs-importer/README.md @@ -120,7 +120,7 @@ The input's file paths and directory structure will be preserved in the [`dag-pb `options` is an JavaScript option that might include the following keys: - `wrapWithDirectory` (boolean, defaults to false): if true, a wrapping node will be created -- `shardSplitThresholdBytes` (positive integer, defaults to 256KiB): if the serialized node is larger than this it will be converted to a HAMT sharded directory +- `shardSplitThresholdBytes` (positive integer, defaults to 256KiB): if the serialized node is larger than this it might be converted to a HAMT sharded directory - `chunker` (string, defaults to `"fixed"`): the chunking strategy. Supports: - `fixed` - `rabin` diff --git a/packages/ipfs-unixfs-importer/src/dir-flat.js b/packages/ipfs-unixfs-importer/src/dir-flat.js index bc097e81..4aeb069f 100644 --- a/packages/ipfs-unixfs-importer/src/dir-flat.js +++ b/packages/ipfs-unixfs-importer/src/dir-flat.js @@ -65,39 +65,21 @@ class DirFlat extends Dir { } } - calculateNodeSize () { + estimateNodeSize () { if (this.nodeSize !== undefined) { return this.nodeSize } - const links = [] + this.nodeSize = 0 + // estimate size only based on DAGLink name and CID byte lengths + // https://github.com/ipfs/go-unixfsnode/blob/37b47f1f917f1b2f54c207682f38886e49896ef9/data/builder/directory.go#L81-L96 for (const [name, child] of this._children.entries()) { - let size - - if (child instanceof Dir) { - size = child.calculateNodeSize() - } else { - size = child.size - } - if (child.size != null && child.cid) { - links.push({ - Name: name, - Tsize: size, - Hash: this.options.cidVersion === 0 ? CID_V0 : CID_V1 - }) + this.nodeSize += name.length + (this.options.cidVersion === 1 ? 
CID_V1.bytes.byteLength : CID_V0.bytes.byteLength) } } - const unixfs = new UnixFS({ - type: 'directory', - mtime: this.mtime, - mode: this.mode - }) - - this.nodeSize = encode(prepare({ Data: unixfs.marshal(), Links: links })).length - return this.nodeSize } diff --git a/packages/ipfs-unixfs-importer/src/dir-sharded.js b/packages/ipfs-unixfs-importer/src/dir-sharded.js index 07839e29..2cde017b 100644 --- a/packages/ipfs-unixfs-importer/src/dir-sharded.js +++ b/packages/ipfs-unixfs-importer/src/dir-sharded.js @@ -70,7 +70,7 @@ class DirSharded extends Dir { } } - calculateNodeSize () { + estimateNodeSize () { if (this.nodeSize !== undefined) { return this.nodeSize } diff --git a/packages/ipfs-unixfs-importer/src/dir.js b/packages/ipfs-unixfs-importer/src/dir.js index e0021ad1..97a107b5 100644 --- a/packages/ipfs-unixfs-importer/src/dir.js +++ b/packages/ipfs-unixfs-importer/src/dir.js @@ -73,7 +73,7 @@ export class Dir { /** * @returns {number} */ - calculateNodeSize () { + estimateNodeSize () { return 0 } } diff --git a/packages/ipfs-unixfs-importer/src/flat-to-shard.js b/packages/ipfs-unixfs-importer/src/flat-to-shard.js index 87cb6411..af083e49 100644 --- a/packages/ipfs-unixfs-importer/src/flat-to-shard.js +++ b/packages/ipfs-unixfs-importer/src/flat-to-shard.js @@ -16,7 +16,7 @@ import DirFlat from './dir-flat.js' async function flatToShard (child, dir, threshold, options) { let newDir = dir - if (dir instanceof DirFlat && dir.calculateNodeSize() > threshold) { + if (dir instanceof DirFlat && dir.estimateNodeSize() > threshold) { newDir = await convertToShard(dir, options) } diff --git a/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js b/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js index 465e087e..268073d1 100644 --- a/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js +++ b/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js @@ -79,9 +79,7 @@ describe('go-ipfs auto-sharding interop', 
function () { it('uses the same shard threshold as go-unixfsnode (under threshold)', async function () { const result = await last(importer(buildSource(threshold), block, { cidVersion: 1, - rawLeaves: true, - // TODO: https://github.com/ipfs/js-ipfs-unixfs/pull/171#issuecomment-1423893967 - shardSplitThresholdBytes: 512000 + rawLeaves: true })) if (!result) {