From 2ff6608dc89f709b3cfc424d8c4535bdae6698b9 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 6 Oct 2022 18:02:38 +0200 Subject: [PATCH 01/39] Initial Commit It can download single block raw leaves files over car. --- .gitignore | 1 + cmd/feather/main.go | 51 +++++++++++ entry.go | 209 ++++++++++++++++++++++++++++++++++++++++++++ go.mod | 28 ++++++ go.sum | 80 +++++++++++++++++ 5 files changed, 369 insertions(+) create mode 100644 .gitignore create mode 100644 cmd/feather/main.go create mode 100644 entry.go create mode 100644 go.mod create mode 100644 go.sum diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..22d0d82f8 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +vendor diff --git a/cmd/feather/main.go b/cmd/feather/main.go new file mode 100644 index 000000000..8165129bb --- /dev/null +++ b/cmd/feather/main.go @@ -0,0 +1,51 @@ +package main + +import ( + "fmt" + "io" + "os" + + "github.com/Jorropo/go-featheripfs" + "github.com/ipfs/go-cid" +) + +func main() { + err := mainRet() + if err != nil { + os.Stderr.WriteString(err.Error()) + os.Exit(1) + } + os.Exit(0) +} + +func parseArgs() (cid.Cid, error) { + if len(os.Args) != 2 { + return cid.Cid{}, fmt.Errorf("expected one argument") + } + + return cid.Decode(os.Args[1]) +} + +func mainRet() error { + c, err := parseArgs() + if err != nil { + return fmt.Errorf(`%w +Usage: +%s + +Example: +%s bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi`, err, os.Args[0], os.Args[0]) + } + + r, err := feather.DownloadFile(c) + if err != nil { + return err + } + defer r.Close() + + _, err = io.Copy(os.Stdout, r) + if err != nil { + return err + } + return nil +} diff --git a/entry.go b/entry.go new file mode 100644 index 000000000..3eb93c66a --- /dev/null +++ b/entry.go @@ -0,0 +1,209 @@ +package feather + +import ( + "bufio" + "bytes" + "encoding/binary" + "fmt" + "io" + "net/http" + + "github.com/ipfs/go-cid" + cbor "github.com/ipfs/go-ipld-cbor" + "github.com/ipfs/go-verifcid" + mh 
"github.com/multiformats/go-multihash" +) + +// clone recovers the extra capacity allocated +func clone[T any](s ...T) []T { + var zeroSlice []T + return append(zeroSlice, s...) +} + +func cidStringTruncate(c cid.Cid) string { + cidStr := c.String() + if len(cidStr) > maxCidCharDisplay { + // please don't use non ASCII bases + cidStr = cidStr[:maxCidCharDisplay] + "..." + } + return cidStr +} + +type carHeader struct { + Roots []cid.Cid + Version uint64 +} + +func init() { + cbor.RegisterCborType(carHeader{}) +} + +const gateway = "https://ipfs.io/ipfs/" +const maxHeaderSize = 32 * 1024 * 1024 // 32MiB +const maxBlockSize = 2 * 1024 * 1024 // 2MiB +const maxCidSize = 4096 +const maxCidCharDisplay = 512 + +type region struct { + c cid.Cid + low uint64 + high uint64 + rangeKnown bool +} + +type downloader struct { + io.Closer + + buf bufio.Reader + state [][]region + curBlock int +} + +// If DownloadFile returns a non nil error, you MUST call Close on the reader, +// even if reader.Read returns an error. 
+func DownloadFile(c cid.Cid) (io.ReadCloser, error) { + req, err := http.NewRequest("GET", gateway+c.String(), bytes.NewReader(nil)) + if err != nil { + return nil, err + } + // FIXME: Specify ordered DFS with duplicates + req.Header.Add("Accept", "application/vnd.ipld.car") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + var good bool + defer func() { + if !good { + resp.Body.Close() + } + }() + + r := &downloader{ + Closer: resp.Body, + state: clone(clone(region{low: 0, high: 1<<64 - 1, c: c})), + } + r.buf = *bufio.NewReaderSize(resp.Body, maxBlockSize*2+4096*2) + + headerSize, err := binary.ReadUvarint(&r.buf) + if err != nil { + return nil, err + } + if headerSize > maxHeaderSize { + return nil, fmt.Errorf("header is to big at %d instead of %d", headerSize, maxHeaderSize) + } + + b := make([]byte, headerSize) + _, err = io.ReadFull(&r.buf, b) + if err != nil { + return nil, err + } + + h := carHeader{} + err = cbor.DecodeInto(b, &h) + if err != nil { + return nil, err + } + + const supportedVersion = 1 + if h.Version != supportedVersion { + return nil, fmt.Errorf("unsupported version %d instead of %d", h.Version, supportedVersion) + } + if len(h.Roots) != 1 { + return nil, fmt.Errorf("header has more roots than expected %d instead of 1", len(h.Roots)) + } + if h.Roots[0] != c { + return nil, fmt.Errorf("header root don't match, got %s instead of %s", cidStringTruncate(h.Roots[0]), c.String()) + } + + good = true + + return r, nil +} + +func (d *downloader) Read(b []byte) (int, error) { + if d.curBlock == 0 { + // have to fill more data in the buffer + if len(d.state) == 0 { + // no more data remaining + return 0, io.EOF + } + todos := d.state[len(d.state)-1] + todo := todos[0] + var data []byte + c := todo.c + + pref := c.Prefix() + switch pref.MhType { + case mh.IDENTITY: + data = c.Hash()[1:] // skip the 0x00 prefix + default: + if err := verifcid.ValidateCid(c); err != nil { + return 0, fmt.Errorf("cid %s don't pass 
safe test: %w", cidStringTruncate(c), err) + } + itemLenU, err := binary.ReadUvarint(&d.buf) + if err != nil { + return 0, err + } + if itemLenU > maxBlockSize+maxCidSize { + return 0, fmt.Errorf("item size (%d) for %s exceed maxBlockSize+maxCidSize (%d)", itemLenU, cidStringTruncate(c), maxBlockSize+maxCidSize) + } + itemLen := int(itemLenU) + + cidLen, cidFound, err := cid.CidFromReader(&d.buf) + if err != nil { + return 0, fmt.Errorf("trying to read %s failed to read cid: %w", cidStringTruncate(c), err) + } + if cidLen > maxCidSize { + return 0, fmt.Errorf("cidFound for %s is too big at %d bytes", cidStringTruncate(c), cidLen) + } + if cidFound != c { + return 0, fmt.Errorf("downloading %s but got %s instead", cidStringTruncate(c), cidStringTruncate(cidFound)) + } + + blockSize := itemLen - cidLen + if blockSize > maxBlockSize { + return 0, fmt.Errorf("block %s is too big (%d) max %d", cidStringTruncate(c), blockSize, maxBlockSize) + } + // TODO: fast path read directly into b if len(b) <= blockSize and type is raw + data, err = d.buf.Peek(blockSize) + if err != nil { + return 0, fmt.Errorf("getting block data for %s for verification: %w", cidStringTruncate(c), err) + } + cidGot, err := pref.Sum(data) + if err != nil { + return 0, fmt.Errorf("hashing data for %s: %w", cidStringTruncate(c), err) + } + + if cidGot != c { + return 0, fmt.Errorf("data integrity failed, expected %s; got %s", cidStringTruncate(c), cidStringTruncate(cidGot)) + } + } + + switch pref.Codec { + case cid.Raw: + if todo.rangeKnown { + expectedSize := todo.high - todo.low + if uint64(len(data)) != expectedSize { + return 0, fmt.Errorf("leaf isn't size is incorrect, expected %d; got %d", len(data), expectedSize) + } + } + d.curBlock = len(data) + case cid.DagProtobuf: + return 0, fmt.Errorf("TODO: Unimplemented DagProtobuf") + default: + return 0, fmt.Errorf("unknown codec type %d; expected Raw or Dag-PB", pref.Codec) + } + } + + toRead := d.curBlock + + if len(b) < toRead { + toRead = len(b) 
+ } + + n, err := d.buf.Read(b[:toRead]) + d.curBlock -= n + return n, err +} diff --git a/go.mod b/go.mod new file mode 100644 index 000000000..f746cd437 --- /dev/null +++ b/go.mod @@ -0,0 +1,28 @@ +module github.com/Jorropo/go-featheripfs + +go 1.19 + +require ( + github.com/ipfs/go-block-format v0.0.2 // indirect + github.com/ipfs/go-cid v0.3.2 // indirect + github.com/ipfs/go-ipfs-util v0.0.1 // indirect + github.com/ipfs/go-ipld-cbor v0.0.6 // indirect + github.com/ipfs/go-ipld-format v0.0.1 // indirect + github.com/ipfs/go-verifcid v0.0.2 // indirect + github.com/klauspost/cpuid/v2 v2.0.9 // indirect + github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 // indirect + github.com/minio/sha256-simd v1.0.0 // indirect + github.com/mr-tron/base58 v1.2.0 // indirect + github.com/multiformats/go-base32 v0.0.3 // indirect + github.com/multiformats/go-base36 v0.1.0 // indirect + github.com/multiformats/go-multibase v0.0.3 // indirect + github.com/multiformats/go-multihash v0.2.1 // indirect + github.com/multiformats/go-varint v0.0.6 // indirect + github.com/polydawn/refmt v0.0.0-20190221155625-df39d6c2d992 // indirect + github.com/spaolacci/murmur3 v1.1.0 // indirect + github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158 // indirect + golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e // indirect + golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1 // indirect + golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 // indirect + lukechampine.com/blake3 v1.1.6 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 000000000..7fe9c763d --- /dev/null +++ b/go.sum @@ -0,0 +1,80 @@ +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= +github.com/gxed/hashland/keccakpg v0.0.1/go.mod h1:kRzw3HkwxFU1mpmPP8v1WyQzwdGfmKFJ6tItnhQ67kU= +github.com/gxed/hashland/murmur3 
v0.0.1/go.mod h1:KjXop02n4/ckmZSnY2+HKcLud/tcmvhST0bie/0lS48= +github.com/ipfs/go-block-format v0.0.2 h1:qPDvcP19izTjU8rgo6p7gTXZlkMkF5bz5G3fqIsSCPE= +github.com/ipfs/go-block-format v0.0.2/go.mod h1:AWR46JfpcObNfg3ok2JHDUfdiHRgWhJgCQF+KIgOPJY= +github.com/ipfs/go-cid v0.0.1/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= +github.com/ipfs/go-cid v0.0.3/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= +github.com/ipfs/go-cid v0.3.2 h1:OGgOd+JCFM+y1DjWPmVH+2/4POtpDzwcr7VgnB7mZXc= +github.com/ipfs/go-cid v0.3.2/go.mod h1:gQ8pKqT/sUxGY+tIwy1RPpAojYu7jAyCp5Tz1svoupw= +github.com/ipfs/go-ipfs-util v0.0.1 h1:Wz9bL2wB2YBJqggkA4dD7oSmqB4cAnpNbGrlHJulv50= +github.com/ipfs/go-ipfs-util v0.0.1/go.mod h1:spsl5z8KUnrve+73pOhSVZND1SIxPW5RyBCNzQxlJBc= +github.com/ipfs/go-ipld-cbor v0.0.6 h1:pYuWHyvSpIsOOLw4Jy7NbBkCyzLDcl64Bf/LZW7eBQ0= +github.com/ipfs/go-ipld-cbor v0.0.6/go.mod h1:ssdxxaLJPXH7OjF5V4NSjBbcfh+evoR4ukuru0oPXMA= +github.com/ipfs/go-ipld-format v0.0.1 h1:HCu4eB/Gh+KD/Q0M8u888RFkorTWNIL3da4oc5dwc80= +github.com/ipfs/go-ipld-format v0.0.1/go.mod h1:kyJtbkDALmFHv3QR6et67i35QzO3S0dCDnkOJhcZkms= +github.com/ipfs/go-verifcid v0.0.2 h1:XPnUv0XmdH+ZIhLGKg6U2vaPaRDXb9urMyNVCE7uvTs= +github.com/ipfs/go-verifcid v0.0.2/go.mod h1:40cD9x1y4OWnFXbLNJYRe7MpNvWlMn3LZAG5Wb4xnPU= +github.com/jtolds/gls v4.2.1+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= +github.com/klauspost/cpuid/v2 v2.0.4 h1:g0I61F2K2DjRHz1cnxlkNSBIaePVoJIjjnHui8QHbiw= +github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 h1:lYpkrQH5ajf0OXOcUbGjvZxxijuBwbbmlSxLiuofa+g= +github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1/go.mod h1:pD8RvIylQ358TN4wwqatJ8rNavkEINozVn9DtGI3dfQ= 
+github.com/minio/sha256-simd v0.0.0-20190131020904-2d45a736cd16/go.mod h1:2FMWW+8GMoPweT6+pI63m9YE3Lmw4J71hV56Chs1E/U= +github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= +github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g= +github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= +github.com/mr-tron/base58 v1.1.0/go.mod h1:xcD2VGqlgYjBdcBLw+TuYLr8afG+Hj8g2eTVqeSzSU8= +github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= +github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= +github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= +github.com/multiformats/go-base32 v0.0.3 h1:tw5+NhuwaOjJCC5Pp82QuXbrmLzWg7uxlMFp8Nq/kkI= +github.com/multiformats/go-base32 v0.0.3/go.mod h1:pLiuGC8y0QR3Ue4Zug5UzK9LjgbkL8NSQj0zQ5Nz/AA= +github.com/multiformats/go-base36 v0.1.0 h1:JR6TyF7JjGd3m6FbLU2cOxhC0Li8z8dLNGQ89tUg4F4= +github.com/multiformats/go-base36 v0.1.0/go.mod h1:kFGE83c6s80PklsHO9sRn2NCoffoRdUUOENyW/Vv6sM= +github.com/multiformats/go-multibase v0.0.1/go.mod h1:bja2MqRZ3ggyXtZSEDKpl0uO/gviWFaSteVbWT51qgs= +github.com/multiformats/go-multibase v0.0.3 h1:l/B6bJDQjvQ5G52jw4QGSYeOTZoAwIO77RblWplfIqk= +github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc= +github.com/multiformats/go-multihash v0.0.1/go.mod h1:w/5tugSrLEbWqlcgJabL3oHFKTwfvkofsjW2Qa1ct4U= +github.com/multiformats/go-multihash v0.0.10/go.mod h1:YSLudS+Pi8NHE7o6tb3D8vrpKa63epEDmG8nTduyAew= +github.com/multiformats/go-multihash v0.0.15 h1:hWOPdrNqDjwHDx82vsYGSDZNyktOJJ2dzZJzFkOV1jM= +github.com/multiformats/go-multihash v0.0.15/go.mod h1:D6aZrWNLFTV/ynMpKsNtB40mJzmCl4jb1alC0OvHiHg= +github.com/multiformats/go-multihash v0.2.1 h1:aem8ZT0VA2nCHHk7bPJ1BjUbHNciqZC/d16Vve9l108= +github.com/multiformats/go-multihash v0.2.1/go.mod 
h1:WxoMcYG85AZVQUyRyo9s4wULvW5qrI9vb2Lt6evduFc= +github.com/multiformats/go-varint v0.0.6 h1:gk85QWKxh3TazbLxED/NlDVv8+q+ReFJk7Y2W/KhfNY= +github.com/multiformats/go-varint v0.0.6/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= +github.com/polydawn/refmt v0.0.0-20190221155625-df39d6c2d992 h1:bzMe+2coZJYHnhGgVlcQKuRy4FSny4ds8dLQjw5P1XE= +github.com/polydawn/refmt v0.0.0-20190221155625-df39d6c2d992/go.mod h1:uIp+gprXxxrWSjjklXD+mN4wed/tMfjMMmN/9+JsA9o= +github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= +github.com/smartystreets/goconvey v0.0.0-20190222223459-a17d461953aa/go.mod h1:2RVY1rIf+2J2o/IM9+vPq9RzmHDSseB7FoXiSNIUsoU= +github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= +github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/warpfork/go-wish v0.0.0-20180510122957-5ad1f5abf436/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= +github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158 h1:WXhVOwj2USAXB5oMDwRl3piOux2XMV9TANaYxXHdkoE= +github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158/go.mod h1:Xj/M2wWU+QdTdRbu/L/1dIZY8/Wb2K9pAhtroQuxJJI= +golang.org/x/crypto v0.0.0-20190211182817-74369b46fc67/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= +golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf h1:B2n+Zi5QeYRDAEodEu72OS36gmTWjgpXr2+cWcBW90o= +golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= +golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e 
h1:T8NU3HyQ8ClP4SEE+KbFlg6n0NhuTsN4MyznaarGsZM= +golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190219092855-153ac476189d/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210309074719-68d13333faf2 h1:46ULzRKLh1CwgRq2dC5SlBzEqqNCi8rreOZnNrbqcIY= +golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1 h1:SrN+KX8Art/Sf4HNj6Zcz06G7VEz+7w9tdXTPOZ7+l4= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +lukechampine.com/blake3 v1.1.6 h1:H3cROdztr7RCfoaTpGZFQsrqvweFLrqS73j7L7cmR5c= +lukechampine.com/blake3 v1.1.6/go.mod h1:tkKEOtDkNtklkXtLNEOGNq5tcV90tJiA1vAA12R78LA= From 3bbf591b43519323d5f72ac5439055f911388174 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 6 Oct 2022 22:10:31 +0200 Subject: [PATCH 02/39] support multiblock files --- cmd/feather/main.go | 5 +- entry.go | 194 ++++++++++++++-- go.mod | 1 + go.sum | 5 + internal/pb/Makefile | 11 + 
internal/pb/file.pb.go | 488 +++++++++++++++++++++++++++++++++++++++++ internal/pb/file.proto | 47 ++++ 7 files changed, 728 insertions(+), 23 deletions(-) create mode 100644 internal/pb/Makefile create mode 100644 internal/pb/file.pb.go create mode 100644 internal/pb/file.proto diff --git a/cmd/feather/main.go b/cmd/feather/main.go index 8165129bb..7e20fa4d9 100644 --- a/cmd/feather/main.go +++ b/cmd/feather/main.go @@ -13,6 +13,7 @@ func main() { err := mainRet() if err != nil { os.Stderr.WriteString(err.Error()) + os.Stderr.WriteString("\n") os.Exit(1) } os.Exit(0) @@ -39,13 +40,13 @@ Example: r, err := feather.DownloadFile(c) if err != nil { - return err + return fmt.Errorf("Error starting file download: %w", err) } defer r.Close() _, err = io.Copy(os.Stdout, r) if err != nil { - return err + return fmt.Errorf("Error downloading file: %w", err) } return nil } diff --git a/entry.go b/entry.go index 3eb93c66a..01f7d96dd 100644 --- a/entry.go +++ b/entry.go @@ -8,16 +8,17 @@ import ( "io" "net/http" + pb "github.com/Jorropo/go-featheripfs/internal/pb" + "google.golang.org/protobuf/proto" + "github.com/ipfs/go-cid" cbor "github.com/ipfs/go-ipld-cbor" "github.com/ipfs/go-verifcid" mh "github.com/multiformats/go-multihash" ) -// clone recovers the extra capacity allocated -func clone[T any](s ...T) []T { - var zeroSlice []T - return append(zeroSlice, s...) +func create[T any](len int) []T { + return append([]T{}, make([]T, len)...) 
} func cidStringTruncate(c cid.Cid) string { @@ -38,7 +39,7 @@ func init() { cbor.RegisterCborType(carHeader{}) } -const gateway = "https://ipfs.io/ipfs/" +const gateway = "http://localhost:8080/ipfs/" const maxHeaderSize = 32 * 1024 * 1024 // 32MiB const maxBlockSize = 2 * 1024 * 1024 // 2MiB const maxCidSize = 4096 @@ -56,7 +57,7 @@ type downloader struct { buf bufio.Reader state [][]region - curBlock int + curBlock []byte } // If DownloadFile returns a non nil error, you MUST call Close on the reader, @@ -82,7 +83,7 @@ func DownloadFile(c cid.Cid) (io.ReadCloser, error) { r := &downloader{ Closer: resp.Body, - state: clone(clone(region{low: 0, high: 1<<64 - 1, c: c})), + state: [][]region{{{low: 0, high: 1<<64 - 1, c: c}}}, } r.buf = *bufio.NewReaderSize(resp.Body, maxBlockSize*2+4096*2) @@ -122,15 +123,42 @@ func DownloadFile(c cid.Cid) (io.ReadCloser, error) { return r, nil } +func loadCidFromBytes(cidBytes []byte) (cid.Cid, error) { + if len(cidBytes) == 0 { + return cid.Cid{}, fmt.Errorf("missing CID") + } + if len(cidBytes) > maxCidSize { + return cid.Cid{}, fmt.Errorf("CID is too big, %d max allowed %d", len(cidBytes), maxCidSize) + } + + c, err := cid.Cast(cidBytes) + if err != nil { + return cid.Cid{}, fmt.Errorf("malphormed CID: %w", err) + } + + return c, nil +} + func (d *downloader) Read(b []byte) (int, error) { - if d.curBlock == 0 { + for len(d.curBlock) == 0 { // have to fill more data in the buffer if len(d.state) == 0 { // no more data remaining return 0, io.EOF } - todos := d.state[len(d.state)-1] + + // pop current item from the DFS stack + last := len(d.state) - 1 + todos := d.state[last] todo := todos[0] + todos = todos[1:] + if len(todos) == 0 { + d.state[last] = nil // early gc + d.state = d.state[:last] + } else { + d.state[last] = todos + } + var data []byte c := todo.c @@ -169,8 +197,17 @@ func (d *downloader) Read(b []byte) (int, error) { // TODO: fast path read directly into b if len(b) <= blockSize and type is raw data, err = 
d.buf.Peek(blockSize) if err != nil { - return 0, fmt.Errorf("getting block data for %s for verification: %w", cidStringTruncate(c), err) + if err == io.EOF { + // don't show io.EOF in case peeking is too short + err = io.ErrUnexpectedEOF + } + return 0, fmt.Errorf("Peeking at block data for %s verification: %w", cidStringTruncate(c), err) + } + _, err = d.buf.Discard(len(data)) + if err != nil { + return 0, fmt.Errorf("Critical: Discard is supposed to always succeed as long as we don't read less than buffered: %w", err) } + cidGot, err := pref.Sum(data) if err != nil { return 0, fmt.Errorf("hashing data for %s: %w", cidStringTruncate(c), err) @@ -186,24 +223,139 @@ func (d *downloader) Read(b []byte) (int, error) { if todo.rangeKnown { expectedSize := todo.high - todo.low if uint64(len(data)) != expectedSize { - return 0, fmt.Errorf("leaf isn't size is incorrect, expected %d; got %d", len(data), expectedSize) + return 0, fmt.Errorf("leaf isn't size is incorrect for %s, expected %d; got %d", cidStringTruncate(c), len(data), expectedSize) } } - d.curBlock = len(data) + d.curBlock = data case cid.DagProtobuf: - return 0, fmt.Errorf("TODO: Unimplemented DagProtobuf") + var block pb.PBNode + err := proto.Unmarshal(data, &block) + if err != nil { + return 0, fmt.Errorf("parsing block for %s: %w", cidStringTruncate(c), err) + } + + if len(block.Data) == 0 { + return 0, fmt.Errorf("block %s is missing Data field", cidStringTruncate(c)) + } + + var metadata pb.UnixfsData + err = proto.Unmarshal(block.Data, &metadata) + if err != nil { + return 0, fmt.Errorf("parsing metadata for %s: %w", cidStringTruncate(c), err) + } + + if metadata.Type == nil { + return 0, fmt.Errorf("missing unixfs node Type for %s", cidStringTruncate(c)) + } + switch *metadata.Type { + case pb.UnixfsData_File: + blocksizes := metadata.Blocksizes + links := block.Links + if len(blocksizes) != len(links) { + return 0, fmt.Errorf("inconsistent sisterlists for %s, %d vs %d", cidStringTruncate(c), 
len(blocksizes), len(links)) + } + + if todo.rangeKnown { + if todo.low < uint64(len(metadata.Data)) { + high := uint64(len(metadata.Data)) + if high > todo.high { + high = todo.high + } + d.curBlock = metadata.Data[todo.low:high] + } + } else { + d.curBlock = metadata.Data + } + + filesize := uint64(len(metadata.Data)) + if len(blocksizes) != 0 { + var subRegions []region + if todo.rangeKnown { + var regionsInBound int + for _, bs := range blocksizes { + if todo.low <= filesize+bs && filesize < todo.high { + regionsInBound++ + } + filesize += bs + } + + subRegions = create[region](regionsInBound) + var j int + cursor := uint64(len(metadata.Data)) + for i, bs := range blocksizes { + if cursor >= todo.high { + break + } + if todo.low <= cursor+bs { + var low uint64 + if todo.low > cursor { + low = todo.low - cursor + } + high := todo.high - cursor + if bs < high { + high = bs + } + + subCid, err := loadCidFromBytes(links[i].Hash) + if err != nil { + return 0, fmt.Errorf("link %d of %s: %w", i, cidStringTruncate(c), err) + } + + subRegions[j] = region{ + c: subCid, + low: low, + high: high, + rangeKnown: true, + } + j++ + } + cursor += bs + } + } else { + subRegions = create[region](len(blocksizes)) + for i, bs := range blocksizes { + subCid, err := loadCidFromBytes(links[i].Hash) + if err != nil { + return 0, fmt.Errorf("link %d of %s: %w", i, cidStringTruncate(c), err) + } + + subRegions[i] = region{ + c: subCid, + low: 0, + high: bs, + rangeKnown: true, + } + filesize += bs + } + } + d.state = append(d.state, subRegions) + } + + if todo.rangeKnown { + expectedSize := todo.high - todo.low + if filesize != expectedSize { + return 0, fmt.Errorf("inconsistent filesize for %s, expected %d; got %d", cidStringTruncate(c), expectedSize, filesize) + } + } + if metadata.Filesize != nil { + if *metadata.Filesize != filesize { + return 0, fmt.Errorf("inconsistent Filesize metadata field for %s, expected %d; got %d", cidStringTruncate(c), filesize, *metadata.Filesize) + } + } 
+ default: + return 0, fmt.Errorf("unkown unixfs node type for %s: %s", cidStringTruncate(c), metadata.Type.String()) + } + default: - return 0, fmt.Errorf("unknown codec type %d; expected Raw or Dag-PB", pref.Codec) + return 0, fmt.Errorf("unknown codec type %d for %s; expected Raw or Dag-PB", pref.Codec, cidStringTruncate(c)) } } - toRead := d.curBlock - - if len(b) < toRead { - toRead = len(b) + n := copy(b, d.curBlock) + d.curBlock = d.curBlock[n:] + if len(d.curBlock) == 0 { + d.curBlock = nil // early gc } - n, err := d.buf.Read(b[:toRead]) - d.curBlock -= n - return n, err + return n, nil } diff --git a/go.mod b/go.mod index f746cd437..712ddceb8 100644 --- a/go.mod +++ b/go.mod @@ -24,5 +24,6 @@ require ( golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e // indirect golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1 // indirect golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 // indirect + google.golang.org/protobuf v1.28.1 // indirect lukechampine.com/blake3 v1.1.6 // indirect ) diff --git a/go.sum b/go.sum index 7fe9c763d..0825d4238 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,6 @@ +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gxed/hashland/keccakpg v0.0.1/go.mod h1:kRzw3HkwxFU1mpmPP8v1WyQzwdGfmKFJ6tItnhQ67kU= github.com/gxed/hashland/murmur3 v0.0.1/go.mod h1:KjXop02n4/ckmZSnY2+HKcLud/tcmvhST0bie/0lS48= @@ -76,5 +78,8 @@ golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXR golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors 
v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= +google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= lukechampine.com/blake3 v1.1.6 h1:H3cROdztr7RCfoaTpGZFQsrqvweFLrqS73j7L7cmR5c= lukechampine.com/blake3 v1.1.6/go.mod h1:tkKEOtDkNtklkXtLNEOGNq5tcV90tJiA1vAA12R78LA= diff --git a/internal/pb/Makefile b/internal/pb/Makefile new file mode 100644 index 000000000..1cb32f3df --- /dev/null +++ b/internal/pb/Makefile @@ -0,0 +1,11 @@ +PB = $(wildcard *.proto) +GO = $(PB:.proto=.pb.go) + +all: $(GO) + +%.pb.go: %.proto + protoc --proto_path=$(GOPATH)/src:. --go_out=. $< + +clean: + rm -f *.pb.go + rm -f *.go diff --git a/internal/pb/file.pb.go b/internal/pb/file.pb.go new file mode 100644 index 000000000..a4863104c --- /dev/null +++ b/internal/pb/file.pb.go @@ -0,0 +1,488 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.27.1 +// protoc v3.10.1 +// source: file.proto + +package __ + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type UnixfsData_DataType int32 + +const ( + UnixfsData_Raw UnixfsData_DataType = 0 + UnixfsData_Directory UnixfsData_DataType = 1 + UnixfsData_File UnixfsData_DataType = 2 + UnixfsData_Metadata UnixfsData_DataType = 3 + UnixfsData_Symlink UnixfsData_DataType = 4 + UnixfsData_HAMTShard UnixfsData_DataType = 5 +) + +// Enum value maps for UnixfsData_DataType. 
+var ( + UnixfsData_DataType_name = map[int32]string{ + 0: "Raw", + 1: "Directory", + 2: "File", + 3: "Metadata", + 4: "Symlink", + 5: "HAMTShard", + } + UnixfsData_DataType_value = map[string]int32{ + "Raw": 0, + "Directory": 1, + "File": 2, + "Metadata": 3, + "Symlink": 4, + "HAMTShard": 5, + } +) + +func (x UnixfsData_DataType) Enum() *UnixfsData_DataType { + p := new(UnixfsData_DataType) + *p = x + return p +} + +func (x UnixfsData_DataType) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (UnixfsData_DataType) Descriptor() protoreflect.EnumDescriptor { + return file_file_proto_enumTypes[0].Descriptor() +} + +func (UnixfsData_DataType) Type() protoreflect.EnumType { + return &file_file_proto_enumTypes[0] +} + +func (x UnixfsData_DataType) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Do not use. +func (x *UnixfsData_DataType) UnmarshalJSON(b []byte) error { + num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) + if err != nil { + return err + } + *x = UnixfsData_DataType(num) + return nil +} + +// Deprecated: Use UnixfsData_DataType.Descriptor instead. 
+func (UnixfsData_DataType) EnumDescriptor() ([]byte, []int) { + return file_file_proto_rawDescGZIP(), []int{0, 0} +} + +type UnixfsData struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Type *UnixfsData_DataType `protobuf:"varint,1,req,name=Type,enum=pb.UnixfsData_DataType" json:"Type,omitempty"` + Data []byte `protobuf:"bytes,2,opt,name=Data" json:"Data,omitempty"` + Filesize *uint64 `protobuf:"varint,3,opt,name=filesize" json:"filesize,omitempty"` + Blocksizes []uint64 `protobuf:"varint,4,rep,name=blocksizes" json:"blocksizes,omitempty"` + HashType *uint64 `protobuf:"varint,5,opt,name=hashType" json:"hashType,omitempty"` + Fanout *uint64 `protobuf:"varint,6,opt,name=fanout" json:"fanout,omitempty"` +} + +func (x *UnixfsData) Reset() { + *x = UnixfsData{} + if protoimpl.UnsafeEnabled { + mi := &file_file_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *UnixfsData) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*UnixfsData) ProtoMessage() {} + +func (x *UnixfsData) ProtoReflect() protoreflect.Message { + mi := &file_file_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use UnixfsData.ProtoReflect.Descriptor instead. 
+func (*UnixfsData) Descriptor() ([]byte, []int) { + return file_file_proto_rawDescGZIP(), []int{0} +} + +func (x *UnixfsData) GetType() UnixfsData_DataType { + if x != nil && x.Type != nil { + return *x.Type + } + return UnixfsData_Raw +} + +func (x *UnixfsData) GetData() []byte { + if x != nil { + return x.Data + } + return nil +} + +func (x *UnixfsData) GetFilesize() uint64 { + if x != nil && x.Filesize != nil { + return *x.Filesize + } + return 0 +} + +func (x *UnixfsData) GetBlocksizes() []uint64 { + if x != nil { + return x.Blocksizes + } + return nil +} + +func (x *UnixfsData) GetHashType() uint64 { + if x != nil && x.HashType != nil { + return *x.HashType + } + return 0 +} + +func (x *UnixfsData) GetFanout() uint64 { + if x != nil && x.Fanout != nil { + return *x.Fanout + } + return 0 +} + +type Metadata struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + MimeType *string `protobuf:"bytes,1,opt,name=MimeType" json:"MimeType,omitempty"` +} + +func (x *Metadata) Reset() { + *x = Metadata{} + if protoimpl.UnsafeEnabled { + mi := &file_file_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Metadata) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Metadata) ProtoMessage() {} + +func (x *Metadata) ProtoReflect() protoreflect.Message { + mi := &file_file_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Metadata.ProtoReflect.Descriptor instead. 
+func (*Metadata) Descriptor() ([]byte, []int) { + return file_file_proto_rawDescGZIP(), []int{1} +} + +func (x *Metadata) GetMimeType() string { + if x != nil && x.MimeType != nil { + return *x.MimeType + } + return "" +} + +type PBLink struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // binary CID (with no multibase prefix) of the target object + Hash []byte `protobuf:"bytes,1,opt,name=Hash" json:"Hash,omitempty"` + // UTF-8 string name + Name *string `protobuf:"bytes,2,opt,name=Name" json:"Name,omitempty"` + // cumulative size of target object + Tsize *uint64 `protobuf:"varint,3,opt,name=Tsize" json:"Tsize,omitempty"` +} + +func (x *PBLink) Reset() { + *x = PBLink{} + if protoimpl.UnsafeEnabled { + mi := &file_file_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PBLink) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PBLink) ProtoMessage() {} + +func (x *PBLink) ProtoReflect() protoreflect.Message { + mi := &file_file_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PBLink.ProtoReflect.Descriptor instead. 
+func (*PBLink) Descriptor() ([]byte, []int) { + return file_file_proto_rawDescGZIP(), []int{2} +} + +func (x *PBLink) GetHash() []byte { + if x != nil { + return x.Hash + } + return nil +} + +func (x *PBLink) GetName() string { + if x != nil && x.Name != nil { + return *x.Name + } + return "" +} + +func (x *PBLink) GetTsize() uint64 { + if x != nil && x.Tsize != nil { + return *x.Tsize + } + return 0 +} + +type PBNode struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // refs to other objects + Links []*PBLink `protobuf:"bytes,2,rep,name=Links" json:"Links,omitempty"` + // opaque user data + Data []byte `protobuf:"bytes,1,opt,name=Data" json:"Data,omitempty"` +} + +func (x *PBNode) Reset() { + *x = PBNode{} + if protoimpl.UnsafeEnabled { + mi := &file_file_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PBNode) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PBNode) ProtoMessage() {} + +func (x *PBNode) ProtoReflect() protoreflect.Message { + mi := &file_file_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PBNode.ProtoReflect.Descriptor instead. 
+func (*PBNode) Descriptor() ([]byte, []int) { + return file_file_proto_rawDescGZIP(), []int{3} +} + +func (x *PBNode) GetLinks() []*PBLink { + if x != nil { + return x.Links + } + return nil +} + +func (x *PBNode) GetData() []byte { + if x != nil { + return x.Data + } + return nil +} + +var File_file_proto protoreflect.FileDescriptor + +var file_file_proto_rawDesc = []byte{ + 0x0a, 0x0a, 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x02, 0x70, 0x62, + 0x22, 0x95, 0x02, 0x0a, 0x0a, 0x75, 0x6e, 0x69, 0x78, 0x66, 0x73, 0x44, 0x61, 0x74, 0x61, 0x12, + 0x2b, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x02, 0x28, 0x0e, 0x32, 0x17, 0x2e, + 0x70, 0x62, 0x2e, 0x75, 0x6e, 0x69, 0x78, 0x66, 0x73, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x44, 0x61, + 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, + 0x44, 0x61, 0x74, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x44, 0x61, 0x74, 0x61, + 0x12, 0x1a, 0x0a, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x04, 0x52, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x69, 0x7a, 0x65, 0x12, 0x1e, 0x0a, 0x0a, + 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x69, 0x7a, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x04, + 0x52, 0x0a, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x69, 0x7a, 0x65, 0x73, 0x12, 0x1a, 0x0a, 0x08, + 0x68, 0x61, 0x73, 0x68, 0x54, 0x79, 0x70, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x08, + 0x68, 0x61, 0x73, 0x68, 0x54, 0x79, 0x70, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x66, 0x61, 0x6e, 0x6f, + 0x75, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x66, 0x61, 0x6e, 0x6f, 0x75, 0x74, + 0x22, 0x56, 0x0a, 0x08, 0x44, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x07, 0x0a, 0x03, + 0x52, 0x61, 0x77, 0x10, 0x00, 0x12, 0x0d, 0x0a, 0x09, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, + 0x72, 0x79, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x46, 0x69, 0x6c, 0x65, 0x10, 0x02, 0x12, 0x0c, + 0x0a, 0x08, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 
0x74, 0x61, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, + 0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x10, 0x04, 0x12, 0x0d, 0x0a, 0x09, 0x48, 0x41, 0x4d, + 0x54, 0x53, 0x68, 0x61, 0x72, 0x64, 0x10, 0x05, 0x22, 0x26, 0x0a, 0x08, 0x4d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x12, 0x1a, 0x0a, 0x08, 0x4d, 0x69, 0x6d, 0x65, 0x54, 0x79, 0x70, 0x65, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x4d, 0x69, 0x6d, 0x65, 0x54, 0x79, 0x70, 0x65, + 0x22, 0x46, 0x0a, 0x06, 0x50, 0x42, 0x4c, 0x69, 0x6e, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x48, 0x61, + 0x73, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x48, 0x61, 0x73, 0x68, 0x12, 0x12, + 0x0a, 0x04, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x4e, 0x61, + 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x54, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x05, 0x54, 0x73, 0x69, 0x7a, 0x65, 0x22, 0x3e, 0x0a, 0x06, 0x50, 0x42, 0x4e, 0x6f, + 0x64, 0x65, 0x12, 0x20, 0x0a, 0x05, 0x4c, 0x69, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x0a, 0x2e, 0x70, 0x62, 0x2e, 0x50, 0x42, 0x4c, 0x69, 0x6e, 0x6b, 0x52, 0x05, 0x4c, + 0x69, 0x6e, 0x6b, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x44, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x0c, 0x52, 0x04, 0x44, 0x61, 0x74, 0x61, 0x42, 0x03, 0x5a, 0x01, 0x2e, +} + +var ( + file_file_proto_rawDescOnce sync.Once + file_file_proto_rawDescData = file_file_proto_rawDesc +) + +func file_file_proto_rawDescGZIP() []byte { + file_file_proto_rawDescOnce.Do(func() { + file_file_proto_rawDescData = protoimpl.X.CompressGZIP(file_file_proto_rawDescData) + }) + return file_file_proto_rawDescData +} + +var file_file_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_file_proto_msgTypes = make([]protoimpl.MessageInfo, 4) +var file_file_proto_goTypes = []interface{}{ + (UnixfsData_DataType)(0), // 0: pb.unixfsData.DataType + (*UnixfsData)(nil), // 1: pb.unixfsData + (*Metadata)(nil), // 2: pb.Metadata + (*PBLink)(nil), // 3: pb.PBLink + (*PBNode)(nil), 
// 4: pb.PBNode +} +var file_file_proto_depIdxs = []int32{ + 0, // 0: pb.unixfsData.Type:type_name -> pb.unixfsData.DataType + 3, // 1: pb.PBNode.Links:type_name -> pb.PBLink + 2, // [2:2] is the sub-list for method output_type + 2, // [2:2] is the sub-list for method input_type + 2, // [2:2] is the sub-list for extension type_name + 2, // [2:2] is the sub-list for extension extendee + 0, // [0:2] is the sub-list for field type_name +} + +func init() { file_file_proto_init() } +func file_file_proto_init() { + if File_file_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_file_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*UnixfsData); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_file_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Metadata); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_file_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PBLink); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_file_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PBNode); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_file_proto_rawDesc, + NumEnums: 1, + NumMessages: 4, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_file_proto_goTypes, + DependencyIndexes: file_file_proto_depIdxs, + EnumInfos: file_file_proto_enumTypes, + MessageInfos: file_file_proto_msgTypes, + }.Build() + 
File_file_proto = out.File + file_file_proto_rawDesc = nil + file_file_proto_goTypes = nil + file_file_proto_depIdxs = nil +} diff --git a/internal/pb/file.proto b/internal/pb/file.proto new file mode 100644 index 000000000..3792da43a --- /dev/null +++ b/internal/pb/file.proto @@ -0,0 +1,47 @@ +syntax = "proto2"; + +package pb; + +option go_package = "."; + +message unixfsData { + enum DataType { + Raw = 0; + Directory = 1; + File = 2; + Metadata = 3; + Symlink = 4; + HAMTShard = 5; + } + + required DataType Type = 1; + optional bytes Data = 2; + optional uint64 filesize = 3; + repeated uint64 blocksizes = 4; + + optional uint64 hashType = 5; + optional uint64 fanout = 6; +} + +message Metadata { + optional string MimeType = 1; +} + +message PBLink { + // binary CID (with no multibase prefix) of the target object + optional bytes Hash = 1; + + // UTF-8 string name + optional string Name = 2; + + // cumulative size of target object + optional uint64 Tsize = 3; +} + +message PBNode { + // refs to other objects + repeated PBLink Links = 2; + + // opaque user data + optional bytes Data = 1; +} From 8ed010b4e00dd421b5f2593dc1719717e28666a9 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 6 Oct 2022 22:13:07 +0200 Subject: [PATCH 03/39] I should add a CI (go mod tidy) --- go.mod | 14 ++++++++------ go.sum | 17 ++++++----------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/go.mod b/go.mod index 712ddceb8..7ec81532c 100644 --- a/go.mod +++ b/go.mod @@ -2,21 +2,24 @@ module github.com/Jorropo/go-featheripfs go 1.19 +require ( + github.com/ipfs/go-cid v0.3.2 + github.com/ipfs/go-ipld-cbor v0.0.6 + github.com/ipfs/go-verifcid v0.0.2 + github.com/multiformats/go-multihash v0.2.1 + google.golang.org/protobuf v1.28.1 +) + require ( github.com/ipfs/go-block-format v0.0.2 // indirect - github.com/ipfs/go-cid v0.3.2 // indirect github.com/ipfs/go-ipfs-util v0.0.1 // indirect - github.com/ipfs/go-ipld-cbor v0.0.6 // indirect github.com/ipfs/go-ipld-format v0.0.1 
// indirect - github.com/ipfs/go-verifcid v0.0.2 // indirect github.com/klauspost/cpuid/v2 v2.0.9 // indirect - github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 // indirect github.com/minio/sha256-simd v1.0.0 // indirect github.com/mr-tron/base58 v1.2.0 // indirect github.com/multiformats/go-base32 v0.0.3 // indirect github.com/multiformats/go-base36 v0.1.0 // indirect github.com/multiformats/go-multibase v0.0.3 // indirect - github.com/multiformats/go-multihash v0.2.1 // indirect github.com/multiformats/go-varint v0.0.6 // indirect github.com/polydawn/refmt v0.0.0-20190221155625-df39d6c2d992 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect @@ -24,6 +27,5 @@ require ( golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e // indirect golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1 // indirect golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 // indirect - google.golang.org/protobuf v1.28.1 // indirect lukechampine.com/blake3 v1.1.6 // indirect ) diff --git a/go.sum b/go.sum index 0825d4238..ea015b69e 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,8 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gxed/hashland/keccakpg v0.0.1/go.mod h1:kRzw3HkwxFU1mpmPP8v1WyQzwdGfmKFJ6tItnhQ67kU= github.com/gxed/hashland/murmur3 v0.0.1/go.mod h1:KjXop02n4/ckmZSnY2+HKcLud/tcmvhST0bie/0lS48= @@ -18,12 +20,11 @@ github.com/ipfs/go-ipld-format v0.0.1 h1:HCu4eB/Gh+KD/Q0M8u888RFkorTWNIL3da4oc5d 
github.com/ipfs/go-ipld-format v0.0.1/go.mod h1:kyJtbkDALmFHv3QR6et67i35QzO3S0dCDnkOJhcZkms= github.com/ipfs/go-verifcid v0.0.2 h1:XPnUv0XmdH+ZIhLGKg6U2vaPaRDXb9urMyNVCE7uvTs= github.com/ipfs/go-verifcid v0.0.2/go.mod h1:40cD9x1y4OWnFXbLNJYRe7MpNvWlMn3LZAG5Wb4xnPU= +github.com/jtolds/gls v4.2.1+incompatible h1:fSuqC+Gmlu6l/ZYAoZzx2pyucC8Xza35fpRVWLVmUEE= github.com/jtolds/gls v4.2.1+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= -github.com/klauspost/cpuid/v2 v2.0.4 h1:g0I61F2K2DjRHz1cnxlkNSBIaePVoJIjjnHui8QHbiw= github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1 h1:lYpkrQH5ajf0OXOcUbGjvZxxijuBwbbmlSxLiuofa+g= github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1/go.mod h1:pD8RvIylQ358TN4wwqatJ8rNavkEINozVn9DtGI3dfQ= github.com/minio/sha256-simd v0.0.0-20190131020904-2d45a736cd16/go.mod h1:2FMWW+8GMoPweT6+pI63m9YE3Lmw4J71hV56Chs1E/U= github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= @@ -42,39 +43,33 @@ github.com/multiformats/go-multibase v0.0.3 h1:l/B6bJDQjvQ5G52jw4QGSYeOTZoAwIO77 github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc= github.com/multiformats/go-multihash v0.0.1/go.mod h1:w/5tugSrLEbWqlcgJabL3oHFKTwfvkofsjW2Qa1ct4U= github.com/multiformats/go-multihash v0.0.10/go.mod h1:YSLudS+Pi8NHE7o6tb3D8vrpKa63epEDmG8nTduyAew= -github.com/multiformats/go-multihash v0.0.15 h1:hWOPdrNqDjwHDx82vsYGSDZNyktOJJ2dzZJzFkOV1jM= -github.com/multiformats/go-multihash v0.0.15/go.mod h1:D6aZrWNLFTV/ynMpKsNtB40mJzmCl4jb1alC0OvHiHg= github.com/multiformats/go-multihash v0.2.1 h1:aem8ZT0VA2nCHHk7bPJ1BjUbHNciqZC/d16Vve9l108= 
github.com/multiformats/go-multihash v0.2.1/go.mod h1:WxoMcYG85AZVQUyRyo9s4wULvW5qrI9vb2Lt6evduFc= github.com/multiformats/go-varint v0.0.6 h1:gk85QWKxh3TazbLxED/NlDVv8+q+ReFJk7Y2W/KhfNY= github.com/multiformats/go-varint v0.0.6/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= github.com/polydawn/refmt v0.0.0-20190221155625-df39d6c2d992 h1:bzMe+2coZJYHnhGgVlcQKuRy4FSny4ds8dLQjw5P1XE= github.com/polydawn/refmt v0.0.0-20190221155625-df39d6c2d992/go.mod h1:uIp+gprXxxrWSjjklXD+mN4wed/tMfjMMmN/9+JsA9o= +github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM= github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= +github.com/smartystreets/goconvey v0.0.0-20190222223459-a17d461953aa h1:E+gaaifzi2xF65PbDmuKI3PhLWY6G5opMLniFq8vmXA= github.com/smartystreets/goconvey v0.0.0-20190222223459-a17d461953aa/go.mod h1:2RVY1rIf+2J2o/IM9+vPq9RzmHDSseB7FoXiSNIUsoU= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/warpfork/go-wish v0.0.0-20180510122957-5ad1f5abf436 h1:qOpVTI+BrstcjTZLm2Yz/3sOnqkzj3FQoh0g+E5s3Gc= github.com/warpfork/go-wish v0.0.0-20180510122957-5ad1f5abf436/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158 h1:WXhVOwj2USAXB5oMDwRl3piOux2XMV9TANaYxXHdkoE= github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158/go.mod h1:Xj/M2wWU+QdTdRbu/L/1dIZY8/Wb2K9pAhtroQuxJJI= golang.org/x/crypto v0.0.0-20190211182817-74369b46fc67/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto 
v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= -golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf h1:B2n+Zi5QeYRDAEodEu72OS36gmTWjgpXr2+cWcBW90o= -golang.org/x/crypto v0.0.0-20210506145944-38f3c27a63bf/go.mod h1:P+XmwS30IXTQdn5tA2iutPOUgjI07+tq3H3K9MVA1s8= golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e h1:T8NU3HyQ8ClP4SEE+KbFlg6n0NhuTsN4MyznaarGsZM= golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190219092855-153ac476189d/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210309074719-68d13333faf2 h1:46ULzRKLh1CwgRq2dC5SlBzEqqNCi8rreOZnNrbqcIY= -golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1 h1:SrN+KX8Art/Sf4HNj6Zcz06G7VEz+7w9tdXTPOZ7+l4= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= From 40d219654e07b3695040b91456c54e1fe9cb47f1 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 6 Oct 2022 22:34:43 +0200 
Subject: [PATCH 04/39] fix: correctly decode inline CIDs --- entry.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/entry.go b/entry.go index 01f7d96dd..516cd5119 100644 --- a/entry.go +++ b/entry.go @@ -165,7 +165,8 @@ func (d *downloader) Read(b []byte) (int, error) { pref := c.Prefix() switch pref.MhType { case mh.IDENTITY: - data = c.Hash()[1:] // skip the 0x00 prefix + data = c.Hash()[1:] // skip the 0x00 prefix + data = data[len(data)-pref.MhLength:] // skip the multihash length default: if err := verifcid.ValidateCid(c); err != nil { return 0, fmt.Errorf("cid %s don't pass safe test: %w", cidStringTruncate(c), err) @@ -223,7 +224,7 @@ func (d *downloader) Read(b []byte) (int, error) { if todo.rangeKnown { expectedSize := todo.high - todo.low if uint64(len(data)) != expectedSize { - return 0, fmt.Errorf("leaf isn't size is incorrect for %s, expected %d; got %d", cidStringTruncate(c), len(data), expectedSize) + return 0, fmt.Errorf("leaf isn't size is incorrect for %s, expected %d; got %d", cidStringTruncate(c), expectedSize, len(data)) } } d.curBlock = data From 9b572d0eaffd7dca902d340d8d7076959f528d8d Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 6 Oct 2022 22:46:12 +0200 Subject: [PATCH 05/39] chore: cleanup low and high in the default region --- entry.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/entry.go b/entry.go index 516cd5119..389f628f7 100644 --- a/entry.go +++ b/entry.go @@ -83,7 +83,7 @@ func DownloadFile(c cid.Cid) (io.ReadCloser, error) { r := &downloader{ Closer: resp.Body, - state: [][]region{{{low: 0, high: 1<<64 - 1, c: c}}}, + state: [][]region{{{c: c}}}, } r.buf = *bufio.NewReaderSize(resp.Body, maxBlockSize*2+4096*2) From 8bb4200d87688b93036f30bfd537bcf09e12b8d7 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 5 Jan 2023 12:19:21 +0100 Subject: [PATCH 06/39] refactor: use a simpler stack for the state --- entry.go | 46 +++++++++++++++++++--------------------------- go.mod | 3 ++- go.sum 
| 8 +++++--- 3 files changed, 26 insertions(+), 31 deletions(-) diff --git a/entry.go b/entry.go index 389f628f7..b2aad1234 100644 --- a/entry.go +++ b/entry.go @@ -9,6 +9,7 @@ import ( "net/http" pb "github.com/Jorropo/go-featheripfs/internal/pb" + "golang.org/x/exp/slices" "google.golang.org/protobuf/proto" "github.com/ipfs/go-cid" @@ -17,10 +18,6 @@ import ( mh "github.com/multiformats/go-multihash" ) -func create[T any](len int) []T { - return append([]T{}, make([]T, len)...) -} - func cidStringTruncate(c cid.Cid) string { cidStr := c.String() if len(cidStr) > maxCidCharDisplay { @@ -56,7 +53,7 @@ type downloader struct { io.Closer buf bufio.Reader - state [][]region + state []region curBlock []byte } @@ -83,7 +80,7 @@ func DownloadFile(c cid.Cid) (io.ReadCloser, error) { r := &downloader{ Closer: resp.Body, - state: [][]region{{{c: c}}}, + state: []region{{c: c}}, } r.buf = *bufio.NewReaderSize(resp.Body, maxBlockSize*2+4096*2) @@ -149,15 +146,8 @@ func (d *downloader) Read(b []byte) (int, error) { // pop current item from the DFS stack last := len(d.state) - 1 - todos := d.state[last] - todo := todos[0] - todos = todos[1:] - if len(todos) == 0 { - d.state[last] = nil // early gc - d.state = d.state[:last] - } else { - d.state[last] = todos - } + todo := d.state[last] + d.state = d.state[:last] var data []byte c := todo.c @@ -270,7 +260,6 @@ func (d *downloader) Read(b []byte) (int, error) { filesize := uint64(len(metadata.Data)) if len(blocksizes) != 0 { - var subRegions []region if todo.rangeKnown { var regionsInBound int for _, bs := range blocksizes { @@ -280,10 +269,11 @@ func (d *downloader) Read(b []byte) (int, error) { filesize += bs } - subRegions = create[region](regionsInBound) - var j int + regions := slices.Grow(d.state, regionsInBound) cursor := uint64(len(metadata.Data)) - for i, bs := range blocksizes { + for i := len(blocksizes); i > 0; { + i-- + bs := blocksizes[i] if cursor >= todo.high { break } @@ -302,34 +292,36 @@ func (d *downloader) 
Read(b []byte) (int, error) { return 0, fmt.Errorf("link %d of %s: %w", i, cidStringTruncate(c), err) } - subRegions[j] = region{ + regions = append(regions, region{ c: subCid, low: low, high: high, rangeKnown: true, - } - j++ + }) } cursor += bs } + d.state = regions } else { - subRegions = create[region](len(blocksizes)) - for i, bs := range blocksizes { + regions := slices.Grow(d.state, len(blocksizes)) + for i := len(blocksizes); i > 0; { + i-- + bs := blocksizes[i] subCid, err := loadCidFromBytes(links[i].Hash) if err != nil { return 0, fmt.Errorf("link %d of %s: %w", i, cidStringTruncate(c), err) } - subRegions[i] = region{ + regions = append(regions, region{ c: subCid, low: 0, high: bs, rangeKnown: true, - } + }) filesize += bs } + d.state = regions } - d.state = append(d.state, subRegions) } if todo.rangeKnown { diff --git a/go.mod b/go.mod index 7ec81532c..3e9dbdaa7 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/ipfs/go-ipld-cbor v0.0.6 github.com/ipfs/go-verifcid v0.0.2 github.com/multiformats/go-multihash v0.2.1 + golang.org/x/exp v0.0.0-20230105000112-eab7a2c85304 google.golang.org/protobuf v1.28.1 ) @@ -25,7 +26,7 @@ require ( github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/whyrusleeping/cbor-gen v0.0.0-20200123233031-1cdf64d27158 // indirect golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e // indirect - golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1 // indirect + golang.org/x/sys v0.1.0 // indirect golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 // indirect lukechampine.com/blake3 v1.1.6 // indirect ) diff --git a/go.sum b/go.sum index ea015b69e..70110de20 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,7 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= github.com/google/go-cmp v0.5.5/go.mod 
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gxed/hashland/keccakpg v0.0.1/go.mod h1:kRzw3HkwxFU1mpmPP8v1WyQzwdGfmKFJ6tItnhQ67kU= @@ -64,12 +64,14 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e h1:T8NU3HyQ8ClP4SEE+KbFlg6n0NhuTsN4MyznaarGsZM= golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/exp v0.0.0-20230105000112-eab7a2c85304 h1:YUqj+XKtfrn3kXjFIiZ8jwKROD7ioAOOHUuo3ZZ2opc= +golang.org/x/exp v0.0.0-20230105000112-eab7a2c85304/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190219092855-153ac476189d/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1 h1:SrN+KX8Art/Sf4HNj6Zcz06G7VEz+7w9tdXTPOZ7+l4= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/xerrors 
v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= From 2242d0b30ada6d9802bfa8ef7c127a2e4e33a7d9 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 5 Jan 2023 12:24:23 +0100 Subject: [PATCH 07/39] fix: correctly handle rangeKnown I first meant it to be used for unixfs correctness checking; however, I later implemented it for range requests. This makes it fail at both, so let's keep correctness; I'll implement range requests later. --- entry.go | 98 ++++++++++++++++++++------------------------------------------ 1 file changed, 20 insertions(+), 78 deletions(-) diff --git a/entry.go b/entry.go index b2aad1234..cb209d505 100644 --- a/entry.go +++ b/entry.go @@ -44,8 +44,7 @@ const maxCidCharDisplay = 512 type region struct { c cid.Cid - low uint64 - high uint64 + size uint64 rangeKnown bool } @@ -212,9 +211,8 @@ func (d *downloader) Read(b []byte) (int, error) { switch pref.Codec { case cid.Raw: if todo.rangeKnown { - expectedSize := todo.high - todo.low - if uint64(len(data)) != expectedSize { - return 0, fmt.Errorf("leaf isn't size is incorrect for %s, expected %d; got %d", cidStringTruncate(c), expectedSize, len(data)) + if uint64(len(data)) != todo.size { + return 0, fmt.Errorf("leaf isn't size is incorrect for %s, expected %d; got %d", cidStringTruncate(c), todo.size, len(data)) } } d.curBlock = data @@ -246,88 +244,32 @@ func (d *downloader) Read(b []byte) (int, error) { return 0, fmt.Errorf("inconsistent sisterlists for %s, %d vs %d", cidStringTruncate(c), len(blocksizes), len(links)) } - if todo.rangeKnown { - if todo.low < uint64(len(metadata.Data)) { - high := uint64(len(metadata.Data)) - if high > todo.high { - high = todo.high - } - d.curBlock = metadata.Data[todo.low:high] - } - } else { - d.curBlock = metadata.Data - } + d.curBlock = metadata.Data filesize := uint64(len(metadata.Data)) if len(blocksizes) != 0 { - if
todo.rangeKnown { - var regionsInBound int - for _, bs := range blocksizes { - if todo.low <= filesize+bs && filesize < todo.high { - regionsInBound++ - } - filesize += bs + regions := slices.Grow(d.state, len(blocksizes)) + for i := len(blocksizes); i > 0; { + i-- + bs := blocksizes[i] + subCid, err := loadCidFromBytes(links[i].Hash) + if err != nil { + return 0, fmt.Errorf("link %d of %s: %w", i, cidStringTruncate(c), err) } - regions := slices.Grow(d.state, regionsInBound) - cursor := uint64(len(metadata.Data)) - for i := len(blocksizes); i > 0; { - i-- - bs := blocksizes[i] - if cursor >= todo.high { - break - } - if todo.low <= cursor+bs { - var low uint64 - if todo.low > cursor { - low = todo.low - cursor - } - high := todo.high - cursor - if bs < high { - high = bs - } - - subCid, err := loadCidFromBytes(links[i].Hash) - if err != nil { - return 0, fmt.Errorf("link %d of %s: %w", i, cidStringTruncate(c), err) - } - - regions = append(regions, region{ - c: subCid, - low: low, - high: high, - rangeKnown: true, - }) - } - cursor += bs - } - d.state = regions - } else { - regions := slices.Grow(d.state, len(blocksizes)) - for i := len(blocksizes); i > 0; { - i-- - bs := blocksizes[i] - subCid, err := loadCidFromBytes(links[i].Hash) - if err != nil { - return 0, fmt.Errorf("link %d of %s: %w", i, cidStringTruncate(c), err) - } - - regions = append(regions, region{ - c: subCid, - low: 0, - high: bs, - rangeKnown: true, - }) - filesize += bs - } - d.state = regions + regions = append(regions, region{ + c: subCid, + size: bs, + rangeKnown: true, + }) + filesize += bs } + d.state = regions } if todo.rangeKnown { - expectedSize := todo.high - todo.low - if filesize != expectedSize { - return 0, fmt.Errorf("inconsistent filesize for %s, expected %d; got %d", cidStringTruncate(c), expectedSize, filesize) + if todo.size != filesize { + return 0, fmt.Errorf("inconsistent filesize for %s, expected %d; got %d", cidStringTruncate(c), todo.size, filesize) } } if 
metadata.Filesize != nil { From 86975d6eaa7cd9e194d270b62532b2a4ab4ff440 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 5 Jan 2023 12:29:40 +0100 Subject: [PATCH 08/39] cleanup: make the maxElementSize clearer --- entry.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/entry.go b/entry.go index cb209d505..740da97b3 100644 --- a/entry.go +++ b/entry.go @@ -40,6 +40,7 @@ const gateway = "http://localhost:8080/ipfs/" const maxHeaderSize = 32 * 1024 * 1024 // 32MiB const maxBlockSize = 2 * 1024 * 1024 // 2MiB const maxCidSize = 4096 +const maxElementSize = maxCidSize + maxBlockSize + binary.MaxVarintLen64 const maxCidCharDisplay = 512 type region struct { @@ -81,7 +82,7 @@ func DownloadFile(c cid.Cid) (io.ReadCloser, error) { Closer: resp.Body, state: []region{{c: c}}, } - r.buf = *bufio.NewReaderSize(resp.Body, maxBlockSize*2+4096*2) + r.buf = *bufio.NewReaderSize(resp.Body, maxElementSize*2) headerSize, err := binary.ReadUvarint(&r.buf) if err != nil { From a20ddaa8941077f51d7884dbb28be2891af515b5 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 5 Jan 2023 12:39:36 +0100 Subject: [PATCH 09/39] feat: automatically close the connection if Read fails --- entry.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/entry.go b/entry.go index 740da97b3..52b6ef74b 100644 --- a/entry.go +++ b/entry.go @@ -57,8 +57,6 @@ type downloader struct { curBlock []byte } -// If DownloadFile returns a non nil error, you MUST call Close on the reader, -// even if reader.Read returns an error. 
func DownloadFile(c cid.Cid) (io.ReadCloser, error) { req, err := http.NewRequest("GET", gateway+c.String(), bytes.NewReader(nil)) if err != nil { @@ -144,6 +142,13 @@ func (d *downloader) Read(b []byte) (int, error) { return 0, io.EOF } + var good bool + defer func() { + if !good { + d.Close() + } + }() + // pop current item from the DFS stack last := len(d.state) - 1 todo := d.state[last] @@ -285,6 +290,8 @@ func (d *downloader) Read(b []byte) (int, error) { default: return 0, fmt.Errorf("unknown codec type %d for %s; expected Raw or Dag-PB", pref.Codec, cidStringTruncate(c)) } + + good = true } n := copy(b, d.curBlock) From 4707f58b7e1c8be7ee33c5f1f44fae5326bb4193 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 5 Jan 2023 12:39:46 +0100 Subject: [PATCH 10/39] cleanup: clean identity expression --- entry.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/entry.go b/entry.go index 52b6ef74b..78af89987 100644 --- a/entry.go +++ b/entry.go @@ -160,8 +160,7 @@ func (d *downloader) Read(b []byte) (int, error) { pref := c.Prefix() switch pref.MhType { case mh.IDENTITY: - data = c.Hash()[1:] // skip the 0x00 prefix - data = data[len(data)-pref.MhLength:] // skip the multihash length + data = data[1+len(data)-pref.MhLength:] // skip the 0x00 prefix and the multihash length default: if err := verifcid.ValidateCid(c); err != nil { return 0, fmt.Errorf("cid %s don't pass safe test: %w", cidStringTruncate(c), err) From 0169b3249e517bd14e78369efb5e40cf168bdd55 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sat, 25 Mar 2023 10:01:27 +0100 Subject: [PATCH 11/39] fix: identity expression This fixes bug introduced in 4707f58b7e1c8be7ee33c5f1f44fae5326bb4193. This is was completely wrong (does not load the digest and incorrectly truncate the data incorrectly.) 
--- entry.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/entry.go b/entry.go index 78af89987..fe9758ae0 100644 --- a/entry.go +++ b/entry.go @@ -160,7 +160,8 @@ func (d *downloader) Read(b []byte) (int, error) { pref := c.Prefix() switch pref.MhType { case mh.IDENTITY: - data = data[1+len(data)-pref.MhLength:] // skip the 0x00 prefix and the multihash length + data = c.Hash() + data = data[len(data)-pref.MhLength:] // extract digest default: if err := verifcid.ValidateCid(c); err != nil { return 0, fmt.Errorf("cid %s don't pass safe test: %w", cidStringTruncate(c), err) From f5f8e3393a718cec758aaa8c5c17e2a036318411 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sun, 11 Jun 2023 10:10:48 +0200 Subject: [PATCH 12/39] unixfs: bootstrap new implementation --- unixfs/json.go | 103 ++++++++++++++++++++++++++++++ unixfs/unixfs.go | 144 ++++++++++++++++++++++++++++++++++++++++++ unixfs/unixfs_test.go | 72 +++++++++++++++++++++ 3 files changed, 319 insertions(+) create mode 100644 unixfs/json.go create mode 100644 unixfs/unixfs.go create mode 100644 unixfs/unixfs_test.go diff --git a/unixfs/json.go b/unixfs/json.go new file mode 100644 index 000000000..28701123d --- /dev/null +++ b/unixfs/json.go @@ -0,0 +1,103 @@ +package unixfs + +import ( + "encoding" + "errors" + "fmt" + "strconv" +) + +var _ fmt.Stringer = AliasableString(nil) +var _ encoding.TextMarshaler = AliasableString(nil) +var _ encoding.TextUnmarshaler = (*AliasableString)(nil) + +// AliasableString is a byte slice that have string json sementics, allowing to skip allocations while decoding. +type AliasableString []byte + +func (s AliasableString) String() string { + return string(s) +} + +func (s AliasableString) MarshalText() ([]byte, error) { + return s, nil +} + +func (s *AliasableString) UnmarshalText(b []byte) error { + // Sadly we must copy. + // UnmarshalText must copy the text if it wishes to retain the text after returning. 
+ new := make([]byte, len(b)) + copy(new, b) + *s = b + return nil +} + +var _ fmt.Stringer = Type(0) +var _ encoding.TextMarshaler = Type(0) +var _ encoding.TextUnmarshaler = (*Type)(nil) + +// Type is an alternative to [Node] which allows for zero-allocation code. +type Type uint8 + +func (t Type) String() string { + switch t { + case TError: + return "Error" + case TFile: + return "File" + case TDirectory: + return "Directory" + case TSymlink: + return "Symlink" + default: + return "error unknown type: " + strconv.FormatUint(uint64(t), 10) + } +} + +var ( + textError = []byte("Error") + textFile = []byte("File") + textDirectory = []byte("Directory") + textSymlink = []byte("Symlink") +) + +func (t Type) MarshalText() ([]byte, error) { + switch t { + case TError: + return textError, nil + case TFile: + return textFile, nil + case TDirectory: + return textDirectory, nil + case TSymlink: + return textSymlink, nil + default: + return nil, errors.New(t.String()) + } +} + +func (t *Type) UnmarshalText(b []byte) error { + switch string(b) { + case "Error": + *t = TError + return nil + case "File": + *t = TFile + return nil + case "Directory": + *t = TDirectory + return nil + case "Symlink": + *t = TSymlink + return nil + default: + return fmt.Errorf("unknown unixfs type: %q", string(b)) + } +} + +const ( + // TError is returned when something wrong happend. + TError Type = iota + TFile + TDirectory + TSymlink +) diff --git a/unixfs/unixfs.go b/unixfs/unixfs.go new file mode 100644 index 000000000..48e5c256a --- /dev/null +++ b/unixfs/unixfs.go @@ -0,0 +1,144 @@ +// unixfs provides type safe low level premitives to read and write unixfs blocks. +// It handles encoding, decoding and validation but does not handle any +// cross-block linking, this is provided by various opiniated implementations +// available in sub packages or as an exercise to the consumer. 
+// +// This package is Data-Oriented, the main way this impact tradeoffs is that +// state is moved to control flow when possible and allocations are hammered to +// a minimum for example by returning pointers aliased to the input. +package unixfs + +import ( + "errors" + + "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + "github.com/multiformats/go-multicodec" +) + +// Entry is a basic unit block. +type Entry struct { + Cid cid.Cid + // tSize encode the comulative size of the DAG. + // the zero value indicates tsize is missing. + tSize uint64 +} + +func (e Entry) TSize() (tsize uint64, ok bool) { + if e.tSize == 0 { + return 0, false + } + + return e.tSize - 1, true +} + +func (e Entry) Untyped() Entry { + return e +} + +var _ Node = File{} + +type File struct { + badge + Entry + Data []byte + Childrens []FileEntry +} + +func FileEntryWithTSize(c cid.Cid, fileSize, tSize uint64) FileEntry { + return FileEntry{Entry: Entry{Cid: c, tSize: tSize + 1}, FileSize: fileSize} +} + +type FileEntry struct { + Entry + // FileSize is the logical size of the file at this location once decoded. + FileSize uint64 +} + +var _ Node = Directory{} + +type Directory struct { + badge + Entry + Childrens []DirectoryEntry +} + +type DirectoryEntry struct { + Entry + Name AliasableString +} + +var _ Node = Symlink{} + +type Symlink struct { + badge + Entry + Value []byte +} + +// badge authorize a type to be a [Node]. +// If you add a new type using this you need to update [Parse]. +type badge struct{} + +func (badge) nodeBadge() { + panic("badge was called even tho it only exists as a way to trick the type checker") +} + +// Node is an interface that can exclusively be a [File], [Directory] or [Symlink]. We might add more in the future. +// You MUST NOT embed this interface, it's only purpose is to provide type safe enums. +type Node interface { + // Untyped returns the untyped [Entry] for that value stripped of all type related information. 
+ Untyped() Entry + // nodeBadge must never be called it's just here to trick the type checker. + nodeBadge() +} + +// Parse it provides a type safe solution to Decode using the badged interface [Node]. +// [File.Data], [DirectoryEntry.Name] and [Symlink.Value] values are aliased to b.RawData(). +// The data argument MUST hash to cid, this wont check the validaty of the hash. +func Parse(b blocks.Block) (Node, error) { + switch t, f, d, s, err := ParseAppend(nil, nil, b.Cid(), b.RawData()); t { + case TError: + return nil, err + case TFile: + return f, nil + case TDirectory: + return d, nil + case TSymlink: + return s, nil + default: + return nil, errors.New("unknown node type in Parse (Should never happen please open an issue !): " + t.String()) + } +} + +// ParseAppend is like [Parse] except it is turbo charged to avoid allocation. +// It returns a [Type] which indicates which of the struct is correct, all of this is passed on the stack or registers. +// Assuming the capacity in the slices are big enough and err == nil it does not allocate anything, arguments do not escape. +// [File.Data], [DirectoryEntry.Name] and [Symlink.Value] values are aliased to b.RawData(). +// It also accepts the input slices which will be append to and returned in structs to avoid allocations. +// It is only ever gonna clobber the slice related to the type of data decoded. +// It only ever clobber extra capacity within the slices, it may do so in the case of an error. +// The data argument MUST hash to cid, this wont check the validaty of the hash. +func ParseAppend(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, cid cid.Cid, data []byte) (t Type, f File, d Directory, s Symlink, err error) { + // Avoid clobbering the used part of the slice. 
+ fileChildrens = fileChildrens[len(fileChildrens):] + directoryChildrens = directoryChildrens[len(directoryChildrens):] + + pref := cid.Prefix() + switch c := multicodec.Code(pref.Codec); c { + case multicodec.Raw: + t = TFile + f = File{ + Entry: Entry{ + Cid: cid, + tSize: uint64(len(data)) + 1, + }, + Data: data, + Childrens: fileChildrens, + } + return + default: + err = errors.New("unsupported codec: " + c.String()) + return + } +} diff --git a/unixfs/unixfs_test.go b/unixfs/unixfs_test.go new file mode 100644 index 000000000..f9ac061f4 --- /dev/null +++ b/unixfs/unixfs_test.go @@ -0,0 +1,72 @@ +package unixfs_test + +import ( + "bytes" + "testing" + + . "github.com/ipfs/boxo/unixfs" + blocks "github.com/ipfs/go-block-format" + "github.com/ipfs/go-cid" + mh "github.com/multiformats/go-multihash" +) + +func TestRaw(t *testing.T) { + t.Parallel() + data := []byte("👋🌍️") + mh, err := mh.Sum(data, mh.BLAKE3, -1) + if err != nil { + t.Fatal() + } + c := cid.NewCidV1(cid.Raw, mh) + + validate := func(t *testing.T, f File) { + if !bytes.Equal(data, f.Data) { + t.Errorf("expected %v got %v", data, f.Data) + } + if l := len(f.Childrens); l != 0 { + t.Errorf("expected 0 Childrens got %d", l) + } + tsize, ok := f.TSize() + if !ok { + t.Error("expected to find TSize but didn't") + } else if l := uint64(len(data)); tsize != l { + t.Errorf("expected tsize %d got %d", l, tsize) + } + if f.Cid != c { + t.Errorf("expected cid %s got %s", c, f.Cid) + } + } + + t.Run("Parse", func(t *testing.T) { + t.Parallel() + b, err := blocks.NewBlockWithCid(data, c) + if err != nil { + t.Fatal(err) + } + a, err := Parse(b) + if err != nil { + t.Fatal(err) + } + f, ok := a.(File) + if !ok { + t.Fatalf("expected File got %T", a) + } + validate(t, f) + }) + t.Run("ParseAppend", func(t *testing.T) { + t.Parallel() + var someArr [2]FileEntry + typ, f, _, _, err := ParseAppend(someArr[:1], nil, c, data) + if err != nil { + t.Fatal(err) + } + if typ != TFile { + t.Fatalf("expected %v got %v", 
TFile, typ) + } + validate(t, f) + // Check someArr[1] to ensure it doesn't touch already existing entries before len. + if &someArr[1] != &f.Childrens[:1][0] { + t.Fatal("expected pointers to still be aliased but they are not") + } + }) +} From 0fa2b0b6fd0e0c7d1708e7cf007ec3ba96c823eb Mon Sep 17 00:00:00 2001 From: Jorropo Date: Mon, 12 Jun 2023 06:39:21 +0200 Subject: [PATCH 13/39] unixfs: add pb file support --- examples/go.mod | 2 +- examples/go.sum | 4 +- go.mod | 2 +- go.sum | 4 +- unixfs/pb.go | 438 ++++++++++++++++++++++++++++++++++++++++++ unixfs/pb_test.go | 45 +++++ unixfs/unixfs.go | 10 +- unixfs/unixfs_test.go | 77 ++++++++ 8 files changed, 573 insertions(+), 9 deletions(-) create mode 100644 unixfs/pb.go create mode 100644 unixfs/pb_test.go diff --git a/examples/go.mod b/examples/go.mod index a7aa94964..0d9ddad09 100644 --- a/examples/go.mod +++ b/examples/go.mod @@ -162,7 +162,7 @@ require ( golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f // indirect google.golang.org/grpc v1.53.0 // indirect - google.golang.org/protobuf v1.28.1 // indirect + google.golang.org/protobuf v1.30.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect lukechampine.com/blake3 v1.1.7 // indirect nhooyr.io/websocket v1.8.7 // indirect diff --git a/examples/go.sum b/examples/go.sum index aa888c694..8e6652fff 100644 --- a/examples/go.sum +++ b/examples/go.sum @@ -1062,8 +1062,8 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= -google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 
+google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= +google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/go.mod b/go.mod index 49fce9b75..5a9c3ce70 100644 --- a/go.mod +++ b/go.mod @@ -79,6 +79,7 @@ require ( golang.org/x/oauth2 v0.4.0 golang.org/x/sync v0.1.0 golang.org/x/sys v0.6.0 + google.golang.org/protobuf v1.30.0 ) require ( @@ -170,7 +171,6 @@ require ( google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f // indirect google.golang.org/grpc v1.53.0 // indirect - google.golang.org/protobuf v1.28.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect lukechampine.com/blake3 v1.1.7 // indirect nhooyr.io/websocket v1.8.7 // indirect diff --git a/go.sum b/go.sum index c05d4d76f..19a2a5793 100644 --- a/go.sum +++ b/go.sum @@ -1083,8 +1083,8 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= -google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= +google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= 
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/unixfs/pb.go b/unixfs/pb.go new file mode 100644 index 000000000..3d2f39818 --- /dev/null +++ b/unixfs/pb.go @@ -0,0 +1,438 @@ +// @Jorropo: The reason why I picked the solution to write a snowflake protobuf +// decoder here is because I couldn't find a zero allocation protobuf decoder generator. +// I do not count pooling or arenas as zero allocation btw. +// If you are reading this text trying to add more fields and this is too painfull +// to deal with feel free to remove this code and replace it with an allocation +// codegen decoder. Ping me too if I'm still around I might revert your changes +// and bring back the allocation free decoder but with the new feature. +package unixfs + +import ( + "errors" + "fmt" + + "github.com/ipfs/go-cid" + "golang.org/x/exp/slices" + "google.golang.org/protobuf/encoding/protowire" +) + +const ( + _ = iota + pbDirectory + pbFile + pbMetadata + pbSymlink + pbHAMTShard +) + +// Reference: +// +// message Data { +// enum DataType { +// Raw = 0; +// Directory = 1; +// File = 2; +// Metadata = 3; +// Symlink = 4; +// HAMTShard = 5; +// } +// +// required DataType Type = 1; +// optional bytes Data = 2; +// optional uint64 filesize = 3; +// repeated uint64 blocksizes = 4; +// +// optional uint64 hashType = 5; +// optional uint64 fanout = 6; +// } +// +// message Metadata { +// optional string MimeType = 1; +// } +// +// message PBLink { +// // binary CID (with no multibase prefix) of the target object +// optional bytes Hash = 1; +// +// // UTF-8 string name +// optional string Name = 2; +// +// // cumulative size of target object +// optional uint64 Tsize = 3; +// } +// +// message PBNode { +// // refs to other objects +// repeated PBLink Links = 2; +// +// // Unixfs message inside the user opaque data +// 
optional Data Data = 1; +// } + +func parsePB(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, inCid cid.Cid, origData []byte) (typ Type, file File, dir Directory, sym Symlink, err error) { + var dataType uint64 + var fileLinks, blocksizes uint + var content []byte + selfTSize := uint64(1) + data := origData + + moveZeroNamedDirectoryEntriesToDirectoryChildrens := func(extra int) { + // some zero named children were confused for file entries before, move them here + // FIXME: is an empty name a valid file name in a directory ? + directoryChildrens = slices.Grow(directoryChildrens, len(fileChildrens)+extra) + for _, v := range fileChildrens { + directoryChildrens = append(directoryChildrens, DirectoryEntry{ + Entry: Entry{Cid: v.Cid, tSize: v.tSize}, + Name: AliasableString{}, + }) + } + + fileChildrens = nil + } + + for len(data) != 0 { // iterate at the root level of the message + outerNumber, t, l := protowire.ConsumeTag(data) + if l < 0 { + err = protowire.ParseError(l) + return + } + data = data[l:] + switch outerNumber { + case 1, 2: + // optional Data Data = 1; + // repeated PBLink Links = 2; + var group bool + var mData []byte + switch t { + case protowire.StartGroupType: + // boundry delimited message + group = true + mData = data + case protowire.BytesType: + // length prefixed message + mData, l = protowire.ConsumeBytes(data) + if l < 0 { + err = protowire.ParseError(l) + return + } + data = data[l:] // we just extracted the message so walk over it completely + default: + if outerNumber == 1 { + err = fmt.Errorf("unknown type for Data field %v", t) + } else { + err = fmt.Errorf("unknown type for Links field %v", t) + } + return + } + + // FIXME: add support aliased CIDs in github.com/ipfs/go-cid + var c cid.Cid + var name []byte + var tSize uint64 // will be offset by +1, zero means not found + + for len(mData) != 0 { + n, t, l := protowire.ConsumeTag(mData) + if l < 0 { + err = protowire.ParseError(l) + return + } + mData = mData[l:] + + 
if t == protowire.EndGroupType { + // if we find an EGROUP here it must be ours since pbHandleUnknownField skip over groups. + break + } + + if outerNumber == 1 { + // optional Data Data = 1; + switch n { + case 1: + // required DataType Type = 1; + mData, dataType, err = pbDecodeNumber(t, mData) + if err != nil { + return + } + // due to how "Last One Wins" we can't do anything meaningfull without fully decoding the message first. + + case 2: + // optional bytes Data = 2; + switch t { + case protowire.BytesType: + content, l = protowire.ConsumeBytes(mData) + if l < 0 { + err = protowire.ParseError(l) + return + } + mData = mData[l:] + + default: + err = fmt.Errorf("unknown type for Data.Data field %v", t) + return + } + + case 4: + // repeated uint64 blocksizes = 4; + addBlocksize := func(blocksize uint64) error { + if len(directoryChildrens) != 0 { + return errors.New("invalid unixfs node, mixed use of blocksizes and named links") + } + + if uint(len(fileChildrens)) > blocksizes { + // we have discovered more links than blocksizes at this point, play catchup + fileChildrens[blocksizes].FileSize = blocksize + } else { + // we have discovered more blocksizes than links at this point, add new entries + fileChildrens = append(fileChildrens, FileEntry{FileSize: blocksize}) + } + blocksizes++ + return nil + } + + switch t { + // FIXME: this condition accepts Fixed numbers, is that valid ? + // I mean it works but do other protobuf parsers do this ? 
+ case protowire.VarintType, protowire.Fixed64Type, protowire.Fixed32Type: + var blocksize uint64 + mData, blocksize, err = pbDecodeNumber(t, mData) + if err != nil { + return + } + addBlocksize(blocksize) + + case protowire.BytesType: + // packed representation + packed, l := protowire.ConsumeBytes(mData) + if l < 0 { + err = protowire.ParseError(l) + return + } + mData = mData[l:] + + for len(packed) != 0 { + blocksize, l := protowire.ConsumeVarint(packed) + if l < 0 { + err = protowire.ParseError(l) + return + } + packed = packed[l:] + + addBlocksize(blocksize) + } + + default: + err = fmt.Errorf("unknown type for Data.Blocksizes field %v", t) + return + } + + default: + mData, err = pbHandleUnknownField(t, mData) + if err != nil { + return + } + } + } else { + // repeated PBLink Links = 2; + switch n { + case 1: + // optional bytes Hash = 1; + switch t { + case protowire.BytesType: + cBytes, l := protowire.ConsumeBytes(mData) + if l < 0 { + err = protowire.ParseError(l) + return + } + mData = mData[l:] + + c, err = cid.Cast(cBytes) + if err != nil { + err = fmt.Errorf("failed to decode cid: %w", err) + return + } + default: + err = fmt.Errorf("unknown type for Links.Hash field %v", t) + return + } + + case 2: + // optional string Name = 2; + switch t { + case protowire.BytesType: + name, l = protowire.ConsumeBytes(mData) + if l < 0 { + err = protowire.ParseError(l) + return + } + mData = mData[l:] + + default: + err = fmt.Errorf("unknown type for Links.Name field %v", t) + return + } + + case 3: + // optional uint64 Tsize = 3; + mData, tSize, err = pbDecodeNumber(t, mData) + if selfTSize != 0 { + if tSize == 0 { + selfTSize = 0 + } else { + selfTSize += tSize + } + } + tSize++ + + default: + mData, err = pbHandleUnknownField(t, mData) + if err != nil { + return + } + } + } + } + + if outerNumber == 2 { + // repeated PBLink Links = 2; + if c == cid.Undef { + err = errors.New("link is missing CID") + } + + // note we accept present but empty name entries on files 
because some historic + // encoder emited a whole bunch of them in the wild + if len(name) != 0 || len(directoryChildrens) != 0 { + // Directory entry + if blocksizes != 0 { + err = errors.New("mixed use of blocksizes and named links") + return + } + + if len(fileChildrens) != 0 { + moveZeroNamedDirectoryEntriesToDirectoryChildrens(1) + } + + directoryChildrens = append(directoryChildrens, DirectoryEntry{ + Entry: Entry{Cid: c, tSize: tSize}, + Name: AliasableString(name), + }) + } else { + // File entry + if uint(len(fileChildrens)) > fileLinks { + // we have discovered more blocksizes than links at this point, play catchup + fileChildrens[fileLinks].Cid = c + fileChildrens[fileLinks].tSize = tSize + } else { + // we have discovered more links than blocksizes at this point, add new entries + fileChildrens = append(fileChildrens, FileEntry{Entry: Entry{Cid: c, tSize: tSize}}) + } + fileLinks++ + } + } + + if group { + // Now that we have found the end restore data. + data = mData + } + + default: + data, err = pbHandleUnknownField(t, data) + if err != nil { + return + } + } + } + + switch dataType { + case pbFile: + if len(directoryChildrens) != 0 { + err = errors.New("named links in file") + return + } + + if fileLinks != blocksizes { + err = fmt.Errorf("unmatched links (%d) and blocksizes (%d) sisterlists", uint(len(fileChildrens)), blocksizes) + return + } + + typ = TFile + file = File{ + Entry: Entry{Cid: inCid, tSize: selfTSize + uint64(len(origData))}, + Data: content, + Childrens: fileChildrens, + } + + // TODO: directory and symlink + return + default: + err = fmt.Errorf("unknown node type: %d", dataType) + return + } +} + +// pbHandleUnknownField must be called right after the tag, it will handle +// skipping uneeded values if needed. 
+func pbHandleUnknownField(t protowire.Type, data []byte) ([]byte, error) { + var l int + switch t { + case protowire.BytesType: + _, l = protowire.ConsumeBytes(data) + case protowire.VarintType: + _, l = protowire.ConsumeVarint(data) + case protowire.Fixed64Type: + _, l = protowire.ConsumeFixed64(data) + case protowire.Fixed32Type: + _, l = protowire.ConsumeFixed32(data) + case protowire.StartGroupType: + // Walks over the group, it must be called after SGROUP tag and before EGROUP. + // Groups are an ancient way to create sub messages, they work with start and end tags. + // We found an unknown group, skip all of it by tracking the stack of start and ends. + groupStack := 1 + for groupStack != 0 && len(data) != 0 { + _, t, l := protowire.ConsumeTag(data) + if l < 0 { + return nil, protowire.ParseError(l) + } + data = data[l:] + switch t { + case protowire.StartGroupType: + groupStack++ + case protowire.EndGroupType: + groupStack-- + } + } + if groupStack != 0 { + return nil, errors.New("unterminated group") + } + return data, nil + case protowire.EndGroupType: + return nil, errors.New("unmatched end-group") + default: + return nil, fmt.Errorf("unknown protobuf type: %v", t) + } + if l < 0 { + return nil, protowire.ParseError(l) + } + return data[l:], nil +} + +// pbDecodeNumber will decode a uint64 as best as it can. +// It must be called right after the tag. 
+func pbDecodeNumber(typ protowire.Type, data []byte) ([]byte, uint64, error) { + var v uint64 + var l int + switch typ { + case protowire.VarintType: + v, l = protowire.ConsumeVarint(data) + case protowire.Fixed64Type: + v, l = protowire.ConsumeFixed64(data) + case protowire.Fixed32Type: + var v32 uint32 + v32, l = protowire.ConsumeFixed32(data) + v = uint64(v32) + default: + return nil, 0, fmt.Errorf("unexpected type for number %v", typ) + } + if l < 0 { + return nil, 0, protowire.ParseError(l) + } + return data[l:], v, nil +} diff --git a/unixfs/pb_test.go b/unixfs/pb_test.go new file mode 100644 index 000000000..cda4a8792 --- /dev/null +++ b/unixfs/pb_test.go @@ -0,0 +1,45 @@ +package unixfs + +import ( + "encoding/base64" + "testing" + + "github.com/ipfs/go-cid" + mh "github.com/multiformats/go-multihash" +) + +const someDagPBBlock = `EisKIhIgVuq+9ViNicx1O8bIsb978a8u1uoTjm4taEeNW7gcB+cSABiu1OAVEioKIhIg7XyJKU3lrLCYFLKmcNTtKc82BUBCi5ePAeAqz2M1pWYSABirmGcKEAgCGPGUxxYggIDgFSDxlGc=` + +func BenchmarkPB(b *testing.B) { + data, err := base64.StdEncoding.DecodeString(someDagPBBlock) + if err != nil { + b.Fatal(err) + } + mh, err := mh.Sum(data, mh.SHA2_256, -1) + if err != nil { + b.Fatal() + } + c := cid.NewCidV0(mh) + + b.ResetTimer() + var out []FileEntry + for i := b.N; i != 0; i-- { + _, f, _, _, _ := parsePB(out[:0], nil, c, data) + out = f.Childrens + } +} + +func FuzzPB(f *testing.F) { + data, err := base64.StdEncoding.DecodeString(someDagPBBlock) + if err != nil { + f.Fatal(err) + } + f.Add(data) + f.Fuzz(func(_ *testing.T, b []byte) { + if len(b) > 2*1024*1024 { + // Assume a block limit is inplace. + return + } + parsePB(nil, nil, cid.Undef, b) + }) +} diff --git a/unixfs/unixfs.go b/unixfs/unixfs.go index 48e5c256a..a5b3584c7 100644 --- a/unixfs/unixfs.go +++ b/unixfs/unixfs.go @@ -96,6 +96,7 @@ type Node interface { // Parse it provides a type safe solution to Decode using the badged interface [Node]. 
// [File.Data], [DirectoryEntry.Name] and [Symlink.Value] values are aliased to b.RawData(). // The data argument MUST hash to cid, this wont check the validaty of the hash. +// It assumes the size of the block is limited and reasonable. func Parse(b blocks.Block) (Node, error) { switch t, f, d, s, err := ParseAppend(nil, nil, b.Cid(), b.RawData()); t { case TError: @@ -119,24 +120,27 @@ func Parse(b blocks.Block) (Node, error) { // It is only ever gonna clobber the slice related to the type of data decoded. // It only ever clobber extra capacity within the slices, it may do so in the case of an error. // The data argument MUST hash to cid, this wont check the validaty of the hash. -func ParseAppend(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, cid cid.Cid, data []byte) (t Type, f File, d Directory, s Symlink, err error) { +// It assumes the size of the block is limited and reasonable. +func ParseAppend(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, inCid cid.Cid, data []byte) (t Type, f File, d Directory, s Symlink, err error) { // Avoid clobbering the used part of the slice. fileChildrens = fileChildrens[len(fileChildrens):] directoryChildrens = directoryChildrens[len(directoryChildrens):] - pref := cid.Prefix() + pref := inCid.Prefix() switch c := multicodec.Code(pref.Codec); c { case multicodec.Raw: t = TFile f = File{ Entry: Entry{ - Cid: cid, + Cid: inCid, tSize: uint64(len(data)) + 1, }, Data: data, Childrens: fileChildrens, } return + case multicodec.DagPb: + return parsePB(fileChildrens, directoryChildrens, inCid, data) default: err = errors.New("unsupported codec: " + c.String()) return diff --git a/unixfs/unixfs_test.go b/unixfs/unixfs_test.go index f9ac061f4..3158ed13b 100644 --- a/unixfs/unixfs_test.go +++ b/unixfs/unixfs_test.go @@ -2,6 +2,7 @@ package unixfs_test import ( "bytes" + "encoding/base64" "testing" . 
"github.com/ipfs/boxo/unixfs" @@ -70,3 +71,79 @@ func TestRaw(t *testing.T) { } }) } + +func TestFilePB(t *testing.T) { + t.Parallel() + data, err := base64.StdEncoding.DecodeString(`EisKIhIgVuq+9ViNicx1O8bIsb978a8u1uoTjm4taEeNW7gcB+cSABiu1OAVEioKIhIg7XyJKU3lrLCYFLKmcNTtKc82BUBCi5ePAeAqz2M1pWYSABirmGcKEAgCGPGUxxYggIDgFSDxlGc=`) + if err != nil { + t.Fatal(err) + } + mh, err := mh.Sum(data, mh.SHA2_256, -1) + if err != nil { + t.Fatal() + } + c := cid.NewCidV0(mh) + + const firstChildrenTSize = 45623854 + const secondChildrenTSize = 1690667 + expectedChildrens := [2]FileEntry{ + FileEntryWithTSize(cid.MustParse("QmUBwP7RczPWbJSCpR4BygzvTNbJ2sfjt5yuRphSVYaJar"), 45613056, firstChildrenTSize), + FileEntryWithTSize(cid.MustParse("QmeKhUSkRVDFbxssXpnb15UQf25YdWN9Ck3rjfZA3tvD8h"), 1690225, secondChildrenTSize), + } + + validate := func(t *testing.T, f File) { + if f.Cid != c { + t.Errorf("expected %v cid got %v", c, f.Cid) + } + + if len(f.Data) != 0 { + t.Errorf("got unexpected data %q", f.Data) + } + + tSize, ok := f.TSize() + if !ok { + t.Error("missing TSize") + } else if et := uint64(len(data)) + firstChildrenTSize + secondChildrenTSize; tSize != et { + t.Errorf("tSize expected %d got %d", et, tSize) + } + + if len(f.Childrens) != 2 { + t.Errorf("expected 2 childrens got %v", f.Childrens) + } else if *(*[2]FileEntry)(f.Childrens) != expectedChildrens { + t.Errorf("childrens don't match, expected %v got %v", expectedChildrens, f.Childrens) + } + } + + t.Run("Parse", func(t *testing.T) { + t.Parallel() + b, err := blocks.NewBlockWithCid(data, c) + if err != nil { + t.Fatal(err) + } + a, err := Parse(b) + if err != nil { + t.Fatal(err) + } + f, ok := a.(File) + if !ok { + t.Fatalf("expected File got %T", a) + } + validate(t, f) + }) + t.Run("ParseAppend", func(t *testing.T) { + t.Parallel() + var someArr [3]FileEntry + typ, f, _, _, err := ParseAppend(someArr[:1], nil, c, data) + if err != nil { + t.Fatal(err) + } + if typ != TFile { + t.Fatalf("expected %v got %v", 
TFile, typ) + } + validate(t, f) + // Check someArr[1] to ensure it doesn't touch already existing entries before len. + if &someArr[1] != &f.Childrens[:1][0] { + t.Fatal("expected pointers to still be aliased but they are not") + } + }) +} From a8c8644b7df49dc06c10dc4f8af65abc19c457d3 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Mon, 12 Jun 2023 11:15:29 +0200 Subject: [PATCH 14/39] unixfs: Add support for byte slices backed CIDs for zero allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` $ benchstat /mnt/ramdisk/{old,new} name old time/op new time/op delta PB-12 730ns ± 5% 255ns ± 2% -65.04% (p=0.000 n=9+10) name old alloc/op new alloc/op delta PB-12 224B ± 0% 0B -100.00% (p=0.000 n=10+10) name old allocs/op new allocs/op delta PB-12 4.00 ± 0% 0.00 -100.00% (p=0.000 n=10+10) ``` --- examples/go.mod | 6 ++--- examples/go.sum | 12 ++++----- go.mod | 6 ++--- go.sum | 12 ++++----- unixfs/pb.go | 29 +++++++++++--------- unixfs/pb_test.go | 7 ++--- unixfs/unixfs.go | 62 +++++++++++++++++++++++-------------------- unixfs/unixfs_test.go | 21 ++++++++------- 8 files changed, 82 insertions(+), 73 deletions(-) diff --git a/examples/go.mod b/examples/go.mod index 0d9ddad09..f84a1d9e6 100644 --- a/examples/go.mod +++ b/examples/go.mod @@ -6,7 +6,7 @@ require ( github.com/gogo/protobuf v1.3.2 github.com/ipfs/boxo v0.7.1-0.20230323075409-f4a8dd6614df github.com/ipfs/go-block-format v0.1.2 - github.com/ipfs/go-cid v0.4.0 + github.com/ipfs/go-cid v0.4.2-0.20230612091241-80d1e915f662 github.com/ipfs/go-datastore v0.6.0 github.com/ipld/go-ipld-prime v0.20.0 github.com/libp2p/go-libp2p v0.26.3 @@ -102,8 +102,8 @@ require ( github.com/multiformats/go-base36 v0.2.0 // indirect github.com/multiformats/go-multiaddr-dns v0.3.1 // indirect github.com/multiformats/go-multiaddr-fmt v0.1.0 // indirect - github.com/multiformats/go-multibase v0.1.1 // indirect - github.com/multiformats/go-multihash v0.2.1 // indirect + 
github.com/multiformats/go-multibase v0.2.0 // indirect + github.com/multiformats/go-multihash v0.2.3 // indirect github.com/multiformats/go-multistream v0.4.1 // indirect github.com/multiformats/go-varint v0.0.7 // indirect github.com/onsi/ginkgo/v2 v2.5.1 // indirect diff --git a/examples/go.sum b/examples/go.sum index 8e6652fff..f95d4a076 100644 --- a/examples/go.sum +++ b/examples/go.sum @@ -284,8 +284,8 @@ github.com/ipfs/go-blockservice v0.5.0 h1:B2mwhhhVQl2ntW2EIpaWPwSCxSuqr5fFA93Ms4 github.com/ipfs/go-cid v0.0.1/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= github.com/ipfs/go-cid v0.0.3/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= github.com/ipfs/go-cid v0.0.6/go.mod h1:6Ux9z5e+HpkQdckYoX1PG/6xqKspzlEIR5SDmgqgC/I= -github.com/ipfs/go-cid v0.4.0 h1:a4pdZq0sx6ZSxbCizebnKiMCx/xI/aBBFlB73IgH4rA= -github.com/ipfs/go-cid v0.4.0/go.mod h1:uQHwDeX4c6CtyrFwdqyhpNcxVewur1M7l7fNU7LKwZk= +github.com/ipfs/go-cid v0.4.2-0.20230612091241-80d1e915f662 h1:jWQ5yOEmR1Fvv6Rj8mEye4QTeGQoKKECMieju9z5kgA= +github.com/ipfs/go-cid v0.4.2-0.20230612091241-80d1e915f662/go.mod h1:4rtyA9XdBeZBapaRNJuTY9H+/6bG4URx/cVwjAzK6fw= github.com/ipfs/go-datastore v0.6.0 h1:JKyz+Gvz1QEZw0LsX1IBn+JFCJQH4SJVFtM4uWU0Myk= github.com/ipfs/go-datastore v0.6.0/go.mod h1:rt5M3nNbSO/8q1t4LNkLyUwRs8HupMeN/8O4Vn9YAT8= github.com/ipfs/go-detect-race v0.0.1 h1:qX/xay2W3E4Q1U7d9lNs1sU9nvguX0a7319XbyQ6cOk= @@ -469,16 +469,16 @@ github.com/multiformats/go-multiaddr-fmt v0.1.0 h1:WLEFClPycPkp4fnIzoFoV9FVd49/e github.com/multiformats/go-multiaddr-fmt v0.1.0/go.mod h1:hGtDIW4PU4BqJ50gW2quDuPVjyWNZxToGUh/HwTZYJo= github.com/multiformats/go-multibase v0.0.1/go.mod h1:bja2MqRZ3ggyXtZSEDKpl0uO/gviWFaSteVbWT51qgs= github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc= -github.com/multiformats/go-multibase v0.1.1 h1:3ASCDsuLX8+j4kx58qnJ4YFq/JWTJpCyDW27ztsVTOI= -github.com/multiformats/go-multibase v0.1.1/go.mod h1:ZEjHE+IsUrgp5mhlEAYjMtZwK1k4haNkcaPg9aoe1a8= 
+github.com/multiformats/go-multibase v0.2.0 h1:isdYCVLvksgWlMW9OZRYJEa9pZETFivncJHmHnnd87g= +github.com/multiformats/go-multibase v0.2.0/go.mod h1:bFBZX4lKCA/2lyOFSAoKH5SS6oPyjtnzK/XTFDPkNuk= github.com/multiformats/go-multicodec v0.8.1 h1:ycepHwavHafh3grIbR1jIXnKCsFm0fqsfEOsJ8NtKE8= github.com/multiformats/go-multicodec v0.8.1/go.mod h1:L3QTQvMIaVBkXOXXtVmYE+LI16i14xuaojr/H7Ai54k= github.com/multiformats/go-multihash v0.0.1/go.mod h1:w/5tugSrLEbWqlcgJabL3oHFKTwfvkofsjW2Qa1ct4U= github.com/multiformats/go-multihash v0.0.8/go.mod h1:YSLudS+Pi8NHE7o6tb3D8vrpKa63epEDmG8nTduyAew= github.com/multiformats/go-multihash v0.0.10/go.mod h1:YSLudS+Pi8NHE7o6tb3D8vrpKa63epEDmG8nTduyAew= github.com/multiformats/go-multihash v0.0.13/go.mod h1:VdAWLKTwram9oKAatUcLxBNUjdtcVwxObEQBtRfuyjc= -github.com/multiformats/go-multihash v0.2.1 h1:aem8ZT0VA2nCHHk7bPJ1BjUbHNciqZC/d16Vve9l108= -github.com/multiformats/go-multihash v0.2.1/go.mod h1:WxoMcYG85AZVQUyRyo9s4wULvW5qrI9vb2Lt6evduFc= +github.com/multiformats/go-multihash v0.2.3 h1:7Lyc8XfX/IY2jWb/gI7JP+o7JEq9hOa7BFvVU9RSh+U= +github.com/multiformats/go-multihash v0.2.3/go.mod h1:dXgKXCXjBzdscBLk9JkjINiEsCKRVch90MdaGiKsvSM= github.com/multiformats/go-multistream v0.4.1 h1:rFy0Iiyn3YT0asivDUIR05leAdwZq3de4741sbiSdfo= github.com/multiformats/go-multistream v0.4.1/go.mod h1:Mz5eykRVAjJWckE2U78c6xqdtyNUEhKSM0Lwar2p77Q= github.com/multiformats/go-varint v0.0.1/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= diff --git a/go.mod b/go.mod index 5a9c3ce70..4ce6a978f 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( github.com/ipfs/bbloom v0.0.4 github.com/ipfs/go-bitfield v1.1.0 github.com/ipfs/go-block-format v0.1.2 - github.com/ipfs/go-cid v0.4.0 + github.com/ipfs/go-cid v0.4.2-0.20230612091241-80d1e915f662 github.com/ipfs/go-cidutil v0.1.0 github.com/ipfs/go-datastore v0.6.0 github.com/ipfs/go-detect-race v0.0.1 @@ -48,9 +48,9 @@ require ( github.com/multiformats/go-base32 v0.1.0 github.com/multiformats/go-multiaddr v0.8.0 
github.com/multiformats/go-multiaddr-dns v0.3.1 - github.com/multiformats/go-multibase v0.1.1 + github.com/multiformats/go-multibase v0.2.0 github.com/multiformats/go-multicodec v0.8.1 - github.com/multiformats/go-multihash v0.2.1 + github.com/multiformats/go-multihash v0.2.3 github.com/multiformats/go-multistream v0.4.1 github.com/multiformats/go-varint v0.0.7 github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 diff --git a/go.sum b/go.sum index 19a2a5793..4d08f8af7 100644 --- a/go.sum +++ b/go.sum @@ -289,8 +289,8 @@ github.com/ipfs/go-cid v0.0.1/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUP github.com/ipfs/go-cid v0.0.3/go.mod h1:GHWU/WuQdMPmIosc4Yn1bcCT7dSeX4lBafM7iqUPQvM= github.com/ipfs/go-cid v0.0.5/go.mod h1:plgt+Y5MnOey4vO4UlUazGqdbEXuFYitED67FexhXog= github.com/ipfs/go-cid v0.0.6/go.mod h1:6Ux9z5e+HpkQdckYoX1PG/6xqKspzlEIR5SDmgqgC/I= -github.com/ipfs/go-cid v0.4.0 h1:a4pdZq0sx6ZSxbCizebnKiMCx/xI/aBBFlB73IgH4rA= -github.com/ipfs/go-cid v0.4.0/go.mod h1:uQHwDeX4c6CtyrFwdqyhpNcxVewur1M7l7fNU7LKwZk= +github.com/ipfs/go-cid v0.4.2-0.20230612091241-80d1e915f662 h1:jWQ5yOEmR1Fvv6Rj8mEye4QTeGQoKKECMieju9z5kgA= +github.com/ipfs/go-cid v0.4.2-0.20230612091241-80d1e915f662/go.mod h1:4rtyA9XdBeZBapaRNJuTY9H+/6bG4URx/cVwjAzK6fw= github.com/ipfs/go-cidutil v0.1.0 h1:RW5hO7Vcf16dplUU60Hs0AKDkQAVPVplr7lk97CFL+Q= github.com/ipfs/go-cidutil v0.1.0/go.mod h1:e7OEVBMIv9JaOxt9zaGEmAoSlXW9jdFZ5lP/0PwcfpA= github.com/ipfs/go-datastore v0.5.0/go.mod h1:9zhEApYMTl17C8YDp7JmU7sQZi2/wqiYh73hakZ90Bk= @@ -492,16 +492,16 @@ github.com/multiformats/go-multiaddr-fmt v0.1.0 h1:WLEFClPycPkp4fnIzoFoV9FVd49/e github.com/multiformats/go-multiaddr-fmt v0.1.0/go.mod h1:hGtDIW4PU4BqJ50gW2quDuPVjyWNZxToGUh/HwTZYJo= github.com/multiformats/go-multibase v0.0.1/go.mod h1:bja2MqRZ3ggyXtZSEDKpl0uO/gviWFaSteVbWT51qgs= github.com/multiformats/go-multibase v0.0.3/go.mod h1:5+1R4eQrT3PkYZ24C3W2Ue2tPwIdYQD509ZjSb5y9Oc= -github.com/multiformats/go-multibase v0.1.1 
h1:3ASCDsuLX8+j4kx58qnJ4YFq/JWTJpCyDW27ztsVTOI= -github.com/multiformats/go-multibase v0.1.1/go.mod h1:ZEjHE+IsUrgp5mhlEAYjMtZwK1k4haNkcaPg9aoe1a8= +github.com/multiformats/go-multibase v0.2.0 h1:isdYCVLvksgWlMW9OZRYJEa9pZETFivncJHmHnnd87g= +github.com/multiformats/go-multibase v0.2.0/go.mod h1:bFBZX4lKCA/2lyOFSAoKH5SS6oPyjtnzK/XTFDPkNuk= github.com/multiformats/go-multicodec v0.8.1 h1:ycepHwavHafh3grIbR1jIXnKCsFm0fqsfEOsJ8NtKE8= github.com/multiformats/go-multicodec v0.8.1/go.mod h1:L3QTQvMIaVBkXOXXtVmYE+LI16i14xuaojr/H7Ai54k= github.com/multiformats/go-multihash v0.0.1/go.mod h1:w/5tugSrLEbWqlcgJabL3oHFKTwfvkofsjW2Qa1ct4U= github.com/multiformats/go-multihash v0.0.8/go.mod h1:YSLudS+Pi8NHE7o6tb3D8vrpKa63epEDmG8nTduyAew= github.com/multiformats/go-multihash v0.0.10/go.mod h1:YSLudS+Pi8NHE7o6tb3D8vrpKa63epEDmG8nTduyAew= github.com/multiformats/go-multihash v0.0.13/go.mod h1:VdAWLKTwram9oKAatUcLxBNUjdtcVwxObEQBtRfuyjc= -github.com/multiformats/go-multihash v0.2.1 h1:aem8ZT0VA2nCHHk7bPJ1BjUbHNciqZC/d16Vve9l108= -github.com/multiformats/go-multihash v0.2.1/go.mod h1:WxoMcYG85AZVQUyRyo9s4wULvW5qrI9vb2Lt6evduFc= +github.com/multiformats/go-multihash v0.2.3 h1:7Lyc8XfX/IY2jWb/gI7JP+o7JEq9hOa7BFvVU9RSh+U= +github.com/multiformats/go-multihash v0.2.3/go.mod h1:dXgKXCXjBzdscBLk9JkjINiEsCKRVch90MdaGiKsvSM= github.com/multiformats/go-multistream v0.4.1 h1:rFy0Iiyn3YT0asivDUIR05leAdwZq3de4741sbiSdfo= github.com/multiformats/go-multistream v0.4.1/go.mod h1:Mz5eykRVAjJWckE2U78c6xqdtyNUEhKSM0Lwar2p77Q= github.com/multiformats/go-varint v0.0.1/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= diff --git a/unixfs/pb.go b/unixfs/pb.go index 3d2f39818..dadc21483 100644 --- a/unixfs/pb.go +++ b/unixfs/pb.go @@ -69,7 +69,11 @@ const ( // optional Data Data = 1; // } -func parsePB(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, inCid cid.Cid, origData []byte) (typ Type, file File, dir Directory, sym Symlink, err error) { +func parsePB[Self, Children cid.Storage]( + 
fileChildrens []FileEntry[Children], + directoryChildrens []DirectoryEntry[Children], + inCid cid.GenericCid[Self], origData []byte, +) (typ Type, file File[Self, Children], dir Directory[Self, Children], sym Symlink[Self], err error) { var dataType uint64 var fileLinks, blocksizes uint var content []byte @@ -81,8 +85,8 @@ func parsePB(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, inC // FIXME: is an empty name a valid file name in a directory ? directoryChildrens = slices.Grow(directoryChildrens, len(fileChildrens)+extra) for _, v := range fileChildrens { - directoryChildrens = append(directoryChildrens, DirectoryEntry{ - Entry: Entry{Cid: v.Cid, tSize: v.tSize}, + directoryChildrens = append(directoryChildrens, DirectoryEntry[Children]{ + Entry: Entry[Children]{Cid: v.Cid, tSize: v.tSize}, Name: AliasableString{}, }) } @@ -125,8 +129,7 @@ func parsePB(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, inC return } - // FIXME: add support aliased CIDs in github.com/ipfs/go-cid - var c cid.Cid + var c cid.GenericCid[Children] var name []byte var tSize uint64 // will be offset by +1, zero means not found @@ -182,7 +185,7 @@ func parsePB(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, inC fileChildrens[blocksizes].FileSize = blocksize } else { // we have discovered more blocksizes than links at this point, add new entries - fileChildrens = append(fileChildrens, FileEntry{FileSize: blocksize}) + fileChildrens = append(fileChildrens, FileEntry[Children]{FileSize: blocksize}) } blocksizes++ return nil @@ -244,7 +247,7 @@ func parsePB(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, inC } mData = mData[l:] - c, err = cid.Cast(cBytes) + c, err = cid.CastGeneric[Children](cBytes) if err != nil { err = fmt.Errorf("failed to decode cid: %w", err) return @@ -293,7 +296,7 @@ func parsePB(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, inC if outerNumber == 2 { // repeated PBLink Links = 2; - if c == 
cid.Undef { + if !c.Defined() { err = errors.New("link is missing CID") } @@ -310,8 +313,8 @@ func parsePB(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, inC moveZeroNamedDirectoryEntriesToDirectoryChildrens(1) } - directoryChildrens = append(directoryChildrens, DirectoryEntry{ - Entry: Entry{Cid: c, tSize: tSize}, + directoryChildrens = append(directoryChildrens, DirectoryEntry[Children]{ + Entry: Entry[Children]{Cid: c, tSize: tSize}, Name: AliasableString(name), }) } else { @@ -322,7 +325,7 @@ func parsePB(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, inC fileChildrens[fileLinks].tSize = tSize } else { // we have discovered more links than blocksizes at this point, add new entries - fileChildrens = append(fileChildrens, FileEntry{Entry: Entry{Cid: c, tSize: tSize}}) + fileChildrens = append(fileChildrens, FileEntry[Children]{Entry: Entry[Children]{Cid: c, tSize: tSize}}) } fileLinks++ } @@ -354,8 +357,8 @@ func parsePB(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, inC } typ = TFile - file = File{ - Entry: Entry{Cid: inCid, tSize: selfTSize + uint64(len(origData))}, + file = File[Self, Children]{ + Entry: Entry[Self]{Cid: inCid, tSize: selfTSize + uint64(len(origData))}, Data: content, Childrens: fileChildrens, } diff --git a/unixfs/pb_test.go b/unixfs/pb_test.go index cda4a8792..b83d22f0c 100644 --- a/unixfs/pb_test.go +++ b/unixfs/pb_test.go @@ -19,10 +19,10 @@ func BenchmarkPB(b *testing.B) { if err != nil { b.Fatal() } - c := cid.NewCidV0(mh) + c := cid.NewCidV0Generic[[]byte](mh) b.ResetTimer() - var out []FileEntry + var out []FileEntry[[]byte] for i := b.N; i != 0; i-- { _, f, _, _, _ := parsePB(out[:0], nil, c, data) out = f.Childrens @@ -40,6 +40,7 @@ func FuzzPB(f *testing.F) { // Assume a block limit is inplace. 
return } - parsePB(nil, nil, cid.Undef, b) + var zero cid.GenericCid[[]byte] + parsePB[[]byte, []byte](nil, nil, zero, b) }) } diff --git a/unixfs/unixfs.go b/unixfs/unixfs.go index a5b3584c7..e8b926cb6 100644 --- a/unixfs/unixfs.go +++ b/unixfs/unixfs.go @@ -17,14 +17,14 @@ import ( ) // Entry is a basic unit block. -type Entry struct { - Cid cid.Cid +type Entry[S cid.Storage] struct { + Cid cid.GenericCid[S] // tSize encode the comulative size of the DAG. // the zero value indicates tsize is missing. tSize uint64 } -func (e Entry) TSize() (tsize uint64, ok bool) { +func (e Entry[S]) TSize() (tsize uint64, ok bool) { if e.tSize == 0 { return 0, false } @@ -32,47 +32,47 @@ func (e Entry) TSize() (tsize uint64, ok bool) { return e.tSize - 1, true } -func (e Entry) Untyped() Entry { +func (e Entry[S]) Untyped() Entry[S] { return e } -var _ Node = File{} +var _ Node[string] = File[string, string]{} -type File struct { +type File[Self, Children cid.Storage] struct { badge - Entry + Entry[Self] Data []byte - Childrens []FileEntry + Childrens []FileEntry[Children] } -func FileEntryWithTSize(c cid.Cid, fileSize, tSize uint64) FileEntry { - return FileEntry{Entry: Entry{Cid: c, tSize: tSize + 1}, FileSize: fileSize} +func FileEntryWithTSize[S cid.Storage](c cid.GenericCid[S], fileSize, tSize uint64) FileEntry[S] { + return FileEntry[S]{Entry: Entry[S]{Cid: c, tSize: tSize + 1}, FileSize: fileSize} } -type FileEntry struct { - Entry +type FileEntry[S cid.Storage] struct { + Entry[S] // FileSize is the logical size of the file at this location once decoded. 
FileSize uint64 } -var _ Node = Directory{} +var _ Node[string] = Directory[string, string]{} -type Directory struct { +type Directory[Self, Children cid.Storage] struct { badge - Entry - Childrens []DirectoryEntry + Entry[Self] + Childrens []DirectoryEntry[Children] } -type DirectoryEntry struct { - Entry +type DirectoryEntry[S cid.Storage] struct { + Entry[S] Name AliasableString } -var _ Node = Symlink{} +var _ Node[string] = Symlink[string]{} -type Symlink struct { +type Symlink[S cid.Storage] struct { badge - Entry + Entry[S] Value []byte } @@ -86,9 +86,9 @@ func (badge) nodeBadge() { // Node is an interface that can exclusively be a [File], [Directory] or [Symlink]. We might add more in the future. // You MUST NOT embed this interface, it's only purpose is to provide type safe enums. -type Node interface { +type Node[S cid.Storage] interface { // Untyped returns the untyped [Entry] for that value stripped of all type related information. - Untyped() Entry + Untyped() Entry[S] // nodeBadge must never be called it's just here to trick the type checker. nodeBadge() } @@ -97,8 +97,8 @@ type Node interface { // [File.Data], [DirectoryEntry.Name] and [Symlink.Value] values are aliased to b.RawData(). // The data argument MUST hash to cid, this wont check the validaty of the hash. // It assumes the size of the block is limited and reasonable. -func Parse(b blocks.Block) (Node, error) { - switch t, f, d, s, err := ParseAppend(nil, nil, b.Cid(), b.RawData()); t { +func Parse[Children cid.Storage](b blocks.Block) (Node[string], error) { + switch t, f, d, s, err := ParseAppend[string, Children](nil, nil, b.Cid(), b.RawData()); t { case TError: return nil, err case TFile: @@ -121,7 +121,11 @@ func Parse(b blocks.Block) (Node, error) { // It only ever clobber extra capacity within the slices, it may do so in the case of an error. // The data argument MUST hash to cid, this wont check the validaty of the hash. // It assumes the size of the block is limited and reasonable. 
-func ParseAppend(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, inCid cid.Cid, data []byte) (t Type, f File, d Directory, s Symlink, err error) { +func ParseAppend[Self, Children cid.Storage]( + fileChildrens []FileEntry[Children], + directoryChildrens []DirectoryEntry[Children], + inCid cid.GenericCid[Self], data []byte, +) (t Type, f File[Self, Children], d Directory[Self, Children], s Symlink[Self], err error) { // Avoid clobbering the used part of the slice. fileChildrens = fileChildrens[len(fileChildrens):] directoryChildrens = directoryChildrens[len(directoryChildrens):] @@ -130,8 +134,8 @@ func ParseAppend(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, switch c := multicodec.Code(pref.Codec); c { case multicodec.Raw: t = TFile - f = File{ - Entry: Entry{ + f = File[Self, Children]{ + Entry: Entry[Self]{ Cid: inCid, tSize: uint64(len(data)) + 1, }, @@ -140,7 +144,7 @@ func ParseAppend(fileChildrens []FileEntry, directoryChildrens []DirectoryEntry, } return case multicodec.DagPb: - return parsePB(fileChildrens, directoryChildrens, inCid, data) + return parsePB[Self, Children](fileChildrens, directoryChildrens, inCid, data) default: err = errors.New("unsupported codec: " + c.String()) return diff --git a/unixfs/unixfs_test.go b/unixfs/unixfs_test.go index 3158ed13b..09320f480 100644 --- a/unixfs/unixfs_test.go +++ b/unixfs/unixfs_test.go @@ -9,6 +9,7 @@ import ( blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-cid" mh "github.com/multiformats/go-multihash" + "golang.org/x/exp/slices" ) func TestRaw(t *testing.T) { @@ -20,7 +21,7 @@ func TestRaw(t *testing.T) { } c := cid.NewCidV1(cid.Raw, mh) - validate := func(t *testing.T, f File) { + validate := func(t *testing.T, f File[string, string]) { if !bytes.Equal(data, f.Data) { t.Errorf("expected %v got %v", data, f.Data) } @@ -44,11 +45,11 @@ func TestRaw(t *testing.T) { if err != nil { t.Fatal(err) } - a, err := Parse(b) + a, err := Parse[string](b) if err != nil { 
t.Fatal(err) } - f, ok := a.(File) + f, ok := a.(File[string, string]) if !ok { t.Fatalf("expected File got %T", a) } @@ -56,7 +57,7 @@ func TestRaw(t *testing.T) { }) t.Run("ParseAppend", func(t *testing.T) { t.Parallel() - var someArr [2]FileEntry + var someArr [2]FileEntry[string] typ, f, _, _, err := ParseAppend(someArr[:1], nil, c, data) if err != nil { t.Fatal(err) @@ -86,12 +87,12 @@ func TestFilePB(t *testing.T) { const firstChildrenTSize = 45623854 const secondChildrenTSize = 1690667 - expectedChildrens := [2]FileEntry{ + expectedChildrens := [2]FileEntry[string]{ FileEntryWithTSize(cid.MustParse("QmUBwP7RczPWbJSCpR4BygzvTNbJ2sfjt5yuRphSVYaJar"), 45613056, firstChildrenTSize), FileEntryWithTSize(cid.MustParse("QmeKhUSkRVDFbxssXpnb15UQf25YdWN9Ck3rjfZA3tvD8h"), 1690225, secondChildrenTSize), } - validate := func(t *testing.T, f File) { + validate := func(t *testing.T, f File[string, string]) { if f.Cid != c { t.Errorf("expected %v cid got %v", c, f.Cid) } @@ -109,7 +110,7 @@ func TestFilePB(t *testing.T) { if len(f.Childrens) != 2 { t.Errorf("expected 2 childrens got %v", f.Childrens) - } else if *(*[2]FileEntry)(f.Childrens) != expectedChildrens { + } else if !slices.Equal(f.Childrens, expectedChildrens[:]) { t.Errorf("childrens don't match, expected %v got %v", expectedChildrens, f.Childrens) } } @@ -120,11 +121,11 @@ func TestFilePB(t *testing.T) { if err != nil { t.Fatal(err) } - a, err := Parse(b) + a, err := Parse[string](b) if err != nil { t.Fatal(err) } - f, ok := a.(File) + f, ok := a.(File[string, string]) if !ok { t.Fatalf("expected File got %T", a) } @@ -132,7 +133,7 @@ func TestFilePB(t *testing.T) { }) t.Run("ParseAppend", func(t *testing.T) { t.Parallel() - var someArr [3]FileEntry + var someArr [3]FileEntry[string] typ, f, _, _, err := ParseAppend(someArr[:1], nil, c, data) if err != nil { t.Fatal(err) From 7ece6c848b3b9d572b0bc6bd8371e6dcb51a8ce4 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sat, 17 Jun 2023 10:43:24 +0200 Subject: [PATCH 
15/39] unixfs: fix CI and style --- unixfs/unixfs.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/unixfs/unixfs.go b/unixfs/unixfs.go index e8b926cb6..0a79d61ae 100644 --- a/unixfs/unixfs.go +++ b/unixfs/unixfs.go @@ -11,7 +11,7 @@ package unixfs import ( "errors" - "github.com/ipfs/go-block-format" + blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-cid" "github.com/multiformats/go-multicodec" ) @@ -39,6 +39,7 @@ func (e Entry[S]) Untyped() Entry[S] { var _ Node[string] = File[string, string]{} type File[Self, Children cid.Storage] struct { + //lint:ignore U1000 this is a badge patern badge Entry[Self] Data []byte @@ -58,6 +59,7 @@ type FileEntry[S cid.Storage] struct { var _ Node[string] = Directory[string, string]{} type Directory[Self, Children cid.Storage] struct { + //lint:ignore U1000 this is a badge patern badge Entry[Self] Childrens []DirectoryEntry[Children] @@ -71,6 +73,7 @@ type DirectoryEntry[S cid.Storage] struct { var _ Node[string] = Symlink[string]{} type Symlink[S cid.Storage] struct { + //lint:ignore U1000 this is a badge patern badge Entry[S] Value []byte @@ -80,6 +83,7 @@ type Symlink[S cid.Storage] struct { // If you add a new type using this you need to update [Parse]. 
type badge struct{} +//lint:ignore U1000 this is a badge patern func (badge) nodeBadge() { panic("badge was called even tho it only exists as a way to trick the type checker") } @@ -144,7 +148,7 @@ func ParseAppend[Self, Children cid.Storage]( } return case multicodec.DagPb: - return parsePB[Self, Children](fileChildrens, directoryChildrens, inCid, data) + return parsePB(fileChildrens, directoryChildrens, inCid, data) default: err = errors.New("unsupported codec: " + c.String()) return From f7dd803f75e0a00bc09a14d0c3c786135d44555b Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sat, 17 Jun 2023 18:19:05 +0200 Subject: [PATCH 16/39] unixfs: fix handling of unknown fields --- unixfs/pb.go | 78 +++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/unixfs/pb.go b/unixfs/pb.go index dadc21483..8430187f8 100644 --- a/unixfs/pb.go +++ b/unixfs/pb.go @@ -374,47 +374,51 @@ func parsePB[Self, Children cid.Storage]( // pbHandleUnknownField must be called right after the tag, it will handle // skipping uneeded values if needed. func pbHandleUnknownField(t protowire.Type, data []byte) ([]byte, error) { - var l int - switch t { - case protowire.BytesType: - _, l = protowire.ConsumeBytes(data) - case protowire.VarintType: - _, l = protowire.ConsumeVarint(data) - case protowire.Fixed64Type: - _, l = protowire.ConsumeFixed64(data) - case protowire.Fixed32Type: - _, l = protowire.ConsumeFixed32(data) - case protowire.StartGroupType: - // Walks over the group, it must be called after SGROUP tag and before EGROUP. - // Groups are an ancient way to create sub messages, they work with start and end tags. - // We found an unknown group, skip all of it by tracking the stack of start and ends. 
- groupStack := 1 - for groupStack != 0 && len(data) != 0 { - _, t, l := protowire.ConsumeTag(data) - if l < 0 { - return nil, protowire.ParseError(l) - } - data = data[l:] - switch t { - case protowire.StartGroupType: - groupStack++ - case protowire.EndGroupType: - groupStack-- + if len(data) == 0 { + return nil, errors.New("no field to consume") + } + + var groupStack uint + for { + var l int + switch t { + case protowire.BytesType: + _, l = protowire.ConsumeBytes(data) + case protowire.VarintType: + _, l = protowire.ConsumeVarint(data) + case protowire.Fixed64Type: + _, l = protowire.ConsumeFixed64(data) + case protowire.Fixed32Type: + _, l = protowire.ConsumeFixed32(data) + case protowire.StartGroupType: + groupStack++ + goto next + case protowire.EndGroupType: + if groupStack == 0 { + return nil, errors.New("unmatched end group") } + groupStack-- + goto next + default: + return nil, fmt.Errorf("unknown protobuf type: %v", t) } - if groupStack != 0 { - return nil, errors.New("unterminated group") + if l < 0 { + return nil, protowire.ParseError(l) } - return data, nil - case protowire.EndGroupType: - return nil, errors.New("unmatched end-group") - default: - return nil, fmt.Errorf("unknown protobuf type: %v", t) - } - if l < 0 { - return nil, protowire.ParseError(l) + data = data[l:] + + next: + if groupStack == 0 { + break + } + + _, t, l = protowire.ConsumeTag(data) + if l < 0 { + return nil, protowire.ParseError(l) + } + data = data[l:] } - return data[l:], nil + return data, nil } // pbDecodeNumber will decode a uint64 as best as it can. 
From 6d04a4b51ac10536c3150455b463cef6158ad35a Mon Sep 17 00:00:00 2001 From: Jorropo Date: Mon, 26 Jun 2023 02:06:30 +0200 Subject: [PATCH 17/39] unixfs: hoist unixfs related checks in format blind code --- unixfs/pb.go | 100 ++++++++++++++++------------------------------ unixfs/pb_test.go | 3 +- unixfs/unixfs.go | 37 ++++++++++++++++- 3 files changed, 71 insertions(+), 69 deletions(-) diff --git a/unixfs/pb.go b/unixfs/pb.go index 8430187f8..a90e12955 100644 --- a/unixfs/pb.go +++ b/unixfs/pb.go @@ -73,11 +73,8 @@ func parsePB[Self, Children cid.Storage]( fileChildrens []FileEntry[Children], directoryChildrens []DirectoryEntry[Children], inCid cid.GenericCid[Self], origData []byte, -) (typ Type, file File[Self, Children], dir Directory[Self, Children], sym Symlink[Self], err error) { - var dataType uint64 - var fileLinks, blocksizes uint - var content []byte - selfTSize := uint64(1) +) (dataType uint64, _ []FileEntry[Children], fileLinks, blocksizes uint, _ []DirectoryEntry[Children], content []byte, selfTSize uint64, _ error) { + selfTSize = 1 data := origData moveZeroNamedDirectoryEntriesToDirectoryChildrens := func(extra int) { @@ -97,8 +94,7 @@ func parsePB[Self, Children cid.Storage]( for len(data) != 0 { // iterate at the root level of the message outerNumber, t, l := protowire.ConsumeTag(data) if l < 0 { - err = protowire.ParseError(l) - return + return 0, nil, 0, 0, nil, nil, 0, protowire.ParseError(l) } data = data[l:] switch outerNumber { @@ -116,17 +112,17 @@ func parsePB[Self, Children cid.Storage]( // length prefixed message mData, l = protowire.ConsumeBytes(data) if l < 0 { - err = protowire.ParseError(l) - return + return 0, nil, 0, 0, nil, nil, 0, protowire.ParseError(l) } data = data[l:] // we just extracted the message so walk over it completely default: + var err error if outerNumber == 1 { err = fmt.Errorf("unknown type for Data field %v", t) } else { err = fmt.Errorf("unknown type for Links field %v", t) } - return + return 0, nil, 0, 0, nil, 
nil, 0, err } var c cid.GenericCid[Children] @@ -136,8 +132,7 @@ func parsePB[Self, Children cid.Storage]( for len(mData) != 0 { n, t, l := protowire.ConsumeTag(mData) if l < 0 { - err = protowire.ParseError(l) - return + return 0, nil, 0, 0, nil, nil, 0, protowire.ParseError(l) } mData = mData[l:] @@ -151,9 +146,10 @@ func parsePB[Self, Children cid.Storage]( switch n { case 1: // required DataType Type = 1; + var err error mData, dataType, err = pbDecodeNumber(t, mData) if err != nil { - return + return 0, nil, 0, 0, nil, nil, 0, err } // due to how "Last One Wins" we can't do anything meaningfull without fully decoding the message first. @@ -163,14 +159,12 @@ func parsePB[Self, Children cid.Storage]( case protowire.BytesType: content, l = protowire.ConsumeBytes(mData) if l < 0 { - err = protowire.ParseError(l) - return + return 0, nil, 0, 0, nil, nil, 0, protowire.ParseError(l) } mData = mData[l:] default: - err = fmt.Errorf("unknown type for Data.Data field %v", t) - return + return 0, nil, 0, 0, nil, nil, 0, fmt.Errorf("unknown type for Data.Data field %v", t) } case 4: @@ -196,9 +190,10 @@ func parsePB[Self, Children cid.Storage]( // I mean it works but do other protobuf parsers do this ? 
case protowire.VarintType, protowire.Fixed64Type, protowire.Fixed32Type: var blocksize uint64 + var err error mData, blocksize, err = pbDecodeNumber(t, mData) if err != nil { - return + return 0, nil, 0, 0, nil, nil, 0, err } addBlocksize(blocksize) @@ -206,16 +201,14 @@ func parsePB[Self, Children cid.Storage]( // packed representation packed, l := protowire.ConsumeBytes(mData) if l < 0 { - err = protowire.ParseError(l) - return + return 0, nil, 0, 0, nil, nil, 0, protowire.ParseError(l) } mData = mData[l:] for len(packed) != 0 { blocksize, l := protowire.ConsumeVarint(packed) if l < 0 { - err = protowire.ParseError(l) - return + return 0, nil, 0, 0, nil, nil, 0, protowire.ParseError(l) } packed = packed[l:] @@ -223,14 +216,14 @@ func parsePB[Self, Children cid.Storage]( } default: - err = fmt.Errorf("unknown type for Data.Blocksizes field %v", t) - return + return 0, nil, 0, 0, nil, nil, 0, fmt.Errorf("unknown type for Data.Blocksizes field %v", t) } default: + var err error mData, err = pbHandleUnknownField(t, mData) if err != nil { - return + return 0, nil, 0, 0, nil, nil, 0, err } } } else { @@ -242,19 +235,17 @@ func parsePB[Self, Children cid.Storage]( case protowire.BytesType: cBytes, l := protowire.ConsumeBytes(mData) if l < 0 { - err = protowire.ParseError(l) - return + return 0, nil, 0, 0, nil, nil, 0, protowire.ParseError(l) } mData = mData[l:] + var err error c, err = cid.CastGeneric[Children](cBytes) if err != nil { - err = fmt.Errorf("failed to decode cid: %w", err) - return + return 0, nil, 0, 0, nil, nil, 0, fmt.Errorf("failed to decode cid: %w", err) } default: - err = fmt.Errorf("unknown type for Links.Hash field %v", t) - return + return 0, nil, 0, 0, nil, nil, 0, fmt.Errorf("unknown type for Links.Hash field %v", t) } case 2: @@ -263,19 +254,21 @@ func parsePB[Self, Children cid.Storage]( case protowire.BytesType: name, l = protowire.ConsumeBytes(mData) if l < 0 { - err = protowire.ParseError(l) - return + return 0, nil, 0, 0, nil, nil, 0, 
protowire.ParseError(l) } mData = mData[l:] default: - err = fmt.Errorf("unknown type for Links.Name field %v", t) - return + return 0, nil, 0, 0, nil, nil, 0, fmt.Errorf("unknown type for Links.Name field %v", t) } case 3: // optional uint64 Tsize = 3; + var err error mData, tSize, err = pbDecodeNumber(t, mData) + if err != nil { + return 0, nil, 0, 0, nil, nil, 0, err + } if selfTSize != 0 { if tSize == 0 { selfTSize = 0 @@ -286,9 +279,10 @@ func parsePB[Self, Children cid.Storage]( tSize++ default: + var err error mData, err = pbHandleUnknownField(t, mData) if err != nil { - return + return 0, nil, 0, 0, nil, nil, 0, err } } } @@ -297,7 +291,7 @@ func parsePB[Self, Children cid.Storage]( if outerNumber == 2 { // repeated PBLink Links = 2; if !c.Defined() { - err = errors.New("link is missing CID") + return 0, nil, 0, 0, nil, nil, 0, errors.New("link is missing CID") } // note we accept present but empty name entries on files because some historic @@ -305,8 +299,7 @@ func parsePB[Self, Children cid.Storage]( if len(name) != 0 || len(directoryChildrens) != 0 { // Directory entry if blocksizes != 0 { - err = errors.New("mixed use of blocksizes and named links") - return + return 0, nil, 0, 0, nil, nil, 0, errors.New("mixed use of blocksizes and named links") } if len(fileChildrens) != 0 { @@ -337,38 +330,15 @@ func parsePB[Self, Children cid.Storage]( } default: + var err error data, err = pbHandleUnknownField(t, data) if err != nil { - return + return 0, nil, 0, 0, nil, nil, 0, err } } } - switch dataType { - case pbFile: - if len(directoryChildrens) != 0 { - err = errors.New("named links in file") - return - } - - if fileLinks != blocksizes { - err = fmt.Errorf("unmatched links (%d) and blocksizes (%d) sisterlists", uint(len(fileChildrens)), blocksizes) - return - } - - typ = TFile - file = File[Self, Children]{ - Entry: Entry[Self]{Cid: inCid, tSize: selfTSize + uint64(len(origData))}, - Data: content, - Childrens: fileChildrens, - } - - // TODO: directory and 
symlink - return - default: - err = fmt.Errorf("unknown node type: %d", dataType) - return - } + return dataType, fileChildrens, fileLinks, blocksizes, directoryChildrens, content, selfTSize, nil } // pbHandleUnknownField must be called right after the tag, it will handle diff --git a/unixfs/pb_test.go b/unixfs/pb_test.go index b83d22f0c..f1f6e0802 100644 --- a/unixfs/pb_test.go +++ b/unixfs/pb_test.go @@ -24,8 +24,7 @@ func BenchmarkPB(b *testing.B) { b.ResetTimer() var out []FileEntry[[]byte] for i := b.N; i != 0; i-- { - _, f, _, _, _ := parsePB(out[:0], nil, c, data) - out = f.Childrens + _, out, _, _, _, _, _, _ = parsePB(out[:0], nil, c, data) } } diff --git a/unixfs/unixfs.go b/unixfs/unixfs.go index 0a79d61ae..c392f0633 100644 --- a/unixfs/unixfs.go +++ b/unixfs/unixfs.go @@ -10,6 +10,7 @@ package unixfs import ( "errors" + "fmt" blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-cid" @@ -42,7 +43,8 @@ type File[Self, Children cid.Storage] struct { //lint:ignore U1000 this is a badge patern badge Entry[Self] - Data []byte + Data []byte + Childrens []FileEntry[Children] } @@ -134,6 +136,10 @@ func ParseAppend[Self, Children cid.Storage]( fileChildrens = fileChildrens[len(fileChildrens):] directoryChildrens = directoryChildrens[len(directoryChildrens):] + var dataType, selfTSize uint64 + var fileLinks, blocksizes uint + var content []byte + pref := inCid.Prefix() switch c := multicodec.Code(pref.Codec); c { case multicodec.Raw: @@ -148,9 +154,36 @@ func ParseAppend[Self, Children cid.Storage]( } return case multicodec.DagPb: - return parsePB(fileChildrens, directoryChildrens, inCid, data) + dataType, fileChildrens, fileLinks, blocksizes, directoryChildrens, content, selfTSize, err = parsePB(fileChildrens, directoryChildrens, inCid, data) default: err = errors.New("unsupported codec: " + c.String()) return } + if err != nil { + return + } + + if fileLinks != blocksizes { + err = fmt.Errorf("unmatched links (%d) and blocksizes (%d) sisterlists", 
uint(len(fileChildrens)), blocksizes) + return + } + + switch dataType { + case pbFile: + if len(directoryChildrens) != 0 { + err = errors.New("named links in file") + return + } + + return TFile, File[Self, Children]{ + Entry: Entry[Self]{Cid: inCid, tSize: selfTSize + uint64(len(data))}, + Data: content, + Childrens: fileChildrens, + }, Directory[Self, Children]{}, Symlink[Self]{}, nil + + // TODO: directory and symlink + default: + err = fmt.Errorf("unknown node type: %d", dataType) + return + } } From f4324f840a1f33ef2fc25275edddd7b33dfcf8cf Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sun, 8 Oct 2023 22:00:13 +0200 Subject: [PATCH 18/39] unixfs/feather: implement IPIP402 and IPIP412 --- unixfs/feather/entry.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index 5d3ba4001..4b6cb4fc7 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -58,11 +58,10 @@ type downloader struct { } func DownloadFile(c cid.Cid) (io.ReadCloser, error) { - req, err := http.NewRequest("GET", gateway+c.String(), bytes.NewReader(nil)) + req, err := http.NewRequest("GET", gateway+c.String()+"?dag-scope=entity&dups=y", bytes.NewReader(nil)) if err != nil { return nil, err } - // FIXME: Specify ordered DFS with duplicates req.Header.Add("Accept", "application/vnd.ipld.car") resp, err := http.DefaultClient.Do(req) From c78226272f5133b02d46e162539e664eddd14e33 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sun, 8 Oct 2023 22:00:17 +0200 Subject: [PATCH 19/39] unixfs/feather: stop checking for the root CID of received cars --- unixfs/feather/entry.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index 4b6cb4fc7..e88dfffc2 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -105,12 +105,6 @@ func DownloadFile(c cid.Cid) (io.ReadCloser, error) { if h.Version != supportedVersion { return nil, fmt.Errorf("unsupported version %d instead 
of %d", h.Version, supportedVersion) } - if len(h.Roots) != 1 { - return nil, fmt.Errorf("header has more roots than expected %d instead of 1", len(h.Roots)) - } - if h.Roots[0] != c { - return nil, fmt.Errorf("header root don't match, got %s instead of %s", cidStringTruncate(h.Roots[0]), c.String()) - } good = true From 2a7b5310ca1b38bb15d1e6e1c9da1ba5f5a92f00 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Mon, 9 Oct 2023 23:08:36 +0200 Subject: [PATCH 20/39] cmd/feather: fix capitalised errors --- cmd/feather/main.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/feather/main.go b/cmd/feather/main.go index 27d1e50bb..6f4e62cd6 100644 --- a/cmd/feather/main.go +++ b/cmd/feather/main.go @@ -40,13 +40,13 @@ Example: r, err := feather.DownloadFile(c) if err != nil { - return fmt.Errorf("Error starting file download: %w", err) + return fmt.Errorf("error starting file download: %w", err) } defer r.Close() _, err = io.Copy(os.Stdout, r) if err != nil { - return fmt.Errorf("Error downloading file: %w", err) + return fmt.Errorf("error downloading file: %w", err) } return nil } From f7b2c409c3e7cda683402cb418aec13a8def622c Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 10 Nov 2023 10:01:15 +0100 Subject: [PATCH 21/39] feather: fix IPIP-412 request --- unixfs/feather/entry.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index e88dfffc2..916fa5d20 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -58,11 +58,11 @@ type downloader struct { } func DownloadFile(c cid.Cid) (io.ReadCloser, error) { - req, err := http.NewRequest("GET", gateway+c.String()+"?dag-scope=entity&dups=y", bytes.NewReader(nil)) + req, err := http.NewRequest("GET", gateway+c.String()+"?dag-scope=entity", bytes.NewReader(nil)) if err != nil { return nil, err } - req.Header.Add("Accept", "application/vnd.ipld.car") + req.Header.Add("Accept", 
"application/vnd.ipld.car;dups=y;order=dfs;version=1") resp, err := http.DefaultClient.Do(req) if err != nil { From ecedd73b16eaadd290e6ef00e58a7d9ea95431ff Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 10 Nov 2023 10:01:38 +0100 Subject: [PATCH 22/39] unixfs: fix UnmarshalText --- unixfs/json.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unixfs/json.go b/unixfs/json.go index 28701123d..96921fe2b 100644 --- a/unixfs/json.go +++ b/unixfs/json.go @@ -27,7 +27,7 @@ func (s *AliasableString) UnmarshalText(b []byte) error { // UnmarshalText must copy the text if it wishes to retain the text after returning. new := make([]byte, len(b)) copy(new, b) - *s = b + *s = new return nil } From f21df27d6e2b8284adc0dae3c9ce6ed981f916d1 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 10 Nov 2023 10:13:55 +0100 Subject: [PATCH 23/39] unixfs: change Node to always use string like CIDs Node is the easy to use API, don't confuse peoples with aliasable CIDs. --- unixfs/unixfs.go | 14 +++++++------- unixfs/unixfs_test.go | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/unixfs/unixfs.go b/unixfs/unixfs.go index c392f0633..4d358f002 100644 --- a/unixfs/unixfs.go +++ b/unixfs/unixfs.go @@ -37,7 +37,7 @@ func (e Entry[S]) Untyped() Entry[S] { return e } -var _ Node[string] = File[string, string]{} +var _ Node = File[string, string]{} type File[Self, Children cid.Storage] struct { //lint:ignore U1000 this is a badge patern @@ -58,7 +58,7 @@ type FileEntry[S cid.Storage] struct { FileSize uint64 } -var _ Node[string] = Directory[string, string]{} +var _ Node = Directory[string, string]{} type Directory[Self, Children cid.Storage] struct { //lint:ignore U1000 this is a badge patern @@ -72,7 +72,7 @@ type DirectoryEntry[S cid.Storage] struct { Name AliasableString } -var _ Node[string] = Symlink[string]{} +var _ Node = Symlink[string]{} type Symlink[S cid.Storage] struct { //lint:ignore U1000 this is a badge patern @@ -92,9 +92,9 @@ func (badge) 
nodeBadge() { // Node is an interface that can exclusively be a [File], [Directory] or [Symlink]. We might add more in the future. // You MUST NOT embed this interface, it's only purpose is to provide type safe enums. -type Node[S cid.Storage] interface { +type Node interface { // Untyped returns the untyped [Entry] for that value stripped of all type related information. - Untyped() Entry[S] + Untyped() Entry[string] // nodeBadge must never be called it's just here to trick the type checker. nodeBadge() } @@ -103,8 +103,8 @@ type Node[S cid.Storage] interface { // [File.Data], [DirectoryEntry.Name] and [Symlink.Value] values are aliased to b.RawData(). // The data argument MUST hash to cid, this wont check the validaty of the hash. // It assumes the size of the block is limited and reasonable. -func Parse[Children cid.Storage](b blocks.Block) (Node[string], error) { - switch t, f, d, s, err := ParseAppend[string, Children](nil, nil, b.Cid(), b.RawData()); t { +func Parse(b blocks.Block) (Node, error) { + switch t, f, d, s, err := ParseAppend[string, string](nil, nil, b.Cid(), b.RawData()); t { case TError: return nil, err case TFile: diff --git a/unixfs/unixfs_test.go b/unixfs/unixfs_test.go index 09320f480..b6495f432 100644 --- a/unixfs/unixfs_test.go +++ b/unixfs/unixfs_test.go @@ -45,7 +45,7 @@ func TestRaw(t *testing.T) { if err != nil { t.Fatal(err) } - a, err := Parse[string](b) + a, err := Parse(b) if err != nil { t.Fatal(err) } @@ -121,7 +121,7 @@ func TestFilePB(t *testing.T) { if err != nil { t.Fatal(err) } - a, err := Parse[string](b) + a, err := Parse(b) if err != nil { t.Fatal(err) } From d7e2f951498af637be24af06ff699a1e3c56e634 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 10 Nov 2023 12:46:30 +0100 Subject: [PATCH 24/39] unixfs/feather: make use of boxo/unixfs easy api --- unixfs/feather/entry.go | 102 ++---- unixfs/feather/internal/pb/Makefile | 11 - unixfs/feather/internal/pb/file.pb.go | 488 -------------------------- 
unixfs/feather/internal/pb/file.proto | 47 --- 4 files changed, 31 insertions(+), 617 deletions(-) delete mode 100644 unixfs/feather/internal/pb/Makefile delete mode 100644 unixfs/feather/internal/pb/file.pb.go delete mode 100644 unixfs/feather/internal/pb/file.proto diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index 916fa5d20..2777d5e00 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -8,9 +8,9 @@ import ( "io" "net/http" - pb "github.com/ipfs/boxo/unixfs/feather/internal/pb" + "github.com/ipfs/boxo/unixfs" + blocks "github.com/ipfs/go-block-format" "golang.org/x/exp/slices" - "google.golang.org/protobuf/proto" "github.com/ipfs/boxo/verifcid" "github.com/ipfs/go-cid" @@ -207,81 +207,41 @@ func (d *downloader) Read(b []byte) (int, error) { } } - switch pref.Codec { - case cid.Raw: - if todo.rangeKnown { - if uint64(len(data)) != todo.size { - return 0, fmt.Errorf("leaf isn't size is incorrect for %s, expected %d; got %d", cidStringTruncate(c), todo.size, len(data)) - } - } - d.curBlock = data - case cid.DagProtobuf: - var block pb.PBNode - err := proto.Unmarshal(data, &block) - if err != nil { - return 0, fmt.Errorf("parsing block for %s: %w", cidStringTruncate(c), err) - } - - if len(block.Data) == 0 { - return 0, fmt.Errorf("block %s is missing Data field", cidStringTruncate(c)) - } - - var metadata pb.UnixfsData - err = proto.Unmarshal(block.Data, &metadata) - if err != nil { - return 0, fmt.Errorf("parsing metadata for %s: %w", cidStringTruncate(c), err) - } - - if metadata.Type == nil { - return 0, fmt.Errorf("missing unixfs node Type for %s", cidStringTruncate(c)) - } - switch *metadata.Type { - case pb.UnixfsData_File: - blocksizes := metadata.Blocksizes - links := block.Links - if len(blocksizes) != len(links) { - return 0, fmt.Errorf("inconsistent sisterlists for %s, %d vs %d", cidStringTruncate(c), len(blocksizes), len(links)) - } + b, err := blocks.NewBlockWithCid(data, c) + if err != nil { + return 0, err + } + 
node, err := unixfs.Parse(b) + if err != nil { + return 0, err + } - d.curBlock = metadata.Data - - filesize := uint64(len(metadata.Data)) - if len(blocksizes) != 0 { - regions := slices.Grow(d.state, len(blocksizes)) - for i := len(blocksizes); i > 0; { - i-- - bs := blocksizes[i] - subCid, err := loadCidFromBytes(links[i].Hash) - if err != nil { - return 0, fmt.Errorf("link %d of %s: %w", i, cidStringTruncate(c), err) - } - - regions = append(regions, region{ - c: subCid, - size: bs, - rangeKnown: true, - }) - filesize += bs - } - d.state = regions + switch n := node.(type) { + case unixfs.File[string, string]: + d.curBlock = n.Data + + filesize := uint64(len(n.Data)) + if childs := n.Childrens; len(childs) != 0 { + regions := slices.Grow(d.state, len(childs)) + for i := len(childs); i > 0; { + i-- + regions = append(regions, region{ + c: childs[i].Cid, + size: childs[i].FileSize, + rangeKnown: true, + }) + filesize += childs[i].FileSize } + d.state = regions + } - if todo.rangeKnown { - if todo.size != filesize { - return 0, fmt.Errorf("inconsistent filesize for %s, expected %d; got %d", cidStringTruncate(c), todo.size, filesize) - } - } - if metadata.Filesize != nil { - if *metadata.Filesize != filesize { - return 0, fmt.Errorf("inconsistent Filesize metadata field for %s, expected %d; got %d", cidStringTruncate(c), filesize, *metadata.Filesize) - } + if todo.rangeKnown { + if todo.size != filesize { + return 0, fmt.Errorf("inconsistent filesize for %s, expected %d; got %d", cidStringTruncate(c), todo.size, filesize) } - default: - return 0, fmt.Errorf("unkown unixfs node type for %s: %s", cidStringTruncate(c), metadata.Type.String()) } - default: - return 0, fmt.Errorf("unknown codec type %d for %s; expected Raw or Dag-PB", pref.Codec, cidStringTruncate(c)) + return 0, fmt.Errorf("unknown unixfs type, got %T for %s", node, cidStringTruncate(c)) } good = true diff --git a/unixfs/feather/internal/pb/Makefile b/unixfs/feather/internal/pb/Makefile deleted file 
mode 100644 index 1cb32f3df..000000000 --- a/unixfs/feather/internal/pb/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -PB = $(wildcard *.proto) -GO = $(PB:.proto=.pb.go) - -all: $(GO) - -%.pb.go: %.proto - protoc --proto_path=$(GOPATH)/src:. --go_out=. $< - -clean: - rm -f *.pb.go - rm -f *.go diff --git a/unixfs/feather/internal/pb/file.pb.go b/unixfs/feather/internal/pb/file.pb.go deleted file mode 100644 index a4863104c..000000000 --- a/unixfs/feather/internal/pb/file.pb.go +++ /dev/null @@ -1,488 +0,0 @@ -// Code generated by protoc-gen-go. DO NOT EDIT. -// versions: -// protoc-gen-go v1.27.1 -// protoc v3.10.1 -// source: file.proto - -package __ - -import ( - protoreflect "google.golang.org/protobuf/reflect/protoreflect" - protoimpl "google.golang.org/protobuf/runtime/protoimpl" - reflect "reflect" - sync "sync" -) - -const ( - // Verify that this generated code is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) - // Verify that runtime/protoimpl is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) -) - -type UnixfsData_DataType int32 - -const ( - UnixfsData_Raw UnixfsData_DataType = 0 - UnixfsData_Directory UnixfsData_DataType = 1 - UnixfsData_File UnixfsData_DataType = 2 - UnixfsData_Metadata UnixfsData_DataType = 3 - UnixfsData_Symlink UnixfsData_DataType = 4 - UnixfsData_HAMTShard UnixfsData_DataType = 5 -) - -// Enum value maps for UnixfsData_DataType. 
-var ( - UnixfsData_DataType_name = map[int32]string{ - 0: "Raw", - 1: "Directory", - 2: "File", - 3: "Metadata", - 4: "Symlink", - 5: "HAMTShard", - } - UnixfsData_DataType_value = map[string]int32{ - "Raw": 0, - "Directory": 1, - "File": 2, - "Metadata": 3, - "Symlink": 4, - "HAMTShard": 5, - } -) - -func (x UnixfsData_DataType) Enum() *UnixfsData_DataType { - p := new(UnixfsData_DataType) - *p = x - return p -} - -func (x UnixfsData_DataType) String() string { - return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) -} - -func (UnixfsData_DataType) Descriptor() protoreflect.EnumDescriptor { - return file_file_proto_enumTypes[0].Descriptor() -} - -func (UnixfsData_DataType) Type() protoreflect.EnumType { - return &file_file_proto_enumTypes[0] -} - -func (x UnixfsData_DataType) Number() protoreflect.EnumNumber { - return protoreflect.EnumNumber(x) -} - -// Deprecated: Do not use. -func (x *UnixfsData_DataType) UnmarshalJSON(b []byte) error { - num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) - if err != nil { - return err - } - *x = UnixfsData_DataType(num) - return nil -} - -// Deprecated: Use UnixfsData_DataType.Descriptor instead. 
-func (UnixfsData_DataType) EnumDescriptor() ([]byte, []int) { - return file_file_proto_rawDescGZIP(), []int{0, 0} -} - -type UnixfsData struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Type *UnixfsData_DataType `protobuf:"varint,1,req,name=Type,enum=pb.UnixfsData_DataType" json:"Type,omitempty"` - Data []byte `protobuf:"bytes,2,opt,name=Data" json:"Data,omitempty"` - Filesize *uint64 `protobuf:"varint,3,opt,name=filesize" json:"filesize,omitempty"` - Blocksizes []uint64 `protobuf:"varint,4,rep,name=blocksizes" json:"blocksizes,omitempty"` - HashType *uint64 `protobuf:"varint,5,opt,name=hashType" json:"hashType,omitempty"` - Fanout *uint64 `protobuf:"varint,6,opt,name=fanout" json:"fanout,omitempty"` -} - -func (x *UnixfsData) Reset() { - *x = UnixfsData{} - if protoimpl.UnsafeEnabled { - mi := &file_file_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *UnixfsData) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*UnixfsData) ProtoMessage() {} - -func (x *UnixfsData) ProtoReflect() protoreflect.Message { - mi := &file_file_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use UnixfsData.ProtoReflect.Descriptor instead. 
-func (*UnixfsData) Descriptor() ([]byte, []int) { - return file_file_proto_rawDescGZIP(), []int{0} -} - -func (x *UnixfsData) GetType() UnixfsData_DataType { - if x != nil && x.Type != nil { - return *x.Type - } - return UnixfsData_Raw -} - -func (x *UnixfsData) GetData() []byte { - if x != nil { - return x.Data - } - return nil -} - -func (x *UnixfsData) GetFilesize() uint64 { - if x != nil && x.Filesize != nil { - return *x.Filesize - } - return 0 -} - -func (x *UnixfsData) GetBlocksizes() []uint64 { - if x != nil { - return x.Blocksizes - } - return nil -} - -func (x *UnixfsData) GetHashType() uint64 { - if x != nil && x.HashType != nil { - return *x.HashType - } - return 0 -} - -func (x *UnixfsData) GetFanout() uint64 { - if x != nil && x.Fanout != nil { - return *x.Fanout - } - return 0 -} - -type Metadata struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - MimeType *string `protobuf:"bytes,1,opt,name=MimeType" json:"MimeType,omitempty"` -} - -func (x *Metadata) Reset() { - *x = Metadata{} - if protoimpl.UnsafeEnabled { - mi := &file_file_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *Metadata) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*Metadata) ProtoMessage() {} - -func (x *Metadata) ProtoReflect() protoreflect.Message { - mi := &file_file_proto_msgTypes[1] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use Metadata.ProtoReflect.Descriptor instead. 
-func (*Metadata) Descriptor() ([]byte, []int) { - return file_file_proto_rawDescGZIP(), []int{1} -} - -func (x *Metadata) GetMimeType() string { - if x != nil && x.MimeType != nil { - return *x.MimeType - } - return "" -} - -type PBLink struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - // binary CID (with no multibase prefix) of the target object - Hash []byte `protobuf:"bytes,1,opt,name=Hash" json:"Hash,omitempty"` - // UTF-8 string name - Name *string `protobuf:"bytes,2,opt,name=Name" json:"Name,omitempty"` - // cumulative size of target object - Tsize *uint64 `protobuf:"varint,3,opt,name=Tsize" json:"Tsize,omitempty"` -} - -func (x *PBLink) Reset() { - *x = PBLink{} - if protoimpl.UnsafeEnabled { - mi := &file_file_proto_msgTypes[2] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *PBLink) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*PBLink) ProtoMessage() {} - -func (x *PBLink) ProtoReflect() protoreflect.Message { - mi := &file_file_proto_msgTypes[2] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use PBLink.ProtoReflect.Descriptor instead. 
-func (*PBLink) Descriptor() ([]byte, []int) { - return file_file_proto_rawDescGZIP(), []int{2} -} - -func (x *PBLink) GetHash() []byte { - if x != nil { - return x.Hash - } - return nil -} - -func (x *PBLink) GetName() string { - if x != nil && x.Name != nil { - return *x.Name - } - return "" -} - -func (x *PBLink) GetTsize() uint64 { - if x != nil && x.Tsize != nil { - return *x.Tsize - } - return 0 -} - -type PBNode struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - // refs to other objects - Links []*PBLink `protobuf:"bytes,2,rep,name=Links" json:"Links,omitempty"` - // opaque user data - Data []byte `protobuf:"bytes,1,opt,name=Data" json:"Data,omitempty"` -} - -func (x *PBNode) Reset() { - *x = PBNode{} - if protoimpl.UnsafeEnabled { - mi := &file_file_proto_msgTypes[3] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *PBNode) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*PBNode) ProtoMessage() {} - -func (x *PBNode) ProtoReflect() protoreflect.Message { - mi := &file_file_proto_msgTypes[3] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use PBNode.ProtoReflect.Descriptor instead. 
-func (*PBNode) Descriptor() ([]byte, []int) { - return file_file_proto_rawDescGZIP(), []int{3} -} - -func (x *PBNode) GetLinks() []*PBLink { - if x != nil { - return x.Links - } - return nil -} - -func (x *PBNode) GetData() []byte { - if x != nil { - return x.Data - } - return nil -} - -var File_file_proto protoreflect.FileDescriptor - -var file_file_proto_rawDesc = []byte{ - 0x0a, 0x0a, 0x66, 0x69, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x02, 0x70, 0x62, - 0x22, 0x95, 0x02, 0x0a, 0x0a, 0x75, 0x6e, 0x69, 0x78, 0x66, 0x73, 0x44, 0x61, 0x74, 0x61, 0x12, - 0x2b, 0x0a, 0x04, 0x54, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x02, 0x28, 0x0e, 0x32, 0x17, 0x2e, - 0x70, 0x62, 0x2e, 0x75, 0x6e, 0x69, 0x78, 0x66, 0x73, 0x44, 0x61, 0x74, 0x61, 0x2e, 0x44, 0x61, - 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x04, 0x54, 0x79, 0x70, 0x65, 0x12, 0x12, 0x0a, 0x04, - 0x44, 0x61, 0x74, 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x44, 0x61, 0x74, 0x61, - 0x12, 0x1a, 0x0a, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x03, 0x20, 0x01, - 0x28, 0x04, 0x52, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x69, 0x7a, 0x65, 0x12, 0x1e, 0x0a, 0x0a, - 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x69, 0x7a, 0x65, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x04, - 0x52, 0x0a, 0x62, 0x6c, 0x6f, 0x63, 0x6b, 0x73, 0x69, 0x7a, 0x65, 0x73, 0x12, 0x1a, 0x0a, 0x08, - 0x68, 0x61, 0x73, 0x68, 0x54, 0x79, 0x70, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x04, 0x52, 0x08, - 0x68, 0x61, 0x73, 0x68, 0x54, 0x79, 0x70, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x66, 0x61, 0x6e, 0x6f, - 0x75, 0x74, 0x18, 0x06, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x66, 0x61, 0x6e, 0x6f, 0x75, 0x74, - 0x22, 0x56, 0x0a, 0x08, 0x44, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x07, 0x0a, 0x03, - 0x52, 0x61, 0x77, 0x10, 0x00, 0x12, 0x0d, 0x0a, 0x09, 0x44, 0x69, 0x72, 0x65, 0x63, 0x74, 0x6f, - 0x72, 0x79, 0x10, 0x01, 0x12, 0x08, 0x0a, 0x04, 0x46, 0x69, 0x6c, 0x65, 0x10, 0x02, 0x12, 0x0c, - 0x0a, 0x08, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 
0x74, 0x61, 0x10, 0x03, 0x12, 0x0b, 0x0a, 0x07, - 0x53, 0x79, 0x6d, 0x6c, 0x69, 0x6e, 0x6b, 0x10, 0x04, 0x12, 0x0d, 0x0a, 0x09, 0x48, 0x41, 0x4d, - 0x54, 0x53, 0x68, 0x61, 0x72, 0x64, 0x10, 0x05, 0x22, 0x26, 0x0a, 0x08, 0x4d, 0x65, 0x74, 0x61, - 0x64, 0x61, 0x74, 0x61, 0x12, 0x1a, 0x0a, 0x08, 0x4d, 0x69, 0x6d, 0x65, 0x54, 0x79, 0x70, 0x65, - 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x4d, 0x69, 0x6d, 0x65, 0x54, 0x79, 0x70, 0x65, - 0x22, 0x46, 0x0a, 0x06, 0x50, 0x42, 0x4c, 0x69, 0x6e, 0x6b, 0x12, 0x12, 0x0a, 0x04, 0x48, 0x61, - 0x73, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x48, 0x61, 0x73, 0x68, 0x12, 0x12, - 0x0a, 0x04, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x4e, 0x61, - 0x6d, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x54, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x04, 0x52, 0x05, 0x54, 0x73, 0x69, 0x7a, 0x65, 0x22, 0x3e, 0x0a, 0x06, 0x50, 0x42, 0x4e, 0x6f, - 0x64, 0x65, 0x12, 0x20, 0x0a, 0x05, 0x4c, 0x69, 0x6e, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x0a, 0x2e, 0x70, 0x62, 0x2e, 0x50, 0x42, 0x4c, 0x69, 0x6e, 0x6b, 0x52, 0x05, 0x4c, - 0x69, 0x6e, 0x6b, 0x73, 0x12, 0x12, 0x0a, 0x04, 0x44, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0c, 0x52, 0x04, 0x44, 0x61, 0x74, 0x61, 0x42, 0x03, 0x5a, 0x01, 0x2e, -} - -var ( - file_file_proto_rawDescOnce sync.Once - file_file_proto_rawDescData = file_file_proto_rawDesc -) - -func file_file_proto_rawDescGZIP() []byte { - file_file_proto_rawDescOnce.Do(func() { - file_file_proto_rawDescData = protoimpl.X.CompressGZIP(file_file_proto_rawDescData) - }) - return file_file_proto_rawDescData -} - -var file_file_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_file_proto_msgTypes = make([]protoimpl.MessageInfo, 4) -var file_file_proto_goTypes = []interface{}{ - (UnixfsData_DataType)(0), // 0: pb.unixfsData.DataType - (*UnixfsData)(nil), // 1: pb.unixfsData - (*Metadata)(nil), // 2: pb.Metadata - (*PBLink)(nil), // 3: pb.PBLink - (*PBNode)(nil), 
// 4: pb.PBNode -} -var file_file_proto_depIdxs = []int32{ - 0, // 0: pb.unixfsData.Type:type_name -> pb.unixfsData.DataType - 3, // 1: pb.PBNode.Links:type_name -> pb.PBLink - 2, // [2:2] is the sub-list for method output_type - 2, // [2:2] is the sub-list for method input_type - 2, // [2:2] is the sub-list for extension type_name - 2, // [2:2] is the sub-list for extension extendee - 0, // [0:2] is the sub-list for field type_name -} - -func init() { file_file_proto_init() } -func file_file_proto_init() { - if File_file_proto != nil { - return - } - if !protoimpl.UnsafeEnabled { - file_file_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*UnixfsData); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_file_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*Metadata); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_file_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PBLink); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_file_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*PBNode); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } - type x struct{} - out := protoimpl.TypeBuilder{ - File: protoimpl.DescBuilder{ - GoPackagePath: reflect.TypeOf(x{}).PkgPath(), - RawDescriptor: file_file_proto_rawDesc, - NumEnums: 1, - NumMessages: 4, - NumExtensions: 0, - NumServices: 0, - }, - GoTypes: file_file_proto_goTypes, - DependencyIndexes: file_file_proto_depIdxs, - EnumInfos: file_file_proto_enumTypes, - MessageInfos: file_file_proto_msgTypes, - }.Build() - 
File_file_proto = out.File - file_file_proto_rawDesc = nil - file_file_proto_goTypes = nil - file_file_proto_depIdxs = nil -} diff --git a/unixfs/feather/internal/pb/file.proto b/unixfs/feather/internal/pb/file.proto deleted file mode 100644 index 3792da43a..000000000 --- a/unixfs/feather/internal/pb/file.proto +++ /dev/null @@ -1,47 +0,0 @@ -syntax = "proto2"; - -package pb; - -option go_package = "."; - -message unixfsData { - enum DataType { - Raw = 0; - Directory = 1; - File = 2; - Metadata = 3; - Symlink = 4; - HAMTShard = 5; - } - - required DataType Type = 1; - optional bytes Data = 2; - optional uint64 filesize = 3; - repeated uint64 blocksizes = 4; - - optional uint64 hashType = 5; - optional uint64 fanout = 6; -} - -message Metadata { - optional string MimeType = 1; -} - -message PBLink { - // binary CID (with no multibase prefix) of the target object - optional bytes Hash = 1; - - // UTF-8 string name - optional string Name = 2; - - // cumulative size of target object - optional uint64 Tsize = 3; -} - -message PBNode { - // refs to other objects - repeated PBLink Links = 2; - - // opaque user data - optional bytes Data = 1; -} From d6bb1dd812373aea862c019bd054ca87c8766671 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Fri, 10 Nov 2023 12:46:52 +0100 Subject: [PATCH 25/39] unixfs/feather: normalize CIDs v0 to v1 --- unixfs/feather/entry.go | 11 +++++++++++ unixfs/unixfs.go | 1 + 2 files changed, 12 insertions(+) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index 2777d5e00..2e3cfa388 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -58,6 +58,8 @@ type downloader struct { } func DownloadFile(c cid.Cid) (io.ReadCloser, error) { + c = normalizeCidv0(c) + req, err := http.NewRequest("GET", gateway+c.String()+"?dag-scope=entity", bytes.NewReader(nil)) if err != nil { return nil, err @@ -149,6 +151,7 @@ func (d *downloader) Read(b []byte) (int, error) { var data []byte c := todo.c + c = normalizeCidv0(c) pref := c.Prefix() 
switch pref.MhType { @@ -201,6 +204,7 @@ func (d *downloader) Read(b []byte) (int, error) { if err != nil { return 0, fmt.Errorf("hashing data for %s: %w", cidStringTruncate(c), err) } + cidGot = normalizeCidv0(cidGot) if cidGot != c { return 0, fmt.Errorf("data integrity failed, expected %s; got %s", cidStringTruncate(c), cidStringTruncate(cidGot)) @@ -255,3 +259,10 @@ func (d *downloader) Read(b []byte) (int, error) { return n, nil } + +func normalizeCidv0(c cid.Cid) cid.Cid { + if c.Version() == 0 { + return cid.NewCidV1(cid.DagProtobuf, c.Hash()) + } + return c +} diff --git a/unixfs/unixfs.go b/unixfs/unixfs.go index 4d358f002..69da77ee7 100644 --- a/unixfs/unixfs.go +++ b/unixfs/unixfs.go @@ -160,6 +160,7 @@ func ParseAppend[Self, Children cid.Storage]( return } if err != nil { + err = fmt.Errorf("failed to parse: %w", err) return } From 2268a17731f3463449f6489ca9c37c3ce9757462 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sat, 11 Nov 2023 14:41:24 +0100 Subject: [PATCH 26/39] unixfs/feather: remove useless early gc --- unixfs/feather/entry.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index 2e3cfa388..fef75ee67 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -253,9 +253,6 @@ func (d *downloader) Read(b []byte) (int, error) { n := copy(b, d.curBlock) d.curBlock = d.curBlock[n:] - if len(d.curBlock) == 0 { - d.curBlock = nil // early gc - } return n, nil } From 00cd2331a6d53e17f52b24e96da71ef659bf3cfc Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sat, 11 Nov 2023 14:42:20 +0100 Subject: [PATCH 27/39] unixfs/feather: cleanup if branch --- unixfs/feather/entry.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index fef75ee67..ba37203dc 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -239,10 +239,8 @@ func (d *downloader) Read(b []byte) (int, error) { d.state = regions } - if 
todo.rangeKnown { - if todo.size != filesize { - return 0, fmt.Errorf("inconsistent filesize for %s, expected %d; got %d", cidStringTruncate(c), todo.size, filesize) - } + if todo.rangeKnown && todo.size != filesize { + return 0, fmt.Errorf("inconsistent filesize for %s, expected %d; got %d", cidStringTruncate(c), todo.size, filesize) } default: return 0, fmt.Errorf("unknown unixfs type, got %T for %s", node, cidStringTruncate(c)) From 1b0f7942429d350fee7ad1ec9f07cf6beed92820 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sat, 11 Nov 2023 15:49:00 +0100 Subject: [PATCH 28/39] unixfs/feather: change semantics of Read to not close on error This used to be buggy because defer would be called in the loop. --- unixfs/feather/entry.go | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index ba37203dc..ef222d682 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -57,6 +57,8 @@ type downloader struct { curBlock []byte } +// DownloadFile takes in a [cid.Cid] and return an [io.ReadCloser] which streams the deserialized file. +// You MUST always call the Close method when you are done using it else it would leak resources. 
func DownloadFile(c cid.Cid) (io.ReadCloser, error) { c = normalizeCidv0(c) @@ -137,13 +139,6 @@ func (d *downloader) Read(b []byte) (int, error) { return 0, io.EOF } - var good bool - defer func() { - if !good { - d.Close() - } - }() - // pop current item from the DFS stack last := len(d.state) - 1 todo := d.state[last] @@ -245,8 +240,6 @@ func (d *downloader) Read(b []byte) (int, error) { default: return 0, fmt.Errorf("unknown unixfs type, got %T for %s", node, cidStringTruncate(c)) } - - good = true } n := copy(b, d.curBlock) From 90b000107a95fdd84946e31cffe4ca8bccb2fa9e Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sat, 11 Nov 2023 15:53:40 +0100 Subject: [PATCH 29/39] unixfs/feather: harden against Read being called after an error happend --- unixfs/feather/entry.go | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index ef222d682..e81945404 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -55,6 +55,7 @@ type downloader struct { buf bufio.Reader state []region curBlock []byte + readErr error } // DownloadFile takes in a [cid.Cid] and return an [io.ReadCloser] which streams the deserialized file. 
@@ -131,7 +132,13 @@ func loadCidFromBytes(cidBytes []byte) (cid.Cid, error) { return c, nil } -func (d *downloader) Read(b []byte) (int, error) { +func (d *downloader) Read(b []byte) (_ int, err error) { + if d.readErr != nil { + return 0, d.readErr + } + defer func() { + d.readErr = err + }() for len(d.curBlock) == 0 { // have to fill more data in the buffer if len(d.state) == 0 { From 969998d33e70b63e98a06882ed10ab6e113f6e4a Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 16 Nov 2023 11:54:55 +0300 Subject: [PATCH 30/39] unixfs/feather: add a basic test --- cmd/feather/main.go | 11 ++- unixfs/feather/entry.go | 56 +++++++++++- unixfs/feather/feather_test.go | 83 ++++++++++++++++++ .../testdata/file-with-many-raw-leaves.car | Bin 0 -> 3593 bytes 4 files changed, 143 insertions(+), 7 deletions(-) create mode 100644 unixfs/feather/feather_test.go create mode 100644 unixfs/feather/testdata/file-with-many-raw-leaves.car diff --git a/cmd/feather/main.go b/cmd/feather/main.go index 6f4e62cd6..321a1b0e8 100644 --- a/cmd/feather/main.go +++ b/cmd/feather/main.go @@ -38,15 +38,20 @@ Example: %s bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi`, err, os.Args[0], os.Args[0]) } - r, err := feather.DownloadFile(c) + f, err := feather.NewClient(feather.WithStaticGateway("http://localhost:8080/")) if err != nil { - return fmt.Errorf("error starting file download: %w", err) + return fmt.Errorf("creating feather client: %w", err) + } + + r, err := f.DownloadFile(c) + if err != nil { + return fmt.Errorf("starting file download: %w", err) } defer r.Close() _, err = io.Copy(os.Stdout, r) if err != nil { - return fmt.Errorf("error downloading file: %w", err) + return fmt.Errorf("downloading file: %w", err) } return nil } diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index e81945404..c0a943a6f 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -4,6 +4,7 @@ import ( "bufio" "bytes" "encoding/binary" + "errors" "fmt" "io" "net/http" @@ 
-36,7 +37,6 @@ func init() { cbor.RegisterCborType(carHeader{}) } -const gateway = "http://localhost:8080/ipfs/" const maxHeaderSize = 32 * 1024 * 1024 // 32MiB const maxBlockSize = 2 * 1024 * 1024 // 2MiB const maxCidSize = 4096 @@ -58,18 +58,66 @@ type downloader struct { readErr error } +type Client struct { + httpClient *http.Client + hostname string +} + +type Option func(*Client) error + +// WithHTTPClient allows to use a [http.Client] of your choice. +func WithHTTPClient(client *http.Client) Option { + return func(c *Client) error { + c.httpClient = client + return nil + } +} + +// WithStaticGateway sets a static gateway which will be used for all requests. +func WithStaticGateway(gateway string) Option { + if len(gateway) != 0 && gateway[len(gateway)-1] == '/' { + gateway = gateway[:len(gateway)-1] + } + gateway += "/ipfs/" + + return func(c *Client) error { + c.hostname = gateway + return nil + } +} + +var ErrNoAvailableDataSource = errors.New("no data source") + +func NewClient(opts ...Option) (*Client, error) { + c := &Client{ + httpClient: http.DefaultClient, + } + + for _, opt := range opts { + if err := opt(c); err != nil { + return nil, err + } + } + + if c.hostname == "" { + return nil, ErrNoAvailableDataSource + } + + return c, nil +} + // DownloadFile takes in a [cid.Cid] and return an [io.ReadCloser] which streams the deserialized file. // You MUST always call the Close method when you are done using it else it would leak resources. 
-func DownloadFile(c cid.Cid) (io.ReadCloser, error) { +func (client *Client) DownloadFile(c cid.Cid) (io.ReadCloser, error) { c = normalizeCidv0(c) - req, err := http.NewRequest("GET", gateway+c.String()+"?dag-scope=entity", bytes.NewReader(nil)) + req, err := http.NewRequest("GET", client.hostname+c.String()+"?dag-scope=entity", bytes.NewReader(nil)) if err != nil { return nil, err } req.Header.Add("Accept", "application/vnd.ipld.car;dups=y;order=dfs;version=1") - resp, err := http.DefaultClient.Do(req) + resp, err := client.httpClient.Do(req) if err != nil { return nil, err } diff --git a/unixfs/feather/feather_test.go b/unixfs/feather/feather_test.go new file mode 100644 index 000000000..a7d7243b5 --- /dev/null +++ b/unixfs/feather/feather_test.go @@ -0,0 +1,83 @@ +package feather_test + +import ( + "bytes" + "crypto/sha256" + "encoding/hex" + "io" + "net/http/httptest" + "os" + "path/filepath" + "testing" + + "github.com/ipfs/boxo/blockservice" + "github.com/ipfs/boxo/exchange/offline" + "github.com/ipfs/boxo/gateway" + "github.com/ipfs/boxo/unixfs/feather" + "github.com/ipfs/go-cid" + carblockstore "github.com/ipld/go-car/v2/blockstore" + "github.com/stretchr/testify/assert" +) + +func newGateway(t *testing.T, fixture string) (*httptest.Server, cid.Cid) { + t.Helper() + + r, err := os.Open(filepath.Join("./testdata", fixture)) + assert.NoError(t, err) + + blockStore, err := carblockstore.NewReadOnly(r, nil) + assert.NoError(t, err) + + t.Cleanup(func() { + blockStore.Close() + r.Close() + }) + + cids, err := blockStore.Roots() + assert.NoError(t, err) + assert.Len(t, cids, 1) + + blockService := blockservice.New(blockStore, offline.Exchange(blockStore)) + + backend, err := gateway.NewBlocksBackend(blockService) + assert.NoError(t, err) + + handler := gateway.NewHandler(gateway.Config{}, backend) + + ts := httptest.NewServer(handler) + t.Cleanup(func() { ts.Close() }) + t.Logf("test server url: %s", ts.URL) + + return ts, cids[0] +} + +func newFeather(t 
*testing.T, fixture string) (*feather.Client, cid.Cid) { + t.Helper() + + gw, cid := newGateway(t, fixture) + f, err := feather.NewClient(feather.WithHTTPClient(gw.Client()), feather.WithStaticGateway(gw.URL)) + assert.NoError(t, err) + return f, cid +} + +func mustParseHex(s string) []byte { + v, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + return v +} + +func TestFileWithManyRawLeaves(t *testing.T) { + f, root := newFeather(t, "file-with-many-raw-leaves.car") + file, err := f.DownloadFile(root) + assert.NoError(t, err) + defer func() { assert.NoError(t, file.Close()) }() + h := sha256.New() + _, err = io.Copy(h, file) + assert.NoError(t, err) + + if !bytes.Equal(h.Sum(nil), mustParseHex("5e38d403b548e38fe350410347f6310b757203b19be6cd5323ec3ca56404b387")) { + t.Error("decoded content does not match expected") + } +} diff --git a/unixfs/feather/testdata/file-with-many-raw-leaves.car b/unixfs/feather/testdata/file-with-many-raw-leaves.car new file mode 100644 index 0000000000000000000000000000000000000000..b879d379b64a9f2501273c852908664e1a7f6517 GIT binary patch literal 3593 zcmcCmlv#nSg%2#nHZ0syd&)%}HO^%I>W6$O!znRy%jg zQ4?z+28jj^vJ`V0@9SgleH0YQlz%wCa+18moF#Mlq&Y*SyDoI?Wqi8{#pGWX7aN{) z{o$H!$faNL_}{~@**sb;7uqI=GFGc8)o43F6l;Op%%H%4AFu!wD{u*OFi9*>P-x&l zAcZ9y#CRFM>7%lvAut*O!#D&OA*BSN;%gX(z^K Date: Thu, 16 Nov 2023 16:19:39 +0300 Subject: [PATCH 31/39] unixfs/feather: normalise CIDs from the car --- unixfs/feather/entry.go | 1 + 1 file changed, 1 insertion(+) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index c0a943a6f..7055ce662 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -228,6 +228,7 @@ func (d *downloader) Read(b []byte) (_ int, err error) { if cidLen > maxCidSize { return 0, fmt.Errorf("cidFound for %s is too big at %d bytes", cidStringTruncate(c), cidLen) } + cidFound = normalizeCidv0(cidFound) if cidFound != c { return 0, fmt.Errorf("downloading %s but got %s instead", cidStringTruncate(c), 
cidStringTruncate(cidFound)) } From eec31dc8e981e2e92895fc71e885a5ae9630d5bd Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 16 Nov 2023 18:10:13 +0300 Subject: [PATCH 32/39] unixfs/feather: stop checking the CAR header version We stopped checking roots, this only provides slightly better error messages when getting unknown car versions. Remove dependency on cbor for this very small value. --- unixfs/feather/entry.go | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index 7055ce662..604d2276e 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -15,7 +15,6 @@ import ( "github.com/ipfs/boxo/verifcid" "github.com/ipfs/go-cid" - cbor "github.com/ipfs/go-ipld-cbor" mh "github.com/multiformats/go-multihash" ) @@ -28,15 +27,6 @@ func cidStringTruncate(c cid.Cid) string { return cidStr } -type carHeader struct { - Roots []cid.Cid - Version uint64 -} - -func init() { - cbor.RegisterCborType(carHeader{}) -} - const maxHeaderSize = 32 * 1024 * 1024 // 32MiB const maxBlockSize = 2 * 1024 * 1024 // 2MiB const maxCidSize = 4096 @@ -142,23 +132,11 @@ func (client *Client) DownloadFile(c cid.Cid) (io.ReadCloser, error) { return nil, fmt.Errorf("header is to big at %d instead of %d", headerSize, maxHeaderSize) } - b := make([]byte, headerSize) - _, err = io.ReadFull(&r.buf, b) - if err != nil { - return nil, err - } - - h := carHeader{} - err = cbor.DecodeInto(b, &h) + _, err = r.buf.Discard(int(headerSize)) if err != nil { return nil, err } - const supportedVersion = 1 - if h.Version != supportedVersion { - return nil, fmt.Errorf("unsupported version %d instead of %d", h.Version, supportedVersion) - } - good = true return r, nil From e9bb00ce5caf0002ed3d91f2492111943d342ea7 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 16 Nov 2023 18:28:19 +0300 Subject: [PATCH 33/39] unixfs/feather: remove incorrect TODO This would make the code tricky to read and I'm not sure it 
would help in a significant way. --- unixfs/feather/entry.go | 1 - 1 file changed, 1 deletion(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index 604d2276e..6068d23b3 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -215,7 +215,6 @@ func (d *downloader) Read(b []byte) (_ int, err error) { if blockSize > maxBlockSize { return 0, fmt.Errorf("block %s is too big (%d) max %d", cidStringTruncate(c), blockSize, maxBlockSize) } - // TODO: fast path read directly into b if len(b) <= blockSize and type is raw data, err = d.buf.Peek(blockSize) if err != nil { if err == io.EOF { From ebb6727f1ffad58e35fec2bff31d685c2d96e15a Mon Sep 17 00:00:00 2001 From: Jorropo Date: Thu, 16 Nov 2023 18:38:22 +0300 Subject: [PATCH 34/39] unixfs/feather: hoist readBlockFromStream to it's own function --- unixfs/feather/entry.go | 150 +++++++++++++++++++++++----------------- 1 file changed, 88 insertions(+), 62 deletions(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index 6068d23b3..65215fb03 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -40,12 +40,12 @@ type region struct { } type downloader struct { - io.Closer - buf bufio.Reader state []region curBlock []byte readErr error + client *Client + stream io.Closer } type Client struct { @@ -101,15 +101,30 @@ func NewClient(opts ...Option) (*Client, error) { func (client *Client) DownloadFile(c cid.Cid) (io.ReadCloser, error) { c = normalizeCidv0(c) - req, err := http.NewRequest("GET", client.hostname+c.String()+"?dag-scope=entity", bytes.NewReader(nil)) - if err != nil { + d := &downloader{ + client: client, + state: []region{{c: c}}, + buf: *bufio.NewReaderSize(nil, maxElementSize*2), + } + + if err := d.startStream(); err != nil { return nil, err } + + return d, nil +} + +func (d *downloader) startStream() error { + next := d.state[len(d.state)-1] + req, err := http.NewRequest("GET", d.client.hostname+next.c.String()+"?dag-scope=entity", bytes.NewReader(nil)) + 
if err != nil { + return err + } req.Header.Add("Accept", "application/vnd.ipld.car;dups=y;order=dfs;version=1") - resp, err := client.httpClient.Do(req) + resp, err := d.client.httpClient.Do(req) if err != nil { - return nil, err + return err } var good bool defer func() { @@ -118,28 +133,25 @@ func (client *Client) DownloadFile(c cid.Cid) (io.ReadCloser, error) { } }() - r := &downloader{ - Closer: resp.Body, - state: []region{{c: c}}, - } - r.buf = *bufio.NewReaderSize(resp.Body, maxElementSize*2) + d.stream = resp.Body + d.buf.Reset(resp.Body) - headerSize, err := binary.ReadUvarint(&r.buf) + headerSize, err := binary.ReadUvarint(&d.buf) if err != nil { - return nil, err + return err } if headerSize > maxHeaderSize { - return nil, fmt.Errorf("header is to big at %d instead of %d", headerSize, maxHeaderSize) + return fmt.Errorf("header is to big at %d instead of %d", headerSize, maxHeaderSize) } - _, err = r.buf.Discard(int(headerSize)) + _, err = d.buf.Discard(int(headerSize)) if err != nil { - return nil, err + return err } good = true - return r, nil + return nil } func loadCidFromBytes(cidBytes []byte) (cid.Cid, error) { @@ -190,52 +202,9 @@ func (d *downloader) Read(b []byte) (_ int, err error) { if err := verifcid.ValidateCid(verifcid.DefaultAllowlist, c); err != nil { return 0, fmt.Errorf("cid %s don't pass safe test: %w", cidStringTruncate(c), err) } - itemLenU, err := binary.ReadUvarint(&d.buf) + data, err = d.readBlockFromStream(c) if err != nil { - return 0, err - } - if itemLenU > maxBlockSize+maxCidSize { - return 0, fmt.Errorf("item size (%d) for %s exceed maxBlockSize+maxCidSize (%d)", itemLenU, cidStringTruncate(c), maxBlockSize+maxCidSize) - } - itemLen := int(itemLenU) - - cidLen, cidFound, err := cid.CidFromReader(&d.buf) - if err != nil { - return 0, fmt.Errorf("trying to read %s failed to read cid: %w", cidStringTruncate(c), err) - } - if cidLen > maxCidSize { - return 0, fmt.Errorf("cidFound for %s is too big at %d bytes", 
cidStringTruncate(c), cidLen) - } - cidFound = normalizeCidv0(cidFound) - if cidFound != c { - return 0, fmt.Errorf("downloading %s but got %s instead", cidStringTruncate(c), cidStringTruncate(cidFound)) - } - - blockSize := itemLen - cidLen - if blockSize > maxBlockSize { - return 0, fmt.Errorf("block %s is too big (%d) max %d", cidStringTruncate(c), blockSize, maxBlockSize) - } - data, err = d.buf.Peek(blockSize) - if err != nil { - if err == io.EOF { - // don't show io.EOF in case peeking is too short - err = io.ErrUnexpectedEOF - } - return 0, fmt.Errorf("peeking at block data for %s verification: %w", cidStringTruncate(c), err) - } - _, err = d.buf.Discard(len(data)) - if err != nil { - return 0, fmt.Errorf("critical: Discard is supposed to always succeed as long as we don't read less than buffered: %w", err) - } - - cidGot, err := pref.Sum(data) - if err != nil { - return 0, fmt.Errorf("hashing data for %s: %w", cidStringTruncate(c), err) - } - cidGot = normalizeCidv0(cidGot) - - if cidGot != c { - return 0, fmt.Errorf("data integrity failed, expected %s; got %s", cidStringTruncate(c), cidStringTruncate(cidGot)) + return 0, fmt.Errorf("reading block: %w", err) } } @@ -281,6 +250,63 @@ func (d *downloader) Read(b []byte) (_ int, err error) { return n, nil } +// readBlockFromStream must return a hash checked block. 
+func (d *downloader) readBlockFromStream(expectedCid cid.Cid) ([]byte, error) { + itemLenU, err := binary.ReadUvarint(&d.buf) + if err != nil { + return nil, err + } + if itemLenU > maxBlockSize+maxCidSize { + return nil, fmt.Errorf("item size (%d) for %s exceed maxBlockSize+maxCidSize (%d)", itemLenU, cidStringTruncate(expectedCid), maxBlockSize+maxCidSize) + } + itemLen := int(itemLenU) + + cidLen, cidFound, err := cid.CidFromReader(&d.buf) + if err != nil { + return nil, fmt.Errorf("trying to read %s failed to read cid: %w", cidStringTruncate(expectedCid), err) + } + if cidLen > maxCidSize { + return nil, fmt.Errorf("cidFound for %s is too big at %d bytes", cidStringTruncate(expectedCid), cidLen) + } + cidFound = normalizeCidv0(cidFound) + if cidFound != expectedCid { + return nil, fmt.Errorf("downloading %s but got %s instead", cidStringTruncate(expectedCid), cidStringTruncate(cidFound)) + } + + blockSize := itemLen - cidLen + if blockSize > maxBlockSize { + return nil, fmt.Errorf("block %s is too big (%d) max %d", cidStringTruncate(expectedCid), blockSize, maxBlockSize) + } + data, err := d.buf.Peek(blockSize) + if err != nil { + if err == io.EOF { + // don't show io.EOF in case peeking is too short + err = io.ErrUnexpectedEOF + } + return nil, fmt.Errorf("peeking at block data for %s verification: %w", cidStringTruncate(expectedCid), err) + } + _, err = d.buf.Discard(len(data)) + if err != nil { + return nil, fmt.Errorf("critical: Discard is supposed to always succeed as long as we don't read less than buffered: %w", err) + } + + cidGot, err := expectedCid.Prefix().Sum(data) + if err != nil { + return nil, fmt.Errorf("hashing data for %s: %w", cidStringTruncate(expectedCid), err) + } + cidGot = normalizeCidv0(cidGot) + + if cidGot != expectedCid { + return nil, fmt.Errorf("data integrity failed, expected %s; got %s", cidStringTruncate(expectedCid), cidStringTruncate(cidGot)) + } + + return data, nil +} + +func (d *downloader) Close() error { + return 
d.stream.Close() +} + func normalizeCidv0(c cid.Cid) cid.Cid { if c.Version() == 0 { return cid.NewCidV1(cid.DagProtobuf, c.Hash()) From 6cdc2756d8605515693b75fa5d0ebc02583d4f1d Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sat, 18 Nov 2023 14:49:44 +0300 Subject: [PATCH 35/39] unixfs/feather: add WithRetries option --- unixfs/feather/entry.go | 76 +++++++++++++++++++++++++++++------------ 1 file changed, 54 insertions(+), 22 deletions(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index 65215fb03..961c28aa2 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -7,6 +7,7 @@ import ( "errors" "fmt" "io" + "math" "net/http" "github.com/ipfs/boxo/unixfs" @@ -40,17 +41,19 @@ type region struct { } type downloader struct { - buf bufio.Reader - state []region - curBlock []byte - readErr error - client *Client - stream io.Closer + buf bufio.Reader + state []region + curBlock []byte + readErr error + client *Client + remainingAttempts uint + stream io.Closer } type Client struct { httpClient *http.Client hostname string + retries uint } type Option func(*Client) error @@ -63,6 +66,15 @@ func WithHTTPClient(client *http.Client) Option { } } +// WithRetries allows to specify how many times we should retry. +// [math.MaxUint] indicate infinite. +func WithRetries(n uint) Option { + return func(c *Client) error { + c.retries = n + return nil + } +} + // WithStaticGateway sets a static gateway which will be used for all requests. 
func WithStaticGateway(gateway string) Option { if len(gateway) != 0 && gateway[len(gateway)-1] == '/' { @@ -101,22 +113,22 @@ func NewClient(opts ...Option) (*Client, error) { func (client *Client) DownloadFile(c cid.Cid) (io.ReadCloser, error) { c = normalizeCidv0(c) - d := &downloader{ - client: client, - state: []region{{c: c}}, - buf: *bufio.NewReaderSize(nil, maxElementSize*2), + attempts := client.retries + if attempts != math.MaxUint { + attempts++ } - - if err := d.startStream(); err != nil { - return nil, err + d := &downloader{ + client: client, + state: []region{{c: c}}, + buf: *bufio.NewReaderSize(nil, maxElementSize*2), + remainingAttempts: attempts, } return d, nil } -func (d *downloader) startStream() error { - next := d.state[len(d.state)-1] - req, err := http.NewRequest("GET", d.client.hostname+next.c.String()+"?dag-scope=entity", bytes.NewReader(nil)) +func (d *downloader) startStream(todo region) error { + req, err := http.NewRequest("GET", d.client.hostname+todo.c.String()+"?dag-scope=entity", bytes.NewReader(nil)) if err != nil { return err } @@ -129,7 +141,7 @@ func (d *downloader) startStream() error { var good bool defer func() { if !good { - resp.Body.Close() + d.Close() } }() @@ -202,9 +214,23 @@ func (d *downloader) Read(b []byte) (_ int, err error) { if err := verifcid.ValidateCid(verifcid.DefaultAllowlist, c); err != nil { return 0, fmt.Errorf("cid %s don't pass safe test: %w", cidStringTruncate(c), err) } - data, err = d.readBlockFromStream(c) - if err != nil { - return 0, fmt.Errorf("reading block: %w", err) + var errStartStream, errRead error + for { + if d.stream == nil { + if attempts := d.remainingAttempts; attempts != math.MaxUint { + if attempts == 0 { + return 0, errors.Join(errRead, errStartStream) + } + d.remainingAttempts = attempts - 1 + } + errStartStream = d.startStream(todo) + } + data, errRead = d.readBlockFromStream(c) + if errRead == nil { + break + } + d.stream.Close() + d.stream = nil } } @@ -250,7 +276,9 @@ func 
(d *downloader) Read(b []byte) (_ int, err error) { return n, nil } -// readBlockFromStream must return a hash checked block. +// readBlockFromStream must perform hash verification on the input. +// The slice returned only has to be valid between two readBlockFromStream and Close calls. +// Implementations should reuse buffers to avoid allocations. func (d *downloader) readBlockFromStream(expectedCid cid.Cid) ([]byte, error) { itemLenU, err := binary.ReadUvarint(&d.buf) if err != nil { @@ -304,7 +332,11 @@ func (d *downloader) readBlockFromStream(expectedCid cid.Cid) ([]byte, error) { } func (d *downloader) Close() error { - return d.stream.Close() + if s := d.stream; s != nil { + d.stream = nil + return s.Close() + } + return nil } func normalizeCidv0(c cid.Cid) cid.Cid { From ca14179012399d019f52564c7c1935e9532583a0 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sat, 18 Nov 2023 15:01:56 +0300 Subject: [PATCH 36/39] unixfs, unixfs/feather: cleanup imports --- unixfs/feather/entry.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index 961c28aa2..2ab210f52 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -11,12 +11,11 @@ import ( "net/http" "github.com/ipfs/boxo/unixfs" - blocks "github.com/ipfs/go-block-format" - "golang.org/x/exp/slices" - "github.com/ipfs/boxo/verifcid" + blocks "github.com/ipfs/go-block-format" "github.com/ipfs/go-cid" mh "github.com/multiformats/go-multihash" + "golang.org/x/exp/slices" ) func cidStringTruncate(c cid.Cid) string { From c3929fff93cd6e8675fe90ddace9e4e3cd828065 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sat, 18 Nov 2023 15:05:34 +0300 Subject: [PATCH 37/39] unixfs/feather: move retry logic to it's own next function --- unixfs/feather/entry.go | 51 +++++++++++++++++++++------------- unixfs/feather/feather_test.go | 1 - 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go 
index 2ab210f52..df1280ed3 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -210,26 +210,9 @@ func (d *downloader) Read(b []byte) (_ int, err error) { data = c.Hash() data = data[len(data)-pref.MhLength:] // extract digest default: - if err := verifcid.ValidateCid(verifcid.DefaultAllowlist, c); err != nil { - return 0, fmt.Errorf("cid %s don't pass safe test: %w", cidStringTruncate(c), err) - } - var errStartStream, errRead error - for { - if d.stream == nil { - if attempts := d.remainingAttempts; attempts != math.MaxUint { - if attempts == 0 { - return 0, errors.Join(errRead, errStartStream) - } - d.remainingAttempts = attempts - 1 - } - errStartStream = d.startStream(todo) - } - data, errRead = d.readBlockFromStream(c) - if errRead == nil { - break - } - d.stream.Close() - d.stream = nil + data, err = d.next(todo) + if err != nil { + return 0, err } } @@ -275,6 +258,34 @@ func (d *downloader) Read(b []byte) (_ int, err error) { return n, nil } +// next download the next block, it also handles performing retries if needed. +// The data return is hash correct. +func (d *downloader) next(todo region) ([]byte, error) { + c := todo.c + if err := verifcid.ValidateCid(verifcid.DefaultAllowlist, c); err != nil { + return nil, fmt.Errorf("cid %s don't pass safe test: %w", cidStringTruncate(c), err) + } + var errStartStream, errRead error + for { + if d.stream == nil { + if attempts := d.remainingAttempts; attempts != math.MaxUint { + if attempts == 0 { + return nil, fmt.Errorf("could not download next block: %w", errors.Join(errRead, errStartStream)) + } + d.remainingAttempts = attempts - 1 + } + errStartStream = d.startStream(todo) + } + var data []byte + data, errRead = d.readBlockFromStream(c) + if errRead == nil { + return data, nil + } + d.stream.Close() + d.stream = nil + } +} + // readBlockFromStream must perform hash verification on the input. // The slice returned only has to be valid between two readBlockFromStream and Close calls. 
// Implementations should reuse buffers to avoid allocations. diff --git a/unixfs/feather/feather_test.go b/unixfs/feather/feather_test.go index a7d7243b5..052420cf2 100644 --- a/unixfs/feather/feather_test.go +++ b/unixfs/feather/feather_test.go @@ -46,7 +46,6 @@ func newGateway(t *testing.T, fixture string) (*httptest.Server, cid.Cid) { ts := httptest.NewServer(handler) t.Cleanup(func() { ts.Close() }) - t.Logf("test server url: %s", ts.URL) return ts, cids[0] } From b568a581e9e047a21b263cf1ec9581af74e59218 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sat, 18 Nov 2023 15:07:55 +0300 Subject: [PATCH 38/39] unixfs/feather: cleanup CID normalization --- unixfs/feather/entry.go | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index df1280ed3..93dd813be 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -110,15 +110,13 @@ func NewClient(opts ...Option) (*Client, error) { // DownloadFile takes in a [cid.Cid] and return an [io.ReadCloser] which streams the deserialized file. // You MUST always call the Close method when you are done using it else it would leak resources. 
func (client *Client) DownloadFile(c cid.Cid) (io.ReadCloser, error) { - c = normalizeCidv0(c) - attempts := client.retries if attempts != math.MaxUint { attempts++ } d := &downloader{ client: client, - state: []region{{c: c}}, + state: []region{{c: normalizeCidv0(c)}}, buf: *bufio.NewReaderSize(nil, maxElementSize*2), remainingAttempts: attempts, } @@ -202,7 +200,6 @@ func (d *downloader) Read(b []byte) (_ int, err error) { var data []byte c := todo.c - c = normalizeCidv0(c) pref := c.Prefix() switch pref.MhType { @@ -235,7 +232,7 @@ func (d *downloader) Read(b []byte) (_ int, err error) { for i := len(childs); i > 0; { i-- regions = append(regions, region{ - c: childs[i].Cid, + c: normalizeCidv0(childs[i].Cid), size: childs[i].FileSize, rangeKnown: true, }) From 2599f37a0c1a8df492b439413be77a3cabe699e7 Mon Sep 17 00:00:00 2001 From: Jorropo Date: Sat, 18 Nov 2023 15:54:34 +0300 Subject: [PATCH 39/39] TODO --- unixfs/feather/entry.go | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/unixfs/feather/entry.go b/unixfs/feather/entry.go index 93dd813be..f87a44f39 100644 --- a/unixfs/feather/entry.go +++ b/unixfs/feather/entry.go @@ -47,6 +47,8 @@ type downloader struct { client *Client remainingAttempts uint stream io.Closer + hasRetries bool + gotOneBlock bool } type Client struct { @@ -119,12 +121,14 @@ func (client *Client) DownloadFile(c cid.Cid) (io.ReadCloser, error) { state: []region{{c: normalizeCidv0(c)}}, buf: *bufio.NewReaderSize(nil, maxElementSize*2), remainingAttempts: attempts, + hasRetries: client.retries != 0, } return d, nil } func (d *downloader) startStream(todo region) error { + d.gotOneBlock = false req, err := http.NewRequest("GET", d.client.hostname+todo.c.String()+"?dag-scope=entity", bytes.NewReader(nil)) if err != nil { return err @@ -265,6 +269,9 @@ func (d *downloader) next(todo region) ([]byte, error) { var errStartStream, errRead error for { if d.stream == nil { + if !d.hasRetries && errRead 
== io.EOF { + return nil, fmt.Errorf("gateway terminated too early, still want: %s", cidStringTruncate(c)) + } if attempts := d.remainingAttempts; attempts != math.MaxUint { if attempts == 0 { return nil, fmt.Errorf("could not download next block: %w", errors.Join(errRead, errStartStream)) @@ -286,10 +293,15 @@ func (d *downloader) next(todo region) ([]byte, error) { // readBlockFromStream must perform hash verification on the input. // The slice returned only has to be valid between two readBlockFromStream and Close calls. // Implementations should reuse buffers to avoid allocations. -func (d *downloader) readBlockFromStream(expectedCid cid.Cid) ([]byte, error) { +func (d *downloader) readBlockFromStream(expectedCid cid.Cid) (_ []byte, rErr error) { itemLenU, err := binary.ReadUvarint(&d.buf) - if err != nil { + switch err { + case io.EOF: return nil, err + case nil: + break + default: + return nil, fmt.Errorf("reading next block length: %w", err) } if itemLenU > maxBlockSize+maxCidSize { return nil, fmt.Errorf("item size (%d) for %s exceed maxBlockSize+maxCidSize (%d)", itemLenU, cidStringTruncate(expectedCid), maxBlockSize+maxCidSize) @@ -298,6 +310,7 @@ func (d *downloader) readBlockFromStream(expectedCid cid.Cid) ([]byte, error) { cidLen, cidFound, err := cid.CidFromReader(&d.buf) if err != nil { + err = eofWouldBeUnexpected(err) return nil, fmt.Errorf("trying to read %s failed to read cid: %w", cidStringTruncate(expectedCid), err) } if cidLen > maxCidSize { @@ -314,10 +327,7 @@ func (d *downloader) readBlockFromStream(expectedCid cid.Cid) ([]byte, error) { } data, err := d.buf.Peek(blockSize) if err != nil { - if err == io.EOF { - // don't show io.EOF in case peeking is too short - err = io.ErrUnexpectedEOF - } + err = eofWouldBeUnexpected(err) return nil, fmt.Errorf("peeking at block data for %s verification: %w", cidStringTruncate(expectedCid), err) } _, err = d.buf.Discard(len(data)) @@ -338,6 +348,13 @@ func (d *downloader) readBlockFromStream(expectedCid 
cid.Cid) ([]byte, error) { return data, nil } +func eofWouldBeUnexpected(err error) error { + if err == io.EOF { + return io.ErrUnexpectedEOF + } + return err +} + func (d *downloader) Close() error { if s := d.stream; s != nil { d.stream = nil