From 92a65c194dd1371cf133d8df294c7007bdb7ac1a Mon Sep 17 00:00:00 2001 From: Will Scott Date: Thu, 4 Nov 2021 19:09:38 +0000 Subject: [PATCH 1/6] support extraction of unixfs content stored in car files --- cmd/car/car.go | 19 ++++++ cmd/car/extract.go | 161 +++++++++++++++++++++++++++++++++++++++++++++ cmd/go.mod | 4 +- cmd/go.sum | 16 +++-- 4 files changed, 192 insertions(+), 8 deletions(-) create mode 100644 cmd/car/extract.go diff --git a/cmd/car/car.go b/cmd/car/car.go index 850673c6..74f3ad53 100644 --- a/cmd/car/car.go +++ b/cmd/car/car.go @@ -34,6 +34,25 @@ func main1() int { Usage: "Detach an index to a detached file", Action: DetachCar, }, + { + Name: "extract", + Aliases: []string{"x"}, + Usage: "Extract the contents of a car", + Action: ExtractCar, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "file", + Aliases: []string{"f"}, + Usage: "The car file to extract from", + TakesFile: true, + }, + &cli.BoolFlag{ + Name: "verbose", + Aliases: []string{"v"}, + Usage: "Include verbose information about extracted contents", + }, + }, + }, { Name: "filter", Aliases: []string{"f"}, diff --git a/cmd/car/extract.go b/cmd/car/extract.go new file mode 100644 index 00000000..10c36f01 --- /dev/null +++ b/cmd/car/extract.go @@ -0,0 +1,161 @@ +package main + +import ( + "bytes" + "fmt" + "io" + "os" + "path" + + "github.com/ipfs/go-cid" + "github.com/ipfs/go-unixfsnode" + "github.com/ipfs/go-unixfsnode/data" + "github.com/ipfs/go-unixfsnode/file" + "github.com/ipld/go-car/v2/blockstore" + dagpb "github.com/ipld/go-codec-dagpb" + "github.com/ipld/go-ipld-prime" + cidlink "github.com/ipld/go-ipld-prime/linking/cid" + "github.com/urfave/cli/v2" +) + +// ExtractCar pulls files and directories out of a car +func ExtractCar(c *cli.Context) error { + if !c.IsSet("file") { + return fmt.Errorf("a file source must be specified") + } + outputDir, err := os.Getwd() + if err != nil { + return err + } + if c.Args().Len() > 0 { + outputDir = c.Args().First() + } + + if c.IsSet("verbose") { + fmt.Printf("writing to %s\n", outputDir) + } + + bs, err := blockstore.OpenReadOnly(c.Args().Get(0)) + if err != nil { + return err + } + + ls := cidlink.DefaultLinkSystem() + ls.TrustedStorage = true + ls.StorageReadOpener = func(_ ipld.LinkContext, l ipld.Link) (io.Reader, error) { + cl, ok := l.(cidlink.Link) + if !ok { + return nil, fmt.Errorf("not a cidlink") + } + blk, err := bs.Get(cl.Cid) + if err != nil { + return nil, err + } + return bytes.NewBuffer(blk.RawData()), nil + } + + roots, err := bs.Roots() + if err != nil { + return err + } + + for _, root := range roots { + if err := extractRoot(c, &ls, root, outputDir); err != nil { + return err + } + } + + return nil +} + +func extractRoot(c *cli.Context, ls *ipld.LinkSystem, root cid.Cid, outputDir string) error { + if root.Prefix().Codec == cid.Raw { + if c.IsSet("verbose") { + fmt.Fprintf(os.Stderr, "skipping raw root %s\n", root) + } + return nil + } + + pbn, err := ls.Load(ipld.LinkContext{}, cidlink.Link{Cid: root}, dagpb.Type.PBNode) + if err != nil { + return err + } + pbnode := pbn.(dagpb.PBNode) + + ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls) + if err != nil { + return err + } + + if err := extractDir(c, ls, ufn, outputDir); err != nil { + return fmt.Errorf("%s: %w", root, err) + } + + return nil +} + +func extractDir(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputDir string) error { + // make the directory. + os.MkdirAll(outputDir, 0755) + + if n.Kind() == ipld.Kind_Map { + mi := n.MapIterator() + for !mi.Done() { + key, val, err := mi.Next() + if err != nil { + return err + } + ks, err := key.AsString() + if err != nil { + return err + } + if val.Kind() == ipld.Kind_Map { + // interpret dagpb 'data' as unixfs data and look at type. + ufsData, err := val.LookupByString("Data") + if err != nil { + return err + } + ufsBytes, err := ufsData.AsBytes() + if err != nil { + return err + } + ufsNode, err := data.DecodeUnixFSData(ufsBytes) + if err != nil { + return err + } + if ufsNode.DataType.Int() == data.Data_Directory || ufsNode.DataType.Int() == data.Data_HAMTShard { + if err := extractDir(c, ls, val, path.Join(outputDir, ks)); err != nil { + return err + } + } else if ufsNode.DataType.Int() == data.Data_File || ufsNode.DataType.Int() == data.Data_Raw { + if err := extractFile(c, ls, val, path.Join(outputDir, ks)); err != nil { + return err + } + } else if ufsNode.DataType.Int() == data.Data_Symlink { + // TODO: symlink + } + } else { + if err := extractFile(c, ls, val, path.Join(outputDir, ks)); err != nil { + return err + } + } + } + return nil + } + return fmt.Errorf("not a directory") +} + +func extractFile(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputName string) error { + node, err := file.NewUnixFSFile(c.Context, n, ls) + if err != nil { + return err + } + f, err := os.Create(outputName) + if err != nil { + return err + } + defer f.Close() + _, err = io.Copy(f, node) + + return err +} diff --git a/cmd/go.mod b/cmd/go.mod index b7ef9f72..cde4d95f 100644 --- a/cmd/go.mod +++ b/cmd/go.mod @@ -7,9 +7,9 @@ require ( github.com/ipfs/go-block-format v0.0.3 github.com/ipfs/go-cid v0.1.0 github.com/ipfs/go-ipfs-blockstore v1.0.3 - github.com/ipfs/go-unixfsnode v1.1.4-0.20211018205408-e0bbe4aca062 + github.com/ipfs/go-unixfsnode v1.1.4-0.20211104184909-ef86491c66a7 github.com/ipld/go-car v0.3.2-0.20211001222544-c93f5367a75c - github.com/ipld/go-car/v2 v2.0.3-0.20211001222544-c93f5367a75c + github.com/ipld/go-car/v2 v2.1.0 github.com/ipld/go-codec-dagpb v1.3.0 github.com/ipld/go-ipld-prime v0.12.4-0.20211014180653-3ba656a3bc6b github.com/multiformats/go-multicodec v0.3.1-0.20210902112759-1539a079fd61 diff --git a/cmd/go.sum b/cmd/go.sum index 77155804..9160b5aa 100644 --- a/cmd/go.sum +++ b/cmd/go.sum @@ -196,8 +196,10 @@ github.com/ipfs/go-datastore v0.0.5/go.mod h1:d4KVXhMt913cLBEI/PXAy6ko+W7e9AhyAK github.com/ipfs/go-datastore v0.1.0/go.mod h1:d4KVXhMt913cLBEI/PXAy6ko+W7e9AhyAKBGh803qeE= github.com/ipfs/go-datastore v0.3.1/go.mod h1:w38XXW9kVFNp57Zj5knbKWM2T+KOZCGDRVNdgPHtbHw= github.com/ipfs/go-datastore v0.4.1/go.mod h1:SX/xMIKoCszPqp+z9JhPYCmoOoXTvaa13XEbGtsFUhA= -github.com/ipfs/go-datastore v0.4.2 h1:h8/n7WPzhp239kkLws+epN3Ic7YtcBPgcaXfEfdVDWM= github.com/ipfs/go-datastore v0.4.2/go.mod h1:SX/xMIKoCszPqp+z9JhPYCmoOoXTvaa13XEbGtsFUhA= +github.com/ipfs/go-datastore v0.4.6 h1:zU2cmweykxJ+ziXnA2cPtsLe8rdR/vrthOipLPuf6kc= +github.com/ipfs/go-datastore v0.4.6/go.mod h1:XSipLSc64rFKSFRFGo1ecQl+WhYce3K7frtpHkyPFUc= +github.com/ipfs/go-detect-race v0.0.1 h1:qX/xay2W3E4Q1U7d9lNs1sU9nvguX0a7319XbyQ6cOk= github.com/ipfs/go-detect-race v0.0.1/go.mod h1:8BNT7shDZPo99Q74BpGMK+4D8Mn4j46UU0LZ723meps= github.com/ipfs/go-ds-badger v0.0.2/go.mod h1:Y3QpeSFWQf6MopLTiZD+VT6IC1yZqaGmjvRcKeSGij8= github.com/ipfs/go-ds-leveldb v0.0.1/go.mod h1:feO8V3kubwsEF22n0YRQCffeb79OOYIykR4L04tMOYc= @@ -247,14 +249,14 @@ github.com/ipfs/go-peertaskqueue v0.1.0 h1:bpRbgv76eT4avutNPDFZuCPOQus6qTgurEYxf github.com/ipfs/go-peertaskqueue v0.1.0/go.mod h1:Jmk3IyCcfl1W3jTW3YpghSwSEC6IJ3Vzz/jUmWw8Z0U= github.com/ipfs/go-unixfs v0.2.4 h1:6NwppOXefWIyysZ4LR/qUBPvXd5//8J3jiMdvpbw6Lo= github.com/ipfs/go-unixfs v0.2.4/go.mod h1:SUdisfUjNoSDzzhGVxvCL9QO/nKdwXdr+gbMUdqcbYw= -github.com/ipfs/go-unixfsnode v1.1.4-0.20211018205408-e0bbe4aca062 h1:SAnV3UCdeWZJdh6MgiJgf4q/icHdKN/TAl+fGdn97BI= -github.com/ipfs/go-unixfsnode v1.1.4-0.20211018205408-e0bbe4aca062/go.mod h1:EwI/kBD0ypab4x1JSn+CNt8ioH1uaGJTJcGyi5McERE= +github.com/ipfs/go-unixfsnode v1.1.4-0.20211104184909-ef86491c66a7 h1:WBrDHxU8jdNeN/D89ugkOAEhcKsfoINOdwW2eiDyGgQ= +github.com/ipfs/go-unixfsnode v1.1.4-0.20211104184909-ef86491c66a7/go.mod h1:OmvLSnywiObMHBLt39Xo9jO+z+/rDNx82Yhn6QmPGHY= github.com/ipfs/go-verifcid v0.0.1 h1:m2HI7zIuR5TFyQ1b79Da5N9dnnCP1vcu2QqawmWlK2E= github.com/ipfs/go-verifcid v0.0.1/go.mod h1:5Hrva5KBeIog4A+UpqlaIU+DEstipcJYQQZc0g37pY0= github.com/ipld/go-car v0.3.2-0.20211001222544-c93f5367a75c h1:lUNcb71DmG/GSEim2UqDiOcm6E+jqNzo3gWH4JSjOqg= github.com/ipld/go-car v0.3.2-0.20211001222544-c93f5367a75c/go.mod h1:lIyfp4c4fs4qMQYBWPjHzT7fXxtw5r/Sj4QBVnTaoJc= -github.com/ipld/go-car/v2 v2.0.3-0.20211001222544-c93f5367a75c h1:aayNPQ5nHk2YhA3N61mERYQDL8yL86hekenOJfiulok= -github.com/ipld/go-car/v2 v2.0.3-0.20211001222544-c93f5367a75c/go.mod h1:Xr6GwkDhv8dtOtgHzOynAkIOg0t0YiPc5DxBPppWqZA= +github.com/ipld/go-car/v2 v2.1.0 h1:t8R/WXUSkfu1K1gpPk76mytCxsEdMjGcMIgpOq3/Cnw= +github.com/ipld/go-car/v2 v2.1.0/go.mod h1:Xr6GwkDhv8dtOtgHzOynAkIOg0t0YiPc5DxBPppWqZA= github.com/ipld/go-codec-dagpb v1.2.0/go.mod h1:6nBN7X7h8EOsEejZGqC7tej5drsdBAXbMHyBT+Fne5s= github.com/ipld/go-codec-dagpb v1.3.0 h1:czTcaoAuNNyIYWs6Qe01DJ+sEX7B+1Z0LcXjSatMGe8= github.com/ipld/go-codec-dagpb v1.3.0/go.mod h1:ga4JTU3abYApDC3pZ00BC2RSvC3qfBb9MSJkMLSwnhA= @@ -271,8 +273,9 @@ github.com/jbenet/go-cienv v0.0.0-20150120210510-1bb1476777ec/go.mod h1:rGaEvXB4 github.com/jbenet/go-cienv v0.1.0/go.mod h1:TqNnHUmJgXau0nCzC7kXWeotg3J9W34CUv5Djy1+FlA= github.com/jbenet/go-temp-err-catcher v0.0.0-20150120210811-aac704a3f4f2/go.mod h1:8GXXJV31xl8whumTzdZsTt3RnUIiPqzkyf7mxToRCMs= github.com/jbenet/goprocess v0.0.0-20160826012719-b497e2f366b8/go.mod h1:Ly/wlsjFq/qrU3Rar62tu1gASgGw6chQbSh/XgIIXCY= -github.com/jbenet/goprocess v0.1.3 h1:YKyIEECS/XvcfHtBzxtjBBbWK+MbvA6dG8ASiqwvr10= github.com/jbenet/goprocess v0.1.3/go.mod h1:5yspPrukOVuOLORacaBi858NqyClJPQxYZlqdZVfqY4= +github.com/jbenet/goprocess v0.1.4 h1:DRGOFReOMqqDNXwW70QkacFW0YN9QnwLV0Vqk+3oU0o= +github.com/jbenet/goprocess v0.1.4/go.mod h1:5yspPrukOVuOLORacaBi858NqyClJPQxYZlqdZVfqY4= github.com/jessevdk/go-flags v0.0.0-20141203071132-1679536dcc89/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= @@ -297,6 +300,7 @@ github.com/koron/go-ssdp v0.0.0-20180514024734-4a0ed625a78b h1:wxtKgYHEncAU00muM github.com/koron/go-ssdp v0.0.0-20180514024734-4a0ed625a78b/go.mod h1:5Ky9EC2xfoUKUor0Hjgi2BJhCSXJfMOFlmyYrVKGQMk= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= From e14a9dcaafe9f23d6c00105a65f685993991e022 Mon Sep 17 00:00:00 2001 From: Will Scott Date: Fri, 5 Nov 2021 12:39:08 +0000 Subject: [PATCH 2/6] extraction seems to work --- cmd/car/extract.go | 79 +++++++++++++++------- cmd/car/testdata/script/create-extract.txt | 12 ++++ cmd/go.mod | 2 +- cmd/go.sum | 2 + 4 files changed, 70 insertions(+), 25 deletions(-) create mode 100644 cmd/car/testdata/script/create-extract.txt diff --git a/cmd/car/extract.go b/cmd/car/extract.go index 10c36f01..6d7322cc 100644 --- a/cmd/car/extract.go +++ b/cmd/car/extract.go @@ -15,6 +15,7 @@ import ( dagpb "github.com/ipld/go-codec-dagpb" "github.com/ipld/go-ipld-prime" cidlink "github.com/ipld/go-ipld-prime/linking/cid" + basicnode "github.com/ipld/go-ipld-prime/node/basic" "github.com/urfave/cli/v2" ) @@ -31,11 +32,7 @@ func ExtractCar(c *cli.Context) error { outputDir = c.Args().First() } - if c.IsSet("verbose") { - fmt.Printf("writing to %s\n", outputDir) - } - - bs, err := blockstore.OpenReadOnly(c.Args().Get(0)) + bs, err := blockstore.OpenReadOnly(c.String("file")) if err != nil { return err } @@ -109,33 +106,67 @@ func extractDir(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputDir stri if err != nil { return err } - if val.Kind() == ipld.Kind_Map { - // interpret dagpb 'data' as unixfs data and look at type. - ufsData, err := val.LookupByString("Data") - if err != nil { + if c.IsSet("verbose") { + fmt.Fprintf(os.Stdout, "%s\n", path.Join(outputDir, ks)) + } + + if val.Kind() != ipld.Kind_Link { + return fmt.Errorf("unexpected map value for %s at %s", ks, outputDir) + } + // a directory may be represented as a map of name: if unixADL is applied + vl, err := val.AsLink() + if err != nil { + return err + } + dest, err := ls.Load(ipld.LinkContext{}, vl, basicnode.Prototype.Any) + if err != nil { + return err + } + // degenerate files are handled here. + if dest.Kind() == ipld.Kind_Bytes { + if err := extractFile(c, ls, dest, path.Join(outputDir, ks)); err != nil { return err } - ufsBytes, err := ufsData.AsBytes() - if err != nil { + continue + } else { + // dir / pbnode + pbb := dagpb.Type.PBNode.NewBuilder() + if err := pbb.AssignNode(dest); err != nil { return err } - ufsNode, err := data.DecodeUnixFSData(ufsBytes) + dest = pbb.Build() + } + pbnode := dest.(dagpb.PBNode) + + // interpret dagpb 'data' as unixfs data and look at type. + ufsData, err := pbnode.LookupByString("Data") + if err != nil { + return err + } + ufsBytes, err := ufsData.AsBytes() + if err != nil { + return err + } + ufsNode, err := data.DecodeUnixFSData(ufsBytes) + if err != nil { + return err + } + if ufsNode.DataType.Int() == data.Data_Directory || ufsNode.DataType.Int() == data.Data_HAMTShard { + ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls) if err != nil { return err } - if ufsNode.DataType.Int() == data.Data_Directory || ufsNode.DataType.Int() == data.Data_HAMTShard { - if err := extractDir(c, ls, val, path.Join(outputDir, ks)); err != nil { - return err - } - } else if ufsNode.DataType.Int() == data.Data_File || ufsNode.DataType.Int() == data.Data_Raw { - if err := extractFile(c, ls, val, path.Join(outputDir, ks)); err != nil { - return err - } - } else if ufsNode.DataType.Int() == data.Data_Symlink { - // TODO: symlink + + if err := extractDir(c, ls, ufn, path.Join(outputDir, ks)); err != nil { + return err } - } else { - if err := extractFile(c, ls, val, path.Join(outputDir, ks)); err != nil { + } else if ufsNode.DataType.Int() == data.Data_File || ufsNode.DataType.Int() == data.Data_Raw { + if err := extractFile(c, ls, pbnode, path.Join(outputDir, ks)); err != nil { + return err + } + } else if ufsNode.DataType.Int() == data.Data_Symlink { + data := ufsNode.Data.Must().Bytes() + if err := os.Symlink(string(data), path.Join(outputDir, ks)); err != nil { return err } } diff --git a/cmd/car/testdata/script/create-extract.txt b/cmd/car/testdata/script/create-extract.txt new file mode 100644 index 00000000..648bafc4 --- /dev/null +++ b/cmd/car/testdata/script/create-extract.txt @@ -0,0 +1,12 @@ +car create --file=out.car foo.txt bar.txt +mkdir out +car extract -v -f out.car out +! stderr . +stdout -count=2 'txt$' +car create --file=out2.car out/foo.txt out/bar.txt +cmp out.car out2.car + +-- foo.txt -- +foo content +-- bar.txt -- +bar content diff --git a/cmd/go.mod b/cmd/go.mod index cde4d95f..0d933ec0 100644 --- a/cmd/go.mod +++ b/cmd/go.mod @@ -7,7 +7,7 @@ require ( github.com/ipfs/go-block-format v0.0.3 github.com/ipfs/go-cid v0.1.0 github.com/ipfs/go-ipfs-blockstore v1.0.3 - github.com/ipfs/go-unixfsnode v1.1.4-0.20211104184909-ef86491c66a7 + github.com/ipfs/go-unixfsnode v1.1.4-0.20211105121048-b9b6e9dc571e github.com/ipld/go-car v0.3.2-0.20211001222544-c93f5367a75c github.com/ipld/go-car/v2 v2.1.0 github.com/ipld/go-codec-dagpb v1.3.0 diff --git a/cmd/go.sum b/cmd/go.sum index 9160b5aa..2a0f5684 100644 --- a/cmd/go.sum +++ b/cmd/go.sum @@ -251,6 +251,8 @@ github.com/ipfs/go-unixfs v0.2.4 h1:6NwppOXefWIyysZ4LR/qUBPvXd5//8J3jiMdvpbw6Lo= github.com/ipfs/go-unixfs v0.2.4/go.mod h1:SUdisfUjNoSDzzhGVxvCL9QO/nKdwXdr+gbMUdqcbYw= github.com/ipfs/go-unixfsnode v1.1.4-0.20211104184909-ef86491c66a7 h1:WBrDHxU8jdNeN/D89ugkOAEhcKsfoINOdwW2eiDyGgQ= github.com/ipfs/go-unixfsnode v1.1.4-0.20211104184909-ef86491c66a7/go.mod h1:OmvLSnywiObMHBLt39Xo9jO+z+/rDNx82Yhn6QmPGHY= +github.com/ipfs/go-unixfsnode v1.1.4-0.20211105121048-b9b6e9dc571e h1:EvUeWaQoNWLoxupHbeREW+yol0iEuzSknAMNthLsKdM= +github.com/ipfs/go-unixfsnode v1.1.4-0.20211105121048-b9b6e9dc571e/go.mod h1:OmvLSnywiObMHBLt39Xo9jO+z+/rDNx82Yhn6QmPGHY= github.com/ipfs/go-verifcid v0.0.1 h1:m2HI7zIuR5TFyQ1b79Da5N9dnnCP1vcu2QqawmWlK2E= github.com/ipfs/go-verifcid v0.0.1/go.mod h1:5Hrva5KBeIog4A+UpqlaIU+DEstipcJYQQZc0g37pY0= github.com/ipld/go-car v0.3.2-0.20211001222544-c93f5367a75c h1:lUNcb71DmG/GSEim2UqDiOcm6E+jqNzo3gWH4JSjOqg= From d492bd4f1825aff54156aeafcebc730850280a1e Mon Sep 17 00:00:00 2001 From: Will Scott Date: Fri, 5 Nov 2021 13:05:56 +0000 Subject: [PATCH 3/6] tidy --- cmd/go.sum | 2 -- 1 file changed, 2 deletions(-) diff --git a/cmd/go.sum b/cmd/go.sum index 2a0f5684..4e8e2a43 100644 --- a/cmd/go.sum +++ b/cmd/go.sum @@ -249,8 +249,6 @@ github.com/ipfs/go-peertaskqueue v0.1.0 h1:bpRbgv76eT4avutNPDFZuCPOQus6qTgurEYxf github.com/ipfs/go-peertaskqueue v0.1.0/go.mod h1:Jmk3IyCcfl1W3jTW3YpghSwSEC6IJ3Vzz/jUmWw8Z0U= github.com/ipfs/go-unixfs v0.2.4 h1:6NwppOXefWIyysZ4LR/qUBPvXd5//8J3jiMdvpbw6Lo= github.com/ipfs/go-unixfs v0.2.4/go.mod h1:SUdisfUjNoSDzzhGVxvCL9QO/nKdwXdr+gbMUdqcbYw= -github.com/ipfs/go-unixfsnode v1.1.4-0.20211104184909-ef86491c66a7 h1:WBrDHxU8jdNeN/D89ugkOAEhcKsfoINOdwW2eiDyGgQ= -github.com/ipfs/go-unixfsnode v1.1.4-0.20211104184909-ef86491c66a7/go.mod h1:OmvLSnywiObMHBLt39Xo9jO+z+/rDNx82Yhn6QmPGHY= github.com/ipfs/go-unixfsnode v1.1.4-0.20211105121048-b9b6e9dc571e h1:EvUeWaQoNWLoxupHbeREW+yol0iEuzSknAMNthLsKdM= github.com/ipfs/go-unixfsnode v1.1.4-0.20211105121048-b9b6e9dc571e/go.mod h1:OmvLSnywiObMHBLt39Xo9jO+z+/rDNx82Yhn6QmPGHY= github.com/ipfs/go-verifcid v0.0.1 h1:m2HI7zIuR5TFyQ1b79Da5N9dnnCP1vcu2QqawmWlK2E= From 8517c6276a360bca754e500d41373ce5f0aacfe2 Mon Sep 17 00:00:00 2001 From: Will Scott Date: Sat, 6 Nov 2021 21:41:23 +0000 Subject: [PATCH 4/6] add cautious path limitation logic --- cmd/car/extract.go | 55 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/cmd/car/extract.go b/cmd/car/extract.go index 6d7322cc..0368c665 100644 --- a/cmd/car/extract.go +++ b/cmd/car/extract.go @@ -6,6 +6,7 @@ import ( "io" "os" "path" + "path/filepath" "github.com/ipfs/go-cid" "github.com/ipfs/go-unixfsnode" @@ -84,16 +85,47 @@ func extractRoot(c *cli.Context, ls *ipld.LinkSystem, root cid.Cid, outputDir st return err } - if err := extractDir(c, ls, ufn, outputDir); err != nil { + outputResolvedDir, err := filepath.EvalSymlinks(outputDir) + if err != nil { + return err + } + if _, err := os.Stat(outputResolvedDir); os.IsNotExist(err) { + if err := os.Mkdir(outputResolvedDir, 0755); err != nil { + return err + } + } + if err := extractDir(c, ls, ufn, outputResolvedDir, "/"); err != nil { return fmt.Errorf("%s: %w", root, err) } return nil } -func extractDir(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputDir string) error { +func resolvePath(root, pth string) (string, error) { + rp, err := filepath.Rel("/", pth) + if err != nil { + return "", fmt.Errorf("couldn't check relative-ness of %s: %w", pth, err) + } + joined := path.Join(root, rp) + + basename := path.Dir(joined) + final, err := filepath.EvalSymlinks(basename) + if err != nil { + return "", fmt.Errorf("couldn't eval symlinks in %s: %w", basename, err) + } + if final != path.Clean(basename) { + return "", fmt.Errorf("path attempts to redirect through symlinks") + } + return joined, nil +} + +func extractDir(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputRoot, outputPath string) error { + dirPath, err := resolvePath(outputRoot, outputPath) + if err != nil { + return err + } // make the directory. - os.MkdirAll(outputDir, 0755) + os.MkdirAll(dirPath, 0755) if n.Kind() == ipld.Kind_Map { mi := n.MapIterator() @@ -106,12 +138,16 @@ func extractDir(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputDir stri if err != nil { return err } + nextRes, err := resolvePath(outputRoot, path.Join(outputPath, ks)) + if err != nil { + return err + } if c.IsSet("verbose") { - fmt.Fprintf(os.Stdout, "%s\n", path.Join(outputDir, ks)) + fmt.Fprintf(os.Stdout, "%s\n", nextRes) } if val.Kind() != ipld.Kind_Link { - return fmt.Errorf("unexpected map value for %s at %s", ks, outputDir) + return fmt.Errorf("unexpected map value for %s at %s", ks, outputPath) } // a directory may be represented as a map of name: if unixADL is applied vl, err := val.AsLink() @@ -124,7 +160,7 @@ func extractDir(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputDir stri } // degenerate files are handled here. if dest.Kind() == ipld.Kind_Bytes { - if err := extractFile(c, ls, dest, path.Join(outputDir, ks)); err != nil { + if err := extractFile(c, ls, dest, nextRes); err != nil { return err } continue @@ -157,16 +193,16 @@ func extractDir(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputDir stri return err } - if err := extractDir(c, ls, ufn, path.Join(outputDir, ks)); err != nil { + if err := extractDir(c, ls, ufn, outputRoot, path.Join(outputPath, ks)); err != nil { return err } } else if ufsNode.DataType.Int() == data.Data_File || ufsNode.DataType.Int() == data.Data_Raw { - if err := extractFile(c, ls, pbnode, path.Join(outputDir, ks)); err != nil { + if err := extractFile(c, ls, pbnode, nextRes); err != nil { return err } } else if ufsNode.DataType.Int() == data.Data_Symlink { data := ufsNode.Data.Must().Bytes() - if err := os.Symlink(string(data), path.Join(outputDir, ks)); err != nil { + if err := os.Symlink(string(data), nextRes); err != nil { return err } } @@ -181,6 +217,7 @@ func extractFile(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputName st if err != nil { return err } + f, err := os.Create(outputName) if err != nil { return err From 52c898da4d7b8992be546abf3bc836a65386e0a8 Mon Sep 17 00:00:00 2001 From: Will Scott Date: Sat, 6 Nov 2021 21:45:04 +0000 Subject: [PATCH 5/6] code review comments --- cmd/car/car.go | 1 + cmd/car/extract.go | 9 +++------ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/cmd/car/car.go b/cmd/car/car.go index 74f3ad53..84caa549 100644 --- a/cmd/car/car.go +++ b/cmd/car/car.go @@ -44,6 +44,7 @@ func main1() int { Name: "file", Aliases: []string{"f"}, Usage: "The car file to extract from", + Required: true, TakesFile: true, }, &cli.BoolFlag{ diff --git a/cmd/car/extract.go b/cmd/car/extract.go index 0368c665..551d6554 100644 --- a/cmd/car/extract.go +++ b/cmd/car/extract.go @@ -22,14 +22,11 @@ import ( // ExtractCar pulls files and directories out of a car func ExtractCar(c *cli.Context) error { - if !c.IsSet("file") { - return fmt.Errorf("a file source must be specified") - } outputDir, err := os.Getwd() if err != nil { return err } - if c.Args().Len() > 0 { + if c.Args().Present() { outputDir = c.Args().First() } @@ -69,7 +66,7 @@ func ExtractCar(c *cli.Context) error { func extractRoot(c *cli.Context, ls *ipld.LinkSystem, root cid.Cid, outputDir string) error { if root.Prefix().Codec == cid.Raw { if c.IsSet("verbose") { - fmt.Fprintf(os.Stderr, "skipping raw root %s\n", root) + fmt.Fprintf(c.App.ErrWriter, "skipping raw root %s\n", root) } return nil } @@ -143,7 +140,7 @@ func extractDir(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputRoot, ou return err } if c.IsSet("verbose") { - fmt.Fprintf(os.Stdout, "%s\n", nextRes) + fmt.Fprintf(c.App.Writer, "%s\n", nextRes) } if val.Kind() != ipld.Kind_Link { From 344571f1ab9bb0485e424d200ee8d1176cd1c43b Mon Sep 17 00:00:00 2001 From: Will Scott Date: Mon, 8 Nov 2021 14:52:44 +0000 Subject: [PATCH 6/6] code review --- cmd/car/car.go | 2 +- cmd/car/extract.go | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cmd/car/car.go b/cmd/car/car.go index 84caa549..43b7307d 100644 --- a/cmd/car/car.go +++ b/cmd/car/car.go @@ -37,7 +37,7 @@ func main1() int { { Name: "extract", Aliases: []string{"x"}, - Usage: "Extract the contents of a car", + Usage: "Extract the contents of a car when the car encodes UnixFS data", Action: ExtractCar, Flags: []cli.Flag{ &cli.StringFlag{ diff --git a/cmd/car/extract.go b/cmd/car/extract.go index 551d6554..4a56e177 100644 --- a/cmd/car/extract.go +++ b/cmd/car/extract.go @@ -122,7 +122,9 @@ func extractDir(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputRoot, ou return err } // make the directory. - os.MkdirAll(dirPath, 0755) + if err := os.MkdirAll(dirPath, 0755); err != nil { + return err + } if n.Kind() == ipld.Kind_Map { mi := n.MapIterator()