Skip to content

Commit

Permalink
support extraction of unixfs content stored in car files (#263)
Browse files Browse the repository at this point in the history
* support extraction of unixfs content stored in car files
  • Loading branch information
willscott authored Nov 9, 2021
1 parent 85f0751 commit 6d94b7b
Show file tree
Hide file tree
Showing 5 changed files with 272 additions and 8 deletions.
20 changes: 20 additions & 0 deletions cmd/car/car.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,26 @@ func main1() int {
Usage: "Detach an index to a detached file",
Action: DetachCar,
},
{
Name: "extract",
Aliases: []string{"x"},
Usage: "Extract the contents of a car when the car encodes UnixFS data",
Action: ExtractCar,
Flags: []cli.Flag{
&cli.StringFlag{
Name: "file",
Aliases: []string{"f"},
Usage: "The car file to extract from",
Required: true,
TakesFile: true,
},
&cli.BoolFlag{
Name: "verbose",
Aliases: []string{"v"},
Usage: "Include verbose information about extracted contents",
},
},
},
{
Name: "filter",
Aliases: []string{"f"},
Expand Down
228 changes: 228 additions & 0 deletions cmd/car/extract.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
package main

import (
"bytes"
"fmt"
"io"
"os"
"path"
"path/filepath"

"github.com/ipfs/go-cid"
"github.com/ipfs/go-unixfsnode"
"github.com/ipfs/go-unixfsnode/data"
"github.com/ipfs/go-unixfsnode/file"
"github.com/ipld/go-car/v2/blockstore"
dagpb "github.com/ipld/go-codec-dagpb"
"github.com/ipld/go-ipld-prime"
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
basicnode "github.com/ipld/go-ipld-prime/node/basic"
"github.com/urfave/cli/v2"
)

// ExtractCar pulls files and directories out of a car.
//
// The car to read is named by the "file" flag. Contents are written beneath
// the first positional argument when one is given, otherwise beneath the
// current working directory. Every root listed by the car is extracted in
// turn; the first failure aborts the extraction and is returned.
func ExtractCar(c *cli.Context) error {
	outputDir, err := os.Getwd()
	if err != nil {
		return err
	}
	if c.Args().Present() {
		outputDir = c.Args().First()
	}

	bs, err := blockstore.OpenReadOnly(c.String("file"))
	if err != nil {
		return err
	}
	// Release the underlying car file once extraction is done. The store is
	// only read from, so a failure to close cannot lose data and is ignored.
	defer bs.Close()

	// Serve block loads for the link system straight out of the car.
	ls := cidlink.DefaultLinkSystem()
	ls.TrustedStorage = true
	ls.StorageReadOpener = func(_ ipld.LinkContext, l ipld.Link) (io.Reader, error) {
		cl, ok := l.(cidlink.Link)
		if !ok {
			return nil, fmt.Errorf("not a cidlink")
		}
		blk, err := bs.Get(cl.Cid)
		if err != nil {
			return nil, err
		}
		return bytes.NewReader(blk.RawData()), nil
	}

	roots, err := bs.Roots()
	if err != nil {
		return err
	}

	for _, root := range roots {
		if err := extractRoot(c, &ls, root, outputDir); err != nil {
			return err
		}
	}

	return nil
}

// extractRoot extracts the UnixFS DAG rooted at root into outputDir.
//
// Roots that use the raw codec are skipped; in verbose mode a note is written
// to the app's error writer. Any other root is loaded as a dag-pb node,
// reified as UnixFS and handed to extractDir. The output directory is created
// if it does not exist yet. Errors from the directory walk are wrapped with
// the root CID.
func extractRoot(c *cli.Context, ls *ipld.LinkSystem, root cid.Cid, outputDir string) error {
	if root.Prefix().Codec == cid.Raw {
		// Bool reports the flag's value; IsSet would also be true for
		// an explicit --verbose=false.
		if c.Bool("verbose") {
			fmt.Fprintf(c.App.ErrWriter, "skipping raw root %s\n", root)
		}
		return nil
	}

	pbn, err := ls.Load(ipld.LinkContext{}, cidlink.Link{Cid: root}, dagpb.Type.PBNode)
	if err != nil {
		return err
	}
	pbnode, ok := pbn.(dagpb.PBNode)
	if !ok {
		return fmt.Errorf("%s: root is not a dag-pb node", root)
	}

	ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls)
	if err != nil {
		return err
	}

	// Create the output directory before resolving it: EvalSymlinks fails
	// on a path that does not exist, so resolving first would make it
	// impossible to extract into a fresh directory.
	if err := os.MkdirAll(outputDir, 0755); err != nil {
		return err
	}
	outputResolvedDir, err := filepath.EvalSymlinks(outputDir)
	if err != nil {
		return err
	}

	if err := extractDir(c, ls, ufn, outputResolvedDir, "/"); err != nil {
		return fmt.Errorf("%s: %w", root, err)
	}

	return nil
}

// resolvePath maps pth, a slash-rooted path inside the archive, onto the
// filesystem location beneath root where it should be written.
//
// The parent directory of the result must already exist and must not be
// reached through a symlink: it is resolved with filepath.EvalSymlinks and
// compared with its literal form. root is therefore expected to be an
// already symlink-resolved path. The final path element itself is not
// inspected.
//
// It returns the joined path, or an error when pth cannot be made relative
// to "/", when the parent directory cannot be resolved, or when resolving it
// leads somewhere other than the literal parent.
func resolvePath(root, pth string) (string, error) {
	rp, err := filepath.Rel("/", pth)
	if err != nil {
		return "", fmt.Errorf("couldn't check relative-ness of %s: %w", pth, err)
	}
	// Stay within path/filepath throughout so the value compared below uses
	// the same separator and cleaning rules as EvalSymlinks' result.
	joined := filepath.Join(root, rp)

	parent := filepath.Dir(joined)
	final, err := filepath.EvalSymlinks(parent)
	if err != nil {
		return "", fmt.Errorf("couldn't eval symlinks in %s: %w", parent, err)
	}
	if final != filepath.Clean(parent) {
		return "", fmt.Errorf("path attempts to redirect through symlinks")
	}
	return joined, nil
}

// extractDir writes the directory node n to outputPath, a slash-rooted path
// relative to outputRoot, and recurses into its entries.
//
// The directory itself is created first. n must be a map of entry name to
// link; any other kind yields a "not a directory" error. For each entry the
// linked block is loaded and handled by kind:
//
//   - plain bytes are written out as a file;
//   - a dag-pb node whose UnixFS type is Directory or HAMTShard is reified
//     and extracted recursively;
//   - a dag-pb node of type File or Raw is written out as a file;
//   - a dag-pb node of type Symlink becomes a symlink whose target is the
//     node's data;
//   - any other UnixFS type is silently skipped.
//
// In verbose mode the destination of every entry is printed to the app's
// writer before it is processed. Every destination goes through resolvePath,
// so an entry whose parent directory is reached through a symlink is
// rejected.
func extractDir(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputRoot, outputPath string) error {
	dirPath, err := resolvePath(outputRoot, outputPath)
	if err != nil {
		return err
	}
	// make the directory.
	if err := os.MkdirAll(dirPath, 0755); err != nil {
		return err
	}

	if n.Kind() != ipld.Kind_Map {
		return fmt.Errorf("not a directory")
	}

	// Bool reports the flag's value; IsSet would also be true for an
	// explicit --verbose=false.
	verbose := c.Bool("verbose")

	for mi := n.MapIterator(); !mi.Done(); {
		key, val, err := mi.Next()
		if err != nil {
			return err
		}
		ks, err := key.AsString()
		if err != nil {
			return err
		}
		entryPath := path.Join(outputPath, ks)
		nextRes, err := resolvePath(outputRoot, entryPath)
		if err != nil {
			return err
		}
		if verbose {
			fmt.Fprintf(c.App.Writer, "%s\n", nextRes)
		}

		if val.Kind() != ipld.Kind_Link {
			return fmt.Errorf("unexpected map value for %s at %s", ks, outputPath)
		}
		// a directory may be represented as a map of name:<link> if unixADL is applied
		vl, err := val.AsLink()
		if err != nil {
			return err
		}
		dest, err := ls.Load(ipld.LinkContext{}, vl, basicnode.Prototype.Any)
		if err != nil {
			return err
		}

		// degenerate files are handled here.
		if dest.Kind() == ipld.Kind_Bytes {
			if err := extractFile(c, ls, dest, nextRes); err != nil {
				return err
			}
			continue
		}

		// dir / pbnode
		pbb := dagpb.Type.PBNode.NewBuilder()
		if err := pbb.AssignNode(dest); err != nil {
			return err
		}
		pbnode, ok := pbb.Build().(dagpb.PBNode)
		if !ok {
			return fmt.Errorf("unexpected node type for %s at %s", ks, outputPath)
		}

		// interpret dagpb 'data' as unixfs data and look at type.
		ufsData, err := pbnode.LookupByString("Data")
		if err != nil {
			return err
		}
		ufsBytes, err := ufsData.AsBytes()
		if err != nil {
			return err
		}
		ufsNode, err := data.DecodeUnixFSData(ufsBytes)
		if err != nil {
			return err
		}

		switch ufsNode.DataType.Int() {
		case data.Data_Directory, data.Data_HAMTShard:
			ufn, err := unixfsnode.Reify(ipld.LinkContext{}, pbnode, ls)
			if err != nil {
				return err
			}
			if err := extractDir(c, ls, ufn, outputRoot, entryPath); err != nil {
				return err
			}
		case data.Data_File, data.Data_Raw:
			if err := extractFile(c, ls, pbnode, nextRes); err != nil {
				return err
			}
		case data.Data_Symlink:
			// Must panics when the optional field is absent; a
			// malformed car must produce an error instead.
			if !ufsNode.Data.Exists() {
				return fmt.Errorf("symlink %s at %s has no target", ks, outputPath)
			}
			target := ufsNode.Data.Must().Bytes()
			if err := os.Symlink(string(target), nextRes); err != nil {
				return err
			}
		}
	}
	return nil
}

// extractFile writes the UnixFS file represented by n to outputName.
//
// The node is opened as a UnixFS file through ls, and its content is streamed
// into a file that is created (or truncated) at outputName. It returns the
// first error from opening the node, creating the file, copying the content,
// or closing the file.
func extractFile(c *cli.Context, ls *ipld.LinkSystem, n ipld.Node, outputName string) error {
	node, err := file.NewUnixFSFile(c.Context, n, ls)
	if err != nil {
		return err
	}

	f, err := os.Create(outputName)
	if err != nil {
		return err
	}

	if _, err := io.Copy(f, node); err != nil {
		// The copy error is the one worth reporting; closing here only
		// releases the descriptor.
		_ = f.Close()
		return err
	}
	// Close can surface a deferred write error, so it must be reported
	// rather than dropped.
	return f.Close()
}
12 changes: 12 additions & 0 deletions cmd/car/testdata/script/create-extract.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
car create --file=out.car foo.txt bar.txt
mkdir out
car extract -v -f out.car out
! stderr .
stdout -count=2 'txt$'
car create --file=out2.car out/foo.txt out/bar.txt
cmp out.car out2.car

-- foo.txt --
foo content
-- bar.txt --
bar content
4 changes: 2 additions & 2 deletions cmd/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ require (
github.com/ipfs/go-block-format v0.0.3
github.com/ipfs/go-cid v0.1.0
github.com/ipfs/go-ipfs-blockstore v1.0.3
github.com/ipfs/go-unixfsnode v1.1.4-0.20211018205408-e0bbe4aca062
github.com/ipfs/go-unixfsnode v1.1.4-0.20211105121048-b9b6e9dc571e
github.com/ipld/go-car v0.3.2-0.20211001222544-c93f5367a75c
github.com/ipld/go-car/v2 v2.0.3-0.20211001222544-c93f5367a75c
github.com/ipld/go-car/v2 v2.1.0
github.com/ipld/go-codec-dagpb v1.3.0
github.com/ipld/go-ipld-prime v0.12.4-0.20211014180653-3ba656a3bc6b
github.com/multiformats/go-multicodec v0.3.1-0.20210902112759-1539a079fd61
Expand Down
16 changes: 10 additions & 6 deletions cmd/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,10 @@ github.com/ipfs/go-datastore v0.0.5/go.mod h1:d4KVXhMt913cLBEI/PXAy6ko+W7e9AhyAK
github.com/ipfs/go-datastore v0.1.0/go.mod h1:d4KVXhMt913cLBEI/PXAy6ko+W7e9AhyAKBGh803qeE=
github.com/ipfs/go-datastore v0.3.1/go.mod h1:w38XXW9kVFNp57Zj5knbKWM2T+KOZCGDRVNdgPHtbHw=
github.com/ipfs/go-datastore v0.4.1/go.mod h1:SX/xMIKoCszPqp+z9JhPYCmoOoXTvaa13XEbGtsFUhA=
github.com/ipfs/go-datastore v0.4.2 h1:h8/n7WPzhp239kkLws+epN3Ic7YtcBPgcaXfEfdVDWM=
github.com/ipfs/go-datastore v0.4.2/go.mod h1:SX/xMIKoCszPqp+z9JhPYCmoOoXTvaa13XEbGtsFUhA=
github.com/ipfs/go-datastore v0.4.6 h1:zU2cmweykxJ+ziXnA2cPtsLe8rdR/vrthOipLPuf6kc=
github.com/ipfs/go-datastore v0.4.6/go.mod h1:XSipLSc64rFKSFRFGo1ecQl+WhYce3K7frtpHkyPFUc=
github.com/ipfs/go-detect-race v0.0.1 h1:qX/xay2W3E4Q1U7d9lNs1sU9nvguX0a7319XbyQ6cOk=
github.com/ipfs/go-detect-race v0.0.1/go.mod h1:8BNT7shDZPo99Q74BpGMK+4D8Mn4j46UU0LZ723meps=
github.com/ipfs/go-ds-badger v0.0.2/go.mod h1:Y3QpeSFWQf6MopLTiZD+VT6IC1yZqaGmjvRcKeSGij8=
github.com/ipfs/go-ds-leveldb v0.0.1/go.mod h1:feO8V3kubwsEF22n0YRQCffeb79OOYIykR4L04tMOYc=
Expand Down Expand Up @@ -247,14 +249,14 @@ github.com/ipfs/go-peertaskqueue v0.1.0 h1:bpRbgv76eT4avutNPDFZuCPOQus6qTgurEYxf
github.com/ipfs/go-peertaskqueue v0.1.0/go.mod h1:Jmk3IyCcfl1W3jTW3YpghSwSEC6IJ3Vzz/jUmWw8Z0U=
github.com/ipfs/go-unixfs v0.2.4 h1:6NwppOXefWIyysZ4LR/qUBPvXd5//8J3jiMdvpbw6Lo=
github.com/ipfs/go-unixfs v0.2.4/go.mod h1:SUdisfUjNoSDzzhGVxvCL9QO/nKdwXdr+gbMUdqcbYw=
github.com/ipfs/go-unixfsnode v1.1.4-0.20211018205408-e0bbe4aca062 h1:SAnV3UCdeWZJdh6MgiJgf4q/icHdKN/TAl+fGdn97BI=
github.com/ipfs/go-unixfsnode v1.1.4-0.20211018205408-e0bbe4aca062/go.mod h1:EwI/kBD0ypab4x1JSn+CNt8ioH1uaGJTJcGyi5McERE=
github.com/ipfs/go-unixfsnode v1.1.4-0.20211105121048-b9b6e9dc571e h1:EvUeWaQoNWLoxupHbeREW+yol0iEuzSknAMNthLsKdM=
github.com/ipfs/go-unixfsnode v1.1.4-0.20211105121048-b9b6e9dc571e/go.mod h1:OmvLSnywiObMHBLt39Xo9jO+z+/rDNx82Yhn6QmPGHY=
github.com/ipfs/go-verifcid v0.0.1 h1:m2HI7zIuR5TFyQ1b79Da5N9dnnCP1vcu2QqawmWlK2E=
github.com/ipfs/go-verifcid v0.0.1/go.mod h1:5Hrva5KBeIog4A+UpqlaIU+DEstipcJYQQZc0g37pY0=
github.com/ipld/go-car v0.3.2-0.20211001222544-c93f5367a75c h1:lUNcb71DmG/GSEim2UqDiOcm6E+jqNzo3gWH4JSjOqg=
github.com/ipld/go-car v0.3.2-0.20211001222544-c93f5367a75c/go.mod h1:lIyfp4c4fs4qMQYBWPjHzT7fXxtw5r/Sj4QBVnTaoJc=
github.com/ipld/go-car/v2 v2.0.3-0.20211001222544-c93f5367a75c h1:aayNPQ5nHk2YhA3N61mERYQDL8yL86hekenOJfiulok=
github.com/ipld/go-car/v2 v2.0.3-0.20211001222544-c93f5367a75c/go.mod h1:Xr6GwkDhv8dtOtgHzOynAkIOg0t0YiPc5DxBPppWqZA=
github.com/ipld/go-car/v2 v2.1.0 h1:t8R/WXUSkfu1K1gpPk76mytCxsEdMjGcMIgpOq3/Cnw=
github.com/ipld/go-car/v2 v2.1.0/go.mod h1:Xr6GwkDhv8dtOtgHzOynAkIOg0t0YiPc5DxBPppWqZA=
github.com/ipld/go-codec-dagpb v1.2.0/go.mod h1:6nBN7X7h8EOsEejZGqC7tej5drsdBAXbMHyBT+Fne5s=
github.com/ipld/go-codec-dagpb v1.3.0 h1:czTcaoAuNNyIYWs6Qe01DJ+sEX7B+1Z0LcXjSatMGe8=
github.com/ipld/go-codec-dagpb v1.3.0/go.mod h1:ga4JTU3abYApDC3pZ00BC2RSvC3qfBb9MSJkMLSwnhA=
Expand All @@ -271,8 +273,9 @@ github.com/jbenet/go-cienv v0.0.0-20150120210510-1bb1476777ec/go.mod h1:rGaEvXB4
github.com/jbenet/go-cienv v0.1.0/go.mod h1:TqNnHUmJgXau0nCzC7kXWeotg3J9W34CUv5Djy1+FlA=
github.com/jbenet/go-temp-err-catcher v0.0.0-20150120210811-aac704a3f4f2/go.mod h1:8GXXJV31xl8whumTzdZsTt3RnUIiPqzkyf7mxToRCMs=
github.com/jbenet/goprocess v0.0.0-20160826012719-b497e2f366b8/go.mod h1:Ly/wlsjFq/qrU3Rar62tu1gASgGw6chQbSh/XgIIXCY=
github.com/jbenet/goprocess v0.1.3 h1:YKyIEECS/XvcfHtBzxtjBBbWK+MbvA6dG8ASiqwvr10=
github.com/jbenet/goprocess v0.1.3/go.mod h1:5yspPrukOVuOLORacaBi858NqyClJPQxYZlqdZVfqY4=
github.com/jbenet/goprocess v0.1.4 h1:DRGOFReOMqqDNXwW70QkacFW0YN9QnwLV0Vqk+3oU0o=
github.com/jbenet/goprocess v0.1.4/go.mod h1:5yspPrukOVuOLORacaBi858NqyClJPQxYZlqdZVfqY4=
github.com/jessevdk/go-flags v0.0.0-20141203071132-1679536dcc89/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
Expand All @@ -297,6 +300,7 @@ github.com/koron/go-ssdp v0.0.0-20180514024734-4a0ed625a78b h1:wxtKgYHEncAU00muM
github.com/koron/go-ssdp v0.0.0-20180514024734-4a0ed625a78b/go.mod h1:5Ky9EC2xfoUKUor0Hjgi2BJhCSXJfMOFlmyYrVKGQMk=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
Expand Down

0 comments on commit 6d94b7b

Please sign in to comment.