WIP: Basic Filestore implementation. #3368

Closed
wants to merge 1 commit into from
19 changes: 12 additions & 7 deletions blocks/blockstore/blockstore.go
@@ -21,7 +21,9 @@ import (
var log = logging.Logger("blockstore")

// BlockPrefix namespaces blockstore datastores
var BlockPrefix = ds.NewKey("blocks")
const DefaultPrefix = "/blocks"

var blockPrefix = ds.NewKey(DefaultPrefix)

var ValueTypeMismatch = errors.New("the retrieved value is not a Block")
var ErrHashMismatch = errors.New("block in storage has different hash than requested")
@@ -71,20 +73,23 @@ type gcBlockstore struct {
}

func NewBlockstore(d ds.Batching) *blockstore {
return NewBlockstoreWPrefix(d, DefaultPrefix)
}

func NewBlockstoreWPrefix(d ds.Batching, prefix string) *blockstore {
var dsb ds.Batching
dd := dsns.Wrap(d, BlockPrefix)
prefixKey := ds.NewKey(prefix)
dd := dsns.Wrap(d, prefixKey)
dsb = dd
return &blockstore{
datastore: dsb,
prefix: prefixKey,
}
}

type blockstore struct {
datastore ds.Batching

lk sync.RWMutex
gcreq int32
gcreqlk sync.Mutex
prefix ds.Key

rehash bool
}
@@ -175,7 +180,7 @@ func (bs *blockstore) AllKeysChan(ctx context.Context) (<-chan *cid.Cid, error)
// KeysOnly, because that would be _a lot_ of data.
q := dsq.Query{KeysOnly: true}
// datastore/namespace does *NOT* fix up Query.Prefix
q.Prefix = BlockPrefix.String()
q.Prefix = bs.prefix.String()
res, err := bs.datastore.Query(q)
if err != nil {
return nil, err
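For reference, a minimal sketch of how the new prefix-aware constructor might be used (not part of the diff; the import paths are copied from elsewhere in this PR, the map datastore mirrors the test setup, and the `/filestore` prefix is purely illustrative):

```go
package main

import (
	bstore "github.com/ipfs/go-ipfs/blocks/blockstore"

	ds "gx/ipfs/QmbzuUusHqaLLoNTDEVLcSF6vZDHZDLPC7p4bztRvvkXxU/go-datastore"
	dsync "gx/ipfs/QmbzuUusHqaLLoNTDEVLcSF6vZDHZDLPC7p4bztRvvkXxU/go-datastore/sync"
)

func main() {
	d := dsync.MutexWrap(ds.NewMapDatastore())

	// Default namespace, equivalent to the old BlockPrefix ("/blocks").
	_ = bstore.NewBlockstore(d)

	// Custom namespace; "/filestore" here is just an illustrative prefix.
	_ = bstore.NewBlockstoreWPrefix(d, "/filestore")
}
```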
4 changes: 2 additions & 2 deletions blocks/blockstore/blockstore_test.go
@@ -170,7 +170,7 @@ func TestAllKeysRespectsContext(t *testing.T) {
default:
}

e := dsq.Entry{Key: BlockPrefix.ChildString("foo").String()}
e := dsq.Entry{Key: blockPrefix.ChildString("foo").String()}
resultChan <- dsq.Result{Entry: e} // let it go.
close(resultChan)
<-done // should be done now.
@@ -190,7 +190,7 @@ func TestValueTypeMismatch(t *testing.T) {
block := blocks.NewBlock([]byte("some data"))

datastore := ds.NewMapDatastore()
k := BlockPrefix.Child(dshelp.CidToDsKey(block.Cid()))
k := blockPrefix.Child(dshelp.CidToDsKey(block.Cid()))
datastore.Put(k, "data that isn't a block!")

blockstore := NewBlockstore(ds_sync.MutexWrap(datastore))
17 changes: 16 additions & 1 deletion core/builder.go
@@ -16,6 +16,7 @@ import (
pin "github.com/ipfs/go-ipfs/pin"
repo "github.com/ipfs/go-ipfs/repo"
cfg "github.com/ipfs/go-ipfs/repo/config"
fsrepo "github.com/ipfs/go-ipfs/repo/fsrepo"

context "context"
retry "gx/ipfs/QmPF5kxTYFkzhaY5LmkExood7aTTZBHWQC6cjdDQBuGrjp/retry-datastore"
@@ -26,6 +27,9 @@ import (
ds "gx/ipfs/QmbzuUusHqaLLoNTDEVLcSF6vZDHZDLPC7p4bztRvvkXxU/go-datastore"
dsync "gx/ipfs/QmbzuUusHqaLLoNTDEVLcSF6vZDHZDLPC7p4bztRvvkXxU/go-datastore/sync"
ci "gx/ipfs/QmfWDLQjGjVe4fr5CoztYW2DYYjRysMJrFe1RCsXLPTf46/go-libp2p-crypto"

"github.com/ipfs/go-ipfs/filestore"
"github.com/ipfs/go-ipfs/filestore/support"
)

type BuildCfg struct {
@@ -184,7 +188,14 @@ func setupNode(ctx context.Context, n *IpfsNode, cfg *BuildCfg) error {
return err
}

n.Blockstore = bstore.NewGCBlockstore(cbs, bstore.NewGCLocker())
var mbs bstore.Blockstore = cbs

if n.Repo.DirectMount(fsrepo.FilestoreMount) != nil {
fs := bstore.NewBlockstoreWPrefix(n.Repo.Datastore(), fsrepo.FilestoreMount)
mbs = filestore_support.NewMultiBlockstore(cbs, fs)
}

n.Blockstore = bstore.NewGCBlockstore(mbs, bstore.NewGCLocker())

rcfg, err := n.Repo.Config()
if err != nil {
@@ -206,9 +217,13 @@ func setupNode(ctx context.Context, n *IpfsNode, cfg *BuildCfg) error {

n.Blocks = bserv.New(n.Blockstore, n.Exchange)
n.DAG = dag.NewDAGService(n.Blocks)
if fs, ok := n.Repo.DirectMount(fsrepo.FilestoreMount).(*filestore.Datastore); ok {
n.DAG = filestore_support.NewDAGService(fs, n.DAG)
}

internalDag := dag.NewDAGService(bserv.New(n.Blockstore, offline.Exchange(n.Blockstore)))
n.Pinning, err = pin.LoadPinner(n.Repo.Datastore(), n.DAG, internalDag)

if err != nil {
// TODO: we should move towards only running 'NewPinner' explicity on
// node init instead of implicitly here as a result of the pinner keys
35 changes: 29 additions & 6 deletions core/commands/add.go
@@ -1,13 +1,18 @@
package commands

import (
"errors"
"fmt"
"io"

"github.com/ipfs/go-ipfs/core/coreunix"
"github.com/ipfs/go-ipfs/filestore"
"github.com/ipfs/go-ipfs/filestore/support"
"github.com/ipfs/go-ipfs/repo/fsrepo"
"gx/ipfs/QmeWjRodbcZFKe5tMN7poEx3izym6osrLSnTLf9UjJZBbs/pb"

blockservice "github.com/ipfs/go-ipfs/blockservice"
//bs "github.com/ipfs/go-ipfs/blocks/blockstore"
bserv "github.com/ipfs/go-ipfs/blockservice"
cmds "github.com/ipfs/go-ipfs/commands"
files "github.com/ipfs/go-ipfs/commands/files"
core "github.com/ipfs/go-ipfs/core"
@@ -33,6 +38,7 @@ const (
chunkerOptionName = "chunker"
pinOptionName = "pin"
rawLeavesOptionName = "raw-leaves"
noCopyName = "no-copy"
)

var AddCmd = &cmds.Command{
@@ -80,6 +86,7 @@ You can now refer to the added file in a gateway, like so:
cmds.StringOption(chunkerOptionName, "s", "Chunking algorithm to use."),
cmds.BoolOption(pinOptionName, "Pin this object when adding.").Default(true),
cmds.BoolOption(rawLeavesOptionName, "Use raw blocks for leaf nodes. (experimental)"),
cmds.BoolOption(noCopyName, "Don't copy file contents. (experimental)"),
},
PreRun: func(req cmds.Request) error {
if quiet, _, _ := req.Option(quietOptionName).Bool(); quiet {
@@ -138,6 +145,7 @@ You can now refer to the added file in a gateway, like so:
chunker, _, _ := req.Option(chunkerOptionName).String()
dopin, _, _ := req.Option(pinOptionName).Bool()
rawblks, _, _ := req.Option(rawLeavesOptionName).Bool()
nocopy, _, _ := req.Option(noCopyName).Bool()

if hash {
nilnode, err := core.NewNode(n.Context(), &core.BuildCfg{
@@ -152,18 +160,33 @@ You can now refer to the added file in a gateway, like so:
n = nilnode
}

dserv := n.DAG
exchange := n.Exchange
local, _, _ := req.Option("local").Bool()
if local {
offlineexch := offline.Exchange(n.Blockstore)
bserv := blockservice.New(n.Blockstore, offlineexch)
dserv = dag.NewDAGService(bserv)
exchange = offline.Exchange(n.Blockstore)
}

outChan := make(chan interface{}, 8)
res.SetOutput((<-chan interface{})(outChan))

fileAdder, err := coreunix.NewAdder(req.Context(), n.Pinning, n.Blockstore, dserv)
var fileAdder *coreunix.Adder
if nocopy {
fs, ok := n.Repo.DirectMount(fsrepo.FilestoreMount).(*filestore.Datastore)
if !ok {
res.SetError(errors.New("filestore not enabled"), cmds.ErrNormal)
return
}
blockstore := filestore_support.NewBlockstore(n.Blockstore, fs)
blockService := bserv.NewWriteThrough(blockstore, exchange)
dagService := dag.NewDAGService(blockService)
fileAdder, err = coreunix.NewAdder(req.Context(), n.Pinning, blockstore, dagService)
} else if exchange != n.Exchange {
blockService := bserv.New(n.Blockstore, exchange)
dagService := dag.NewDAGService(blockService)
fileAdder, err = coreunix.NewAdder(req.Context(), n.Pinning, n.Blockstore, dagService)
} else {
fileAdder, err = coreunix.NewAdder(req.Context(), n.Pinning, n.Blockstore, n.DAG)
}
if err != nil {
res.SetError(err, cmds.ErrNormal)
return
2 changes: 1 addition & 1 deletion core/core.go
@@ -120,7 +120,7 @@ type IpfsNode struct {
proc goprocess.Process
ctx context.Context

mode mode
mode mode
localModeSet bool
}

76 changes: 76 additions & 0 deletions filestore/README.md
@@ -0,0 +1,76 @@
# Notes on the Filestore

The filestore is a work-in-progress datastore that stores the unixfs
data component of blocks in files on the filesystem instead of in the
block itself. The main use of the datastore is to add content to IPFS
without duplicating the content in the IPFS datastore.

The filestore is developed on Debian (GNU/Linux). It has had limited
testing on Windows and should work on Mac OS X and other Unix-like
systems.

## Adding Files

To add a file to IPFS without copying, use `add --no-copy`; to add a
directory, use `add -r --no-copy`. (Throughout this document all
commands are assumed to start with `ipfs`, so `filestore add` really
means `ipfs filestore add`.) For example, to add the file `hello.txt`
use:
```
ipfs add --no-copy "`pwd`"/hello.txt
```

Paths stored in the filestore must be absolute.

By default, the contents of the file are always verified by
recomputing the hash. The setting `Filestore.Verify` can be used to
change this to never recompute the hash (not recommended) or to only
recompute the hash when the modification-time has changed.

Adding files to the filestore will generally be faster than adding
blocks normally, as less data is copied around. Retrieving blocks from
the filestore takes about the same time when the hash is not
recomputed; when it is, retrieval is slower.

## About filestore entries

Each entry in the filestore is uniquely referred to by combining (1)
the hash of the block, (2) the path to the file, and (3) the offset
within the file, using the following syntax:
```
<HASH>/<FILEPATH>//<OFFSET>
```
for example:
```
QmVr26fY1tKyspEJBniVhqxQeEjhF78XerGiqWAwraVLQH//somedir/hello.txt//0
```
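As a rough illustration of this key syntax (not the parser used in this PR; the type and helper names below are made up), the three components can be recovered by splitting on the last `//` and the first `/`:

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// filestoreKey is a hypothetical holder for the three components above.
type filestoreKey struct {
	Hash   string
	Path   string // absolute path to the backing file
	Offset uint64 // offset of the block within that file
}

func parseKey(s string) (filestoreKey, error) {
	// The offset follows the last "//" (file paths are absolute, so the
	// hash/path boundary also appears as "//" earlier in the string).
	i := strings.LastIndex(s, "//")
	if i < 0 {
		// Only the hash is stored (the single-entry case described below).
		return filestoreKey{Hash: s}, nil
	}
	off, err := strconv.ParseUint(s[i+2:], 10, 64)
	if err != nil {
		return filestoreKey{}, fmt.Errorf("bad offset: %v", err)
	}
	j := strings.Index(s[:i], "/") // the hash ends at the first "/"
	if j < 0 {
		return filestoreKey{}, fmt.Errorf("missing file path in %q", s)
	}
	return filestoreKey{Hash: s[:j], Path: s[j+1 : i], Offset: off}, nil
}

func main() {
	k, _ := parseKey("QmVr26fY1tKyspEJBniVhqxQeEjhF78XerGiqWAwraVLQH//somedir/hello.txt//0")
	fmt.Printf("%+v\n", k)
}
```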

In the case that there is only one entry for a hash, the entry is
stored using just the hash. If there is more than one entry for a
hash (for example, if adding two files with identical content), then
one entry will be stored using just the hash and the others will be
stored using the full key. If the backing file changes or becomes
inaccessible for the default entry (the one with just the hash), the
other entries are tried until a valid entry is found. Once a valid
entry is found, that entry becomes the default, as sketched below.
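
A minimal sketch of that fallback behaviour (illustrative only, not the PR's implementation; the in-memory `candidates` map and block-reading code are stand-ins, and hash verification is omitted):

```go
package main

import (
	"errors"
	"fmt"
	"os"
)

type entry struct {
	path   string // absolute path to the backing file
	offset int64  // offset of the block within the file
	size   int    // length of the block
}

// candidates[hash][0] plays the role of the default entry for that hash.
var candidates = map[string][]entry{}

// getBlock tries the default entry first, then the remaining entries,
// promoting the first one that still works to be the new default.
func getBlock(hash string) ([]byte, error) {
	for i, e := range candidates[hash] {
		f, err := os.Open(e.path)
		if err != nil {
			continue // backing file missing or inaccessible: try the next entry
		}
		buf := make([]byte, e.size)
		_, err = f.ReadAt(buf, e.offset)
		f.Close()
		if err != nil {
			continue // file truncated or unreadable at this offset
		}
		if i != 0 {
			candidates[hash][0], candidates[hash][i] = candidates[hash][i], candidates[hash][0]
		}
		return buf, nil
	}
	return nil, errors.New("no valid filestore entry for " + hash)
}

func main() {
	candidates["QmExample"] = []entry{{path: "/somedir/hello.txt", offset: 0, size: 5}}
	data, err := getBlock("QmExample")
	fmt.Println(string(data), err)
}
```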

When listing the contents of the filestore, entries that are stored
using just the hash are displayed as
```
<HASH> /<FILEPATH>//<OFFSET>
```
with a space between the `<HASH>` and `<FILEPATH>`.

It is always possible to refer to a specific entry in the filestore
using the full key, regardless of how it is stored.

## Controlling when blocks are verified

The config variable `Filestore.Verify` can be used to customize when
blocks from the filestore are verified. The default value `Always`
will always verify blocks. A value of `IfChanged` will verify a
block if the modification time of the backing file has changed. This
value works well in most cases, but can miss some changes, especially
if the filesystem only tracks file modification times with a
resolution of one second (HFS+, used by OS X) or coarser (FAT32). A
value of `Never` never checks blocks.
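
A sketch of how these three settings could be applied on read (illustrative only, not the PR's code; the type names are hypothetical and SHA-256 stands in for the multihash actually used by IPFS):

```go
package main

import (
	"bytes"
	"crypto/sha256"
	"os"
	"time"
)

type verifyMode int

const (
	VerifyAlways    verifyMode = iota // default: always recompute the hash
	VerifyIfChanged                   // recompute only if the mtime moved
	VerifyNever                       // never recompute (not recommended)
)

// needsVerify decides whether block data read from the backing file must be
// re-hashed, given the mode and the mtime recorded when the block was added.
func needsVerify(mode verifyMode, path string, recordedModTime time.Time) bool {
	switch mode {
	case VerifyNever:
		return false
	case VerifyIfChanged:
		st, err := os.Stat(path)
		// If the file cannot be stat'ed or its mtime differs, re-verify.
		return err != nil || !st.ModTime().Equal(recordedModTime)
	default: // VerifyAlways
		return true
	}
}

// verify recomputes a digest over the data and compares it to the stored one.
func verify(data, want []byte) bool {
	sum := sha256.Sum256(data)
	return bytes.Equal(sum[:], want)
}

func main() {}
```

Note that `IfChanged` can miss a rewrite that leaves the recorded mtime unchanged, which is why the paragraph above warns about filesystems with coarse timestamp resolution.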