Merge pull request #27 from ipfs/feat/doc/notes

[WIP] documentation notes
ipfs · Dec 16, 2018 · 2c1b835 · 2c1b835
2 parents ec1ca83 + 0167a27
commit 2c1b835
Show file tree

Hide file tree

Showing 6 changed files with 146 additions and 4 deletions.
diff --git a/README.md b/README.md
@@ -33,6 +33,17 @@ import "github.com/ipfs/go-mfs"
 
 Check the [GoDoc documentation](https://godoc.org/github.com/ipfs/go-mfs)
 
+## Repository Structure
+This repository contains many files, all belonging to the root `mfs` package.
+
+* `file.go`: MFS `File`.
+* `dir.go`: MFS `Directory`.
+* `fd.go`: `FileDescriptor` used to operate on `File`s.
+* `ops.go`: Functions that do not belong to either `File` nor `Directory` (although they mostly operate on them) that contain common operations to the MFS, e.g., find, move, add a file, make a directory.
+* `system.go`: Made up of two parts, the MFS `Root` and the `Republisher`.
+* `mfs_test.go`: General tests (needs a [revision](https://github.com/ipfs/go-mfs/issues/9)).
+* `repub_test.go`: Republisher-specific tests (contains only the `TestRepublisher` function).
+
 ## Contribute
 
 PRs accepted.

diff --git a/dir.go b/dir.go
@@ -21,13 +21,20 @@ var ErrNotYetImplemented = errors.New("not yet implemented")
 var ErrInvalidChild = errors.New("invalid child node")
 var ErrDirExists = errors.New("directory already has entry by that name")
 
+// TODO: There's too much functionality associated with this structure,
+// let's organize it (and if possible extract part of it elsewhere)
+// and document the main features of `Directory` here.
 type Directory struct {
 	inode
 
+	// Cache.
+	// TODO: Should this be a single cache of `FSNode`s?
 	childDirs map[string]*Directory
 	files     map[string]*File
 
 	lock sync.Mutex
+	// TODO: What content is being protected here exactly? The entire directory?
+
 	ctx  context.Context
 
 	// UnixFS directory implementation used for creating,
@@ -73,23 +80,47 @@ func (d *Directory) SetCidBuilder(b cid.Builder) {
 
 // closeChild updates the child by the given name to the dag node 'nd'
 // and changes its own dag node
+// `sync` (alias `fullsync`): has two uses, propagate the update upwards
+// (in which case we wouldn't want this?) and in `closeChildUpdate`.
+// TODO: Find *all* the places where `sync`/`fullsync` is evaluated.
 func (d *Directory) closeChild(name string, nd ipld.Node, sync bool) error {
+
+	// There's a local flush (`closeChildUpdate`) and a propagated flush (`closeChild`).
+
 	mynd, err := d.closeChildUpdate(name, nd, sync)
 	if err != nil {
 		return err
 	}
 
+	// TODO: The `sync` seems to be tightly coupling this two pieces of code,
+	// we use the node returned by `closeChildUpdate` (which entails a copy)
+	// only if `sync` is set, and we are discarding it otherwise. At the very
+	// least the `if sync {` clause at the end of `closeChildUpdate` should
+	// be merged with this one.
+
 	if sync {
 		return d.parent.closeChild(d.name, mynd, true)
 	}
 	return nil
 }
 
 // closeChildUpdate is the portion of closeChild that needs to be locked around
+// TODO: Definitely document this.
+// Updates the child entry under `name` with the node `nd` and if `sync`
+// is set it "flushes" the node (adding it to the `DAGService`) that
+// represents this directory.
+// TODO: As mentioned elsewhere "flush" sometimes means persist the node in the
+// DAG service and other update the parent node pointing to it.
+//
+// So, calling this with `sync`/`fullsync` off (this is pretty much the only
+// place where `fullsync` seems to matter) will just update the file entry in
+// this directory without updating the parent and without saving the node.
 func (d *Directory) closeChildUpdate(name string, nd ipld.Node, sync bool) (*dag.ProtoNode, error) {
 	d.lock.Lock()
 	defer d.lock.Unlock()
 
+	// TODO: Clearly define how are we propagating changes to lower layers
+	// like UnixFS.
 	err := d.updateChild(name, nd)
 	if err != nil {
 		return nil, err
@@ -118,6 +149,7 @@ func (d *Directory) flushCurrentNode() (*dag.ProtoNode, error) {
 	}
 
 	return pbnd.Copy().(*dag.ProtoNode), nil
+	// TODO: Why do we need a copy?
 }
 
 func (d *Directory) updateChild(name string, nd ipld.Node) error {
@@ -200,6 +232,8 @@ func (d *Directory) Uncache(name string) {
 	defer d.lock.Unlock()
 	delete(d.files, name)
 	delete(d.childDirs, name)
+	// TODO: We definitely need to join these maps if we are manipulating
+	// them like this.
 }
 
 // childFromDag searches through this directories dag node for a child link
@@ -392,6 +426,9 @@ func (d *Directory) AddUnixFSChild(name string, node ipld.Node) error {
 	return nil
 }
 
+// TODO: Difference between `sync` and `Flush`? This seems
+// to be related to the internal cache and not to the MFS
+// hierarchy update.
 func (d *Directory) sync() error {
 	for name, dir := range d.childDirs {
 		nd, err := dir.GetNode()

diff --git a/fd.go b/fd.go
@@ -9,6 +9,12 @@ import (
 	context "context"
 )
 
+// One `File` can have many `FileDescriptor`s associated to it
+// (only one if it's RW, many if they are RO, see `File.desclock`).
+// A `FileDescriptor` contains the "view" of the file (through an
+// instance of a `DagModifier`), that's why it (and not the `File`)
+// has the responsibility to `Flush` (which crystallizes that view
+// in the `File`'s `Node`).
 type FileDescriptor interface {
 	io.Reader
 	CtxReadFull(context.Context, []byte) (int, error)
@@ -32,6 +38,7 @@ type fileDescriptor struct {
 	sync       bool
 	hasChanges bool
 
+	// TODO: Where is this variable set?
 	closed bool
 }
 
@@ -84,6 +91,7 @@ func (fi *fileDescriptor) Close() error {
 		case OpenWriteOnly, OpenReadWrite:
 			fi.inode.desclock.Unlock()
 		}
+		// TODO: `closed` should be set here.
 	}()
 
 	if fi.closed {
@@ -106,6 +114,8 @@ func (fi *fileDescriptor) Close() error {
 	return nil
 }
 
+// TODO: Who uses `Sync` and who `Flush`? Do the consumers of this API
+// know about the (undocumented) `fullsync` argument?
 func (fi *fileDescriptor) Sync() error {
 	return fi.flushUp(false)
 }
@@ -116,6 +126,9 @@ func (fi *fileDescriptor) Flush() error {
 
 // flushUp syncs the file and adds it to the dagservice
 // it *must* be called with the File's lock taken
+// TODO: What is `fullsync`? Propagate the changes upward
+// to the root flushing every node in the path (the "up"
+// part of `flushUp`).
 func (fi *fileDescriptor) flushUp(fullsync bool) error {
 	nd, err := fi.mod.GetNode()
 	if err != nil {
@@ -129,9 +142,12 @@ func (fi *fileDescriptor) flushUp(fullsync bool) error {
 
 	fi.inode.nodelk.Lock()
 	fi.inode.node = nd
+	// TODO: Create a `SetNode` method.
 	name := fi.inode.name
 	parent := fi.inode.parent
+	// TODO: Can the parent be modified? Do we need to do this inside the lock?
 	fi.inode.nodelk.Unlock()
+	// TODO: Maybe all this logic should happen in `File`.
 
 	return parent.closeChild(name, nd, fullsync)
 }

diff --git a/file.go b/file.go
@@ -13,12 +13,22 @@ import (
 	ipld "github.com/ipfs/go-ipld-format"
 )
 
+// File represents a file in the MFS, its logic its mainly targeted
+// to coordinating (potentially many) `FileDescriptor`s pointing to
+// it.
 type File struct {
 	inode
 
+	// Lock to coordinate the `FileDescriptor`s associated to this file.
 	desclock sync.RWMutex
 
+	// This isn't any node, it's the root node that represents the
+	// entire DAG of nodes that comprise the file.
+	// TODO: Rename, there should be an explicit term for these root nodes
+	// of a particular sub-DAG that abstract an upper layer's entity.
 	node   ipld.Node
+
+	// TODO: Rename.
 	nodelk sync.Mutex
 
 	RawLeaves bool
@@ -52,6 +62,10 @@ func (fi *File) Open(flags int, sync bool) (FileDescriptor, error) {
 	node := fi.node
 	fi.nodelk.Unlock()
 
+	// TODO: Move this `switch` logic outside (maybe even
+	// to another package, this seems like a job of UnixFS),
+	// `NewDagModifier` uses the IPLD node, we're not
+	// extracting anything just doing a safety check.
 	switch node := node.(type) {
 	case *dag.ProtoNode:
 		fsn, err := ft.FSNodeFromBytes(node.Data())
@@ -82,6 +96,8 @@ func (fi *File) Open(flags int, sync bool) (FileDescriptor, error) {
 	}
 
 	dmod, err := mod.NewDagModifier(context.TODO(), node, fi.dagService, chunker.DefaultSplitter)
+	// TODO: Remove the use of the `chunker` package here, add a new `NewDagModifier` in
+	// `go-unixfs` with the `DefaultSplitter` already included.
 	if err != nil {
 		return nil, err
 	}
@@ -96,6 +112,11 @@ func (fi *File) Open(flags int, sync bool) (FileDescriptor, error) {
 }
 
 // Size returns the size of this file
+// TODO: Should we be providing this API?
+// TODO: There's already a `FileDescriptor.Size()` that
+// through the `DagModifier`'s `fileSize` function is doing
+// pretty much the same thing as here, we should at least call
+// that function and wrap the `ErrNotUnixfs` with an MFS text.
 func (fi *File) Size() (int64, error) {
 	fi.nodelk.Lock()
 	defer fi.nodelk.Unlock()
@@ -114,12 +135,21 @@ func (fi *File) Size() (int64, error) {
 }
 
 // GetNode returns the dag node associated with this file
+// TODO: Use this method and do not access the `nodelk` directly anywhere else.
 func (fi *File) GetNode() (ipld.Node, error) {
 	fi.nodelk.Lock()
 	defer fi.nodelk.Unlock()
 	return fi.node, nil
 }
 
+// TODO: Tight coupling with the `FileDescriptor`, at the
+// very least this should be an independent function that
+// takes a `File` argument and automates the open/flush/close
+// operations.
+// TODO: Why do we need to flush a file that isn't opened?
+// (the `OpenWriteOnly` seems to implicitly be targeting a
+// closed file, a file we forgot to flush? can we close
+// a file without flushing?)
 func (fi *File) Flush() error {
 	// open the file in fullsync mode
 	fd, err := fi.Open(OpenWriteOnly, true)
@@ -134,6 +164,7 @@ func (fi *File) Flush() error {
 
 func (fi *File) Sync() error {
 	// just being able to take the writelock means the descriptor is synced
+	// TODO: Why?
 	fi.desclock.Lock()
 	fi.desclock.Unlock()
 	return nil

diff --git a/ops.go b/ops.go
@@ -12,7 +12,13 @@ import (
 	ipld "github.com/ipfs/go-ipld-format"
 )
 
+// TODO: Evaluate moving all this operations to as `Root`
+// methods, since all of them use it as its first argument
+// and there is no clear documentation that explains this
+// separation.
+
 // Mv moves the file or directory at 'src' to 'dst'
+// TODO: Document what the strings 'src' and 'dst' represent.
 func Mv(r *Root, src, dst string) error {
 	srcDir, srcFname := gopath.Split(src)
 
@@ -83,6 +89,15 @@ func lookupDir(r *Root, path string) (*Directory, error) {
 }
 
 // PutNode inserts 'nd' at 'path' in the given mfs
+// TODO: Rename or clearly document that this is not about nodes but actually
+// MFS files/directories (that in the underlying representation can be
+// considered as just nodes).
+// TODO: Document why are we handling IPLD nodes in the first place when we
+// are actually referring to files/directories (that is, it can't be any
+// node, it has to have a specific format).
+// TODO: Can this function add directories or just files? What would be the
+// difference between adding a directory with this method and creating it
+// with `Mkdir`.
 func PutNode(r *Root, path string, nd ipld.Node) error {
 	dirp, filename := gopath.Split(path)
 	if filename == "" {
@@ -207,6 +222,8 @@ func DirLookup(d *Directory, pth string) (FSNode, error) {
 	return cur, nil
 }
 
+// TODO: Document this function and link its functionality
+// with the republisher.
 func FlushPath(rt *Root, pth string) error {
 	nd, err := Lookup(rt, pth)
 	if err != nil {