Skip to content
This repository has been archived by the owner on Sep 9, 2020. It is now read-only.

WIP: HashFromNode and VerifyDepTree methods #955

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions internal/fs/hash.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package fs

import (
"crypto/sha256"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strconv"

"github.com/pkg/errors"
)

// pathSeparator is the operating system's path separator expressed as a
// string, so it can be concatenated directly when building child pathnames.
const pathSeparator = string(filepath.Separator)

// skipModes is the set of file mode bits identifying special file system nodes
// (character and block devices, named pipes, and sockets). HashFromNode skips
// any node whose mode has one of these bits set.
const skipModes = os.ModeCharDevice |
	os.ModeDevice |
	os.ModeNamedPipe |
	os.ModeSocket

// HashFromNode returns a deterministic hash of the specified file system node,
// performing a breadth-first traversal of directories. While the specified
// prefix is joined with the pathname to walk the file system, the prefix (and
// the path separator following it) is eliminated from the pathname of the
// nodes encountered when hashing the pathnames.
//
// The prefix is cleaned before it is stripped, so prefixes such as ".", "./",
// "vendor/" or the file system root are handled safely: a pathname that does
// not begin with the cleaned prefix is hashed unmodified rather than being
// truncated by a fixed number of bytes.
//
// While walking directories, this function ignores any directory entry named
// `vendor`, `.bzr`, `.git`, `.hg`, and `.svn`, as these are typically used as
// vendor or Version Control System (VCS) directories. Device files, named
// pipes, and sockets are skipped as well.
//
// Other than the entries mentioned above, the calculated hash includes the
// pathname to every discovered file system node, whether it is an empty
// directory, a non-empty directory, empty file, non-empty file, or symbolic
// link. If a symbolic link, the referent name is included. If a file, the
// file's size and contents are included. If a non-empty directory, the
// contents of the directory are included.
//
// While filepath.Walk could have been used, that standard library function
// skips symbolic links, and for now, we want to hash the referent string of
// symbolic links.
//
// On success it returns the lowercase hexadecimal SHA-256 digest and a nil
// error. On failure it returns an empty string and the wrapped error of the
// first file system operation that failed.
func HashFromNode(prefix, pathname string) (hash string, err error) {
	// Create a single hash instance for the entire operation, rather than a new
	// hash for each node we encounter.
	h := sha256.New()

	// Example arguments: "../../../vendor", "github.com/account/library".
	//
	// NOTE: filepath.Join cleans its result. We deliberately do not use
	// filepath.Abs, because we don't want the hash to be based on the absolute
	// pathnames of the specified directory and contents.
	joined := filepath.Join(prefix, pathname)

	// trim is the leading text removed from every pathname before it is
	// written to the hash. It is derived from the cleaned prefix so that it
	// matches what filepath.Join produced above, and always ends with exactly
	// one path separator. It stays empty when no prefix was given.
	var trim string
	if prefix != "" {
		trim = filepath.Clean(prefix) // Clean never returns an empty string
		if trim[len(trim)-1] != filepath.Separator {
			trim += pathSeparator
		}
	}

	// Initialize a work queue with the os-agnostic cleaned up pathname.
	pathnameQueue := []string{joined}

	for len(pathnameQueue) > 0 {
		// NOTE: unshift a pathname from the queue
		pathname, pathnameQueue = pathnameQueue[0], pathnameQueue[1:]

		fi, er := os.Lstat(pathname)
		if er != nil {
			return "", errors.Wrap(er, "cannot Lstat")
		}

		mode := fi.Mode()

		// Skip special files
		if mode&skipModes != 0 {
			continue
		}

		// Strip the prefix only when the pathname really starts with it; a
		// fixed-length slice would corrupt the name (or panic) whenever the
		// joined pathname does not contain the prefix verbatim.
		name := pathname
		if len(trim) > 0 && len(name) >= len(trim) && name[:len(trim)] == trim {
			name = name[len(trim):]
		}

		// NOTE: Write pathname to hash, because hash ought to be as much a
		// function of the names of the files and directories as their
		// contents. Added benefit is that even empty directories and symbolic
		// links will affect final hash value.
		//
		// NOTE: Throughout this function, we ignore return values from writing
		// to the hash, because hash write always returns nil error.
		_, _ = h.Write([]byte(name))

		if mode&os.ModeSymlink != 0 {
			referent, er := os.Readlink(pathname)
			if er != nil {
				return "", errors.Wrap(er, "cannot Readlink")
			}
			// Write the referent to the hash and proceed to the next pathname
			// in the queue.
			_, _ = h.Write([]byte(referent))
			continue
		}

		fh, er := os.Open(pathname)
		if er != nil {
			return "", errors.Wrap(er, "cannot Open")
		}

		if fi.IsDir() {
			childrenNames, er := fh.Readdirnames(0) // 0: read names of all children
			if er != nil {
				err = errors.Wrap(er, "cannot Readdirnames")
				// NOTE: Even if there was an error reading the names of the
				// directory entries, we still must close file handle for the
				// open directory before we return. In this case, we simply skip
				// sorting and adding entry names to the work queue beforehand.
				childrenNames = nil
			}

			// NOTE: Sort children names to ensure deterministic ordering of
			// contents of each directory, ensuring hash remains same even if
			// operating system returns same values in a different order on
			// subsequent invocation.
			sort.Strings(childrenNames)

			for _, childName := range childrenNames {
				switch childName {
				case ".", "..", "vendor", ".bzr", ".git", ".hg", ".svn":
					// skip
				default:
					pathnameQueue = append(pathnameQueue, pathname+pathSeparator+childName)
				}
			}
		} else {
			_, _ = h.Write([]byte(strconv.FormatInt(fi.Size(), 10))) // format file size as base 10 integer
			_, er = io.Copy(h, fh)
			err = errors.Wrap(er, "cannot Copy") // errors.Wrap only wraps non-nil, so elide checking here
		}

		// NOTE: Close the file handle to the open directory or file. A Close
		// error is only reported when no earlier error occurred for this node.
		if er = fh.Close(); err == nil {
			err = errors.Wrap(er, "cannot Close")
		}
		if err != nil {
			return "", err // early termination iff error
		}
	}

	return fmt.Sprintf("%x", h.Sum(nil)), nil
}
40 changes: 40 additions & 0 deletions internal/fs/hash_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
package fs

import (
"os"
"path/filepath"
"testing"
)

// TestHashFromNodeWithFile hashes a single regular file using an empty prefix
// and compares the result against a pinned digest.
func TestHashFromNodeWithFile(t *testing.T) {
	const want = "bf7c45881248f74466f9624e8336747277d7901a4f7af43940be07c5539b78a8"

	got, err := HashFromNode("", "./testdata/blob")
	switch {
	case err != nil:
		t.Fatal(err)
	case got != want:
		t.Errorf("Actual:\n\t%#q\nExpected:\n\t%#q", got, want)
	}
}

// TestHashFromNodeWithDirectory hashes a directory tree reached through a
// non-empty prefix and compares the result against a pinned digest.
func TestHashFromNodeWithDirectory(t *testing.T) {
	const want = "d5ac28114417eae59b9ac02e3fac5bdff673e93cc91b408cde1989e1cd2efbd0"

	got, hashErr := HashFromNode("../fs", "testdata/recursive")
	if hashErr != nil {
		t.Fatal(hashErr)
	}

	if got == want {
		return
	}
	t.Errorf("Actual:\n\t%#q\nExpected:\n\t%#q", got, want)
}

// goSource is the `src` directory beneath the path named by the GOPATH
// environment variable. When GOPATH is unset this is the relative path "src".
var goSource = filepath.Join(os.Getenv("GOPATH"), "src")

// BenchmarkHashFromNode measures HashFromNode over the whole goSource tree.
//
// The benchmark is skipped, rather than failed, when goSource cannot be
// stat'ed, which happens when GOPATH is unset or holds a list of several
// directories.
func BenchmarkHashFromNode(b *testing.B) {
	if _, err := os.Stat(goSource); err != nil {
		b.Skipf("skipping benchmark, cannot use %q as input: %v", goSource, err)
	}

	// Exclude the availability check above from the measurement.
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		if _, err := HashFromNode("", goSource); err != nil {
			b.Fatal(err)
		}
	}
}
7 changes: 7 additions & 0 deletions internal/fs/testdata/blob
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
fjdkal;fdjskc
xzc
axc
fdsf
adsf
das
fd
7 changes: 7 additions & 0 deletions internal/fs/testdata/recursive/blob
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
fjdkal;fdjskc
xzc
axc
fdsf
adsf
das
fd
Empty file.
3 changes: 3 additions & 0 deletions internal/fs/testdata/recursive/foo/bar
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
fjdsakl;fd
vcafcds
vca
1 change: 1 addition & 0 deletions internal/fs/testdata/recursive/vendor/skip1
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
this file ought to be skipped
Empty file.
1 change: 1 addition & 0 deletions internal/gps/_testdata/src/cycle/a.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package cycle

import (
"cycle/one"

"github.com/golang/dep/internal/gps"
)

Expand Down
1 change: 1 addition & 0 deletions internal/gps/_testdata/src/cycle/one/a.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package one

import (
"cycle/two"

"github.com/golang/dep/internal/gps"
)

Expand Down
1 change: 1 addition & 0 deletions internal/gps/_testdata/src/cycle/two/a.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package two

import (
"cycle"

"github.com/golang/dep/internal/gps"
)

Expand Down
3 changes: 2 additions & 1 deletion internal/gps/_testdata/src/missing/a.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ package simple
import (
"sort"

"github.com/golang/dep/internal/gps"
"missing/missing"

"github.com/golang/dep/internal/gps"
)

var (
Expand Down
Loading