From ffd7ff88cc7eba8e1992fa9f3e7af4c15779c481 Mon Sep 17 00:00:00 2001
From: zelig
Date: Thu, 12 Jul 2018 18:29:09 +0200
Subject: [PATCH 01/50] swarm/storage: filehasher = chunker split + swarm hash

---
 swarm/storage/filehasher.go | 323 ++++++++++++++++++++++++++++++++++++
 swarm/storage/split.go      |  82 +++++++++
 swarm/storage/split_test.go |  61 +++++++
 3 files changed, 466 insertions(+)
 create mode 100644 swarm/storage/filehasher.go
 create mode 100644 swarm/storage/split.go
 create mode 100644 swarm/storage/split_test.go

diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go
new file mode 100644
index 0000000000..6fe7f90a7b
--- /dev/null
+++ b/swarm/storage/filehasher.go
@@ -0,0 +1,323 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package storage
+
+import (
+	"encoding/binary"
+	"sync"
+	"sync/atomic"
+)
+
+// SectionHasher is an asynchronous writer interface to a hash.
+// It allows for concurrent and out-of-order writes of sections of the hash's input buffer.
+// Sum can be called once the final length is known, potentially before all sections are complete.
+type SectionHasher interface {
+	Reset()
+	Write(idx int, section []byte)
+	SectionSize() int
+	Sum(b []byte, length int, meta []byte) []byte
+}
+
+// FileHasher is instantiated each time a file is swarm hashed
+// itself implements the ChunkHasher interface
+type FileHasher struct {
+	mtx         sync.Mutex           // RW lock to add/read levels push and unshift batches
+	pool        sync.Pool            // batch resource pool
+	levels      []*level             // levels of the swarm hash tree
+	secsize     int                  // section size
+	chunks      int                  // number of chunks read
+	offset      int                  // byte offset (cursor) within chunk
+	read        int                  // length of input data read
+	length      int                  // known length of input data
+	branches    int                  // branching factor
+	hasherFunc  func() SectionHasher // hasher constructor
+	result      chan []byte          // channel to put hash asynchronously
+	lastSection []byte               // last section to record
+	lastSecPos  int                  // pos of section within last section
+}
+
+func New(hasherFunc func() SectionHasher, branches int) *FileHasher {
+	sh := &FileHasher{
+		hasherFunc: hasherFunc,
+		result:     make(chan []byte),
+	}
+	sh.pool = sync.Pool{
+		New: func() interface{} {
+			return sh.newBatch()
+		},
+	}
+	return sh
+}
+
+// level captures one level of chunks in the swarm hash tree
+// singletons are attached to the lowest level
+type level struct {
+	lev         int      // which level of the swarm hash tree
+	batches     []*batch // active batches on the level
+	*FileHasher          // pointer to the underlying hasher
+}
+
+// batch records chunks subsumed under the same parent intermediate chunk
+type batch struct {
+	nodes  []*node // nodes of the batches
+	index  int     // offset of the node
+	parent *node   // pointer to containing
+	*level         // pointer to containing level
+}
+
+// node represent a chunk and
embeds an async interface to the chunk hash used +type node struct { + hasher SectionHasher // async hasher + pos int // index of the node chunk within its batch + secCnt int32 // number of sections written + maxSecCnt int32 // maximum number of sections written + *batch // pointer to containing batch +} + +// getParentLevel retrieves or creates the next level up from a node/batch/level +// using lock for concurrent access +func (lev *level) getLevel(pl int) (par *level) { + if pl < len(lev.levels) { + return lev.levels[pl] + } + par = &level{ + lev: pl, + } + lev.levels = append(lev.levels, par) + return par +} + +// getParent retrieves the parent node for the batch, creating a new batch if needed +// allownil set to true will return a nil if parent +func (b *batch) getParent(allowNil bool) (n *node) { + b.mtx.Lock() + defer b.mtx.Unlock() + if b.parent != nil || allowNil { + return b.parent + } + b.parent = b.getParentNode() + return b.parent +} + +// getBatch looks up the parent batch on the next level up +// caller must hold the lock +func (lev *level) getBatch(index int) (pb *batch) { + // parent batch is memoised and typically expect 1 or 2 batches + // so this simple way of getting the appropriate batch is ok + for _, pb = range lev.batches { + if pb.index == index { + return pb + } + } + return nil +} + +// getParentNode retrieves the parent node based on the batch indexes +// if a new level or batch is required it creates them +// caller must hold the lock +func (b *batch) getParentNode() *node { + pos := b.index % b.branches + pi := 0 + if b.index > 0 { + pi = (b.index - 1) / b.branches + } + b.mtx.Lock() + defer b.mtx.Unlock() + pl := b.getLevel(b.lev + 1) + pb := pl.getBatch(pi) + if pb != nil { + return pb.nodes[pos] + } + pb = b.pool.Get().(*batch) + pb.level = pl + pb.index = b.index / b.branches + + pl.batches = append(pl.batches, pb) + return pb.nodes[pos] +} + +// delink unshifts the levels batches +// and releases the popped batch to the batch pools +// must be called after Sum has returned +// section writes or children no longer reference this batch +func (b *batch) delink() { + b.mtx.Lock() + defer b.mtx.Unlock() + first := b.batches[0] + if first.index != b.index { + panic("non-initial batch finished first") + } + b.pool.Put(first) + b.batches = b.batches[1:] +} + +// newBatch constructs a reuseable batch +func (sh *FileHasher) newBatch() *batch { + nodes := make([]*node, sh.branches) + for i, _ := range nodes { + nodes[i] = &node{ + pos: i, + hasher: sh.hasherFunc(), + } + } + return &batch{ + nodes: nodes, + } +} + +// dataSpan returns the +func (n *node) dataSpan() int64 { + secsize := n.hasher.SectionSize() + span := int64(4096 / secsize) + for l := 0; l < n.lev; l++ { + span *= int64(n.branches) + } + return span +} + +// SimpleSplitter implements the hash.Hash interface for synchronous read from data +// as data is written to it, it chops the input stream to section size buffers +// and calls the section write on the SectionHasher + +// Reset puts FileHasher in a (re)useable state +func (sh *FileHasher) Reset() { + sh.mtx.Lock() + defer sh.mtx.Unlock() + sh.levels = nil +} + +// // +// func (sh *FileHasher) Write(buf []byte) { +// chunkSize := sh.secsize * sh.branches +// start := sh.offset / sh.secsize +// pos := sh.sections % sh.branches +// n := sh.getLevel(0).getBatch(sh.chunks).nodes[pos] +// read := chunkSize - sh.offset +// copy(n.chunk[sh.offset:], buf) +// var canBeFinal, isFinal bool +// // assuming input never exceeds set length +// if len(buf) <= 
read { +// read = len(buf) +// canBeFinal = true +// sh.mtx.Lock() +// sizeKnown := sh.length > 0 +// if sizeKnown { +// isFinal = sh.chunks*chunkSize-sh.length <= chunkSize +// } else { +// canBeFinal = false +// sh.mtx.Unlock() +// } +// } +// end := start + (sh.offset%sh.secsize+read)/sh.secsize - 1 +// // if current chunk reaches the end +// // write the final section +// if canBeFinal { +// end-- +// lastSecSize := (sh.offset + read) % sh.secsize +// lastSecOffset := end * sh.secsize +// sh.lastSection = n.chunk[lastSecOffset : lastSecOffset+lastSecSize] +// sh.lastSecPos = end +// // lock should be kept until lastSection and +// sh.mtx.Unlock() +// if isFinal { +// n.write(end, sh.lastSection, true) +// } +// } +// f := func() { +// for i := start; i < end; i++ { +// n.write(i, n.chunk[i*sh.secsize:(i+1)*sh.secsize], false) +// } +// } +// +// sh.offset = (sh.offset + read) % sh.secsize * sh.branches +// rest := buf[read:] +// if len(rest) == 0 { +// go f() +// return +// } +// sh.Write(rest) +// } + +// Sum +func (sh *FileHasher) Sum(b []byte, length int, meta []byte) []byte { + chunkSize := sh.secsize * sh.branches + sh.mtx.Lock() + if sh.read >= sh.length { + n := sh.getNode(sh.lastSecPos) + n.write(sh.lastSecPos, sh.lastSection, true) + } + sh.mtx.Unlock() + return <-sh.result +} + +// write writes the section to the node at section idx +// the final parameter indicates that the section is final +// i.e., the read input buffer has been consumed +func (n *node) write(idx int, section []byte, final bool) { + // write the section to the hasher + n.hasher.Write(idx, section) + var inferred bool + var maxSecCnt int32 + if final { + // set number of chunks based on last index and save it + maxSecCnt = int32(idx + 1) + atomic.StoreInt32(&n.maxSecCnt, maxSecCnt) + } else { + // load max number of sections (known from a previous call to final or hash) + maxSecCnt = atomic.LoadInt32(&n.maxSecCnt) + if maxSecCnt == 0 { + inferred = true + maxSecCnt = int32(n.branches) + } + } + + // another section is written, increment secCnt + secCnt := atomic.AddInt32(&n.secCnt, 1) + + // if all branches been written do sum + // since secCnt is > 0 by now, the condition is not satisfied iff + // * maxSecCnt is set and reached or + // * secCnt is n.branches + if secCnt%maxSecCnt > 0 { + return + } + // final flag either because + // * argument explicit about it OR + // * was set earlier by a call to final + go func() { + defer n.batch.delink() + final = final || !inferred + corr := n.hasher.SectionSize() - len(section) + length := int(maxSecCnt)*n.hasher.SectionSize() - corr + // can subtract corr directly from span assuming that shorter sections can only occur on level 0 + span := n.dataSpan()*int64(maxSecCnt) - int64(corr) + meta := make([]byte, 8) + binary.BigEndian.PutUint64(meta, uint64(span)) + // blocking call to Sum (releases resource, so node hasher is reusable) + hash := n.hasher.Sum(nil, length, meta) + // before return, delink the batch + defer n.delink() + // if the final section is batch 0 / pos 0 then it is + allowNil := final && n.index == 0 && n.pos == 0 + pn := n.getParent(allowNil) + if pn == nil { + n.result <- hash + return + } + pn.write(n.pos, hash, final) + }() +} diff --git a/swarm/storage/split.go b/swarm/storage/split.go new file mode 100644 index 0000000000..2bdf423e72 --- /dev/null +++ b/swarm/storage/split.go @@ -0,0 +1,82 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package storage
+
+import (
+	"context"
+	"io"
+)
+
+// SimpleSplitter implements the io.ReaderFrom interface for synchronous read from data
+// as data is written to it, it chops the input stream to section size buffers
+// and calls the section write on the SectionHasher
+type SimpleSplitter struct {
+	hasher  Hash
+	bufsize int
+	result  chan []byte
+}
+
+func (s *SimpleSplitter) Hash(ctx context.Context, r io.Reader) ([]byte, error) {
+	errc := make(chan error)
+	go func() {
+		select {
+		case errc <- s.ReadFrom(r):
+			return nil, err
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		}
+	}()
+
+}
+
+//
+func NewSimpleSplitter(h Hash, bufsize int) *SimpleSplitter {
+	return &SimpleSplitter{
+		hasher:  h,
+		bufsize: bufsize,
+		result:  make(chan []byte),
+	}
+}
+
+//
+func (s *SimpleSplitter) ReadFrom(r io.Reader) error {
+	var read int64
+	buf := make([]byte, s.bufsize)
+	for {
+		n, err := r.Read(buf)
+		if err != nil && err != io.EOF {
+			return err
+		}
+		s.hasher.Write(buf[:n])
+		read += int64(n)
+		if err == io.EOF {
+			go func() {
+				s.result <- s.hasher.Sum(read)
+			}()
+			return nil
+		}
+	}
+}
+
+func (s *SimpleSplitter) Sum(ctx context.Context) ([]byte, error) {
+	select {
+	case <-ctx.Done():
+		return nil, ctx.Err()
+	case sum := <-s.result:
+		return sum, nil
+	}
+}
diff --git a/swarm/storage/split_test.go b/swarm/storage/split_test.go
new file mode 100644
index 0000000000..bc27ae0589
--- /dev/null
+++ b/swarm/storage/split_test.go
@@ -0,0 +1,61 @@
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
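
Note: the Hash method on SimpleSplitter in split.go above does not compile as written; the goroutine returns values from a function that declares none, and err is never defined. A minimal corrected sketch, keeping the patch's own field and method names and assuming ReadFrom reports its error and Sum delivers the result as defined above:

	func (s *SimpleSplitter) Hash(ctx context.Context, r io.Reader) ([]byte, error) {
		errc := make(chan error, 1)
		go func() {
			// ReadFrom consumes the reader and, on EOF, dispatches the
			// digest to s.result, which Sum below then receives.
			errc <- s.ReadFrom(r)
		}()
		select {
		case err := <-errc:
			if err != nil {
				return nil, err
			}
		case <-ctx.Done():
			return nil, ctx.Err()
		}
		return s.Sum(ctx)
	}
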
+ +package storage + +import ( + "context" + "io" +) + +const DefaultChunkCount = 2 +var MaxExcessSize = DefaultChunkCount + +func TestAsyncWriteFromReaderCorrectness(t *testing.T) { + data := make([]byte, DefaultChunkSize*DefaultChunkCount+rand.Intn(MaxExcessSize)) + reader := bytes.NewReader(b) + fh := &fakeHasher{} + splitter := NewSimpleSplitter(fh, bufsize) + + n, err := io.Copy(splitter, reader) + if err != nil { + if err == io.EOF { + got = <-fh.result + } + +} + +type fakeBaseHasherJoiner struct { + input []byte +} + +func (fh *fakeBaseHasherJoiner) Reset() { fh.input = nil; return} +func (fh *fakeBaseHasherJoiner) Write(b []byte) { fh.input = append(fh.input, b...) } +func (fh *fakeBaseHasherJoiner) Sum([]byte) []byte { return fh.input } +func (fh *fakeBaseHasherJoiner) BlockSize() int { return 64 } +func (fh *fakeBaseHasherJoiner) Size() int { return 32 } + +type fakeHasher struct { + input []byte + output []byte +} + +func newFakeHasher() *fakeHasher { + return &fakeHasher{} +} + +func (fh *fakeHasher) Reset() { fh.input = nil; return} +func (fh *fakeHasher) Write([]byte) From 5c02b35440b7ba37f4b8f21a16cee2cff26867ed Mon Sep 17 00:00:00 2001 From: zelig Date: Fri, 13 Jul 2018 12:58:01 +0200 Subject: [PATCH 02/50] chunkhasherstore --- swarm/storage/chunkhasherstore.go | 278 ++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 swarm/storage/chunkhasherstore.go diff --git a/swarm/storage/chunkhasherstore.go b/swarm/storage/chunkhasherstore.go new file mode 100644 index 0000000000..681538a8d9 --- /dev/null +++ b/swarm/storage/chunkhasherstore.go @@ -0,0 +1,278 @@ +// Copyright 2017 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package storage + +import ( + "context" + "io" +) + + +type chunkEncryption struct { + spanEncryption encryption.Encryption + dataEncryption encryption.Encryption +} + +type FileHasherStore struct { + store ChunkStore + hashFunc SwarmHasher + chunkEncryption *chunkEncryption + hashSize int // content hash size + refSize int64 // reference size (content hash + possibly encryption key) + wg *sync.WaitGroup + closed chan struct{} +} + +func newChunkEncryption(chunkSize, refSize int64) *chunkEncryption { + return &chunkEncryption{ + spanEncryption: encryption.New(0, uint32(chunkSize/refSize), sha3.NewKeccak256), + dataEncryption: encryption.New(int(chunkSize), 0, sha3.NewKeccak256), + } +} + +// NewFileHasherStore creates a FileHasherStore object, which implements Putter and Getter interfaces. 
+// With the FileHasherStore you can put and get chunk data (which is just []byte) into a ChunkStore +// and the FileHasherStore will take core of encryption/decryption of data if necessary +func NewFileHasherStore(chunkStore ChunkStore, hashFunc SectionHasherFunc, toEncrypt bool, erasure bool) *FileHasherStore { + var chunkEncryption *chunkEncryption + f := func(children []byte) SectionHasher { + return hashFunc() + } + if erasure { + f = func(children []byte) SectionHasher { + + } + } + hashSize := hashFunc().Size() + refSize := int64(hashSize) + if toEncrypt { + refSize += encryption.KeyLength + chunkEncryption = newChunkEncryption(DefaultChunkSize, refSize) + } + + return &FileHasherStore{ + store: chunkStore, + hashFunc: hashFunc, + hashSize: hashSize, + refSize: refSize, + wg: &sync.WaitGroup{}, + closed: make(chan struct{}), + } +} + + +// extensions of the base chunk hasher (SectionHasher interface) + +// wrapper that completes a batch of child chunks using CRS erasure coding +// +type redundanteChunkHasher struct { + SectionHasher + // erasure +} + +// +type encryptedChunkHasher struct { + chunkEncryption *chunkEncryption + SectionHasher +} + +type storeChunkHasher struct { + SectionHasher + put func(Address, ChunkData) error +} + +// New is the function called by the splitter/filehasher when creating a node +func (fhs *FileHasherStore) NewChunkHasher() SwarmHash { + return &encryptedChunkHasherStorer{ + hasher: fhs.hashFunc() + chunkEncryption: chunkEncryption, + getChunkData: func(int) ChunkData, + } +} + +func (e *encryptedChunkHasherStorer) Write(i int, b []byte) { + // call encrypt + e.hasher.Write(i, b) +} + +func (e *encryptedChunkHasherStorer) Sum(b []byte, length int, meta []byte) { + // length == e.DataSize() + + length = e.complete(length, e.hasher.DataSize(), e.getChunkData, e.hasher.Write) + // pan and encrypt + return e.hasher.Sum(b, length, meta) +} + + +// Put stores the chunkData into the ChunkStore of the FileHasherStore and returns the reference. +// If FileHasherStore has a chunkEncryption object, the data will be encrypted. +// Asynchronous function, the data will not necessarily be stored when it returns. +func (h *FileHasherStore) Put(chunkData ChunkData) (Reference, error) { + c := chunkData + size := chunkData.Size() + var encryptionKey encryption.Key + if h.chunkEncryption != nil { + var err error + c, encryptionKey, err = h.encryptChunkData(chunkData) + if err != nil { + return nil, err + } + } + chunk := h.createChunk(c, size) + + h.storeChunk(chunk) + + return Reference(append(chunk.Addr, encryptionKey...)), nil +} + +// Get returns data of the chunk with the given reference (retrieved from the ChunkStore of FileHasherStore). +// If the data is encrypted and the reference contains an encryption key, it will be decrypted before +// return. +func (h *FileHasherStore) Get(ref Reference) (ChunkData, error) { + key, encryptionKey, err := parseReference(ref, h.hashSize) + if err != nil { + return nil, err + } + toDecrypt := (encryptionKey != nil) + + chunk, err := h.store.Get(key) + if err != nil { + return nil, err + } + + chunkData := chunk.SData + if toDecrypt { + var err error + chunkData, err = h.decryptChunkData(chunkData, encryptionKey) + if err != nil { + return nil, err + } + } + return chunkData, nil +} + +// Close indicates that no more chunks will be put with the FileHasherStore, so the Wait +// function can return when all the previously put chunks has been stored. 
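+// A typical call sequence (illustrative only; variable names hypothetical,
+// error handling elided):
+//
+//	ref, _ := store.Put(chunkData) // may return before the chunk is stored
+//	store.Close()                  // signal that no more chunks will be put
+//	_ = store.Wait(ctx)            // blocks until every put chunk is stored
+//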
+func (h *FileHasherStore) Close() { + close(h.closed) +} + +// Wait returns when +// 1) the Close() function has been called and +// 2) all the chunks which has been Put has been stored +func (h *FileHasherStore) Wait(ctx context.Context) error { + <-h.closed + h.wg.Wait() + return nil +} + +func (h *FileHasherStore) createHash(chunkData ChunkData) Address { + hasher := h.hashFunc() + hasher.ResetWithLength(chunkData[:8]) // 8 bytes of length + hasher.Write(chunkData[8:]) // minus 8 []byte length + return hasher.Sum(nil) +} + +func (h *FileHasherStore) createChunk(chunkData ChunkData, chunkSize int64) *Chunk { + hash := h.createHash(chunkData) + chunk := NewChunk(hash, nil) + chunk.SData = chunkData + chunk.Size = chunkSize + + return chunk +} + +func (h *FileHasherStore) encryptChunkData(chunkData ChunkData) (ChunkData, encryption.Key, error) { + if len(chunkData) < 8 { + return nil, nil, fmt.Errorf("Invalid ChunkData, min length 8 got %v", len(chunkData)) + } + + encryptionKey, err := encryption.GenerateRandomKey() + if err != nil { + return nil, nil, err + } + + encryptedSpan, err := h.chunkEncryption.spanEncryption.Encrypt(chunkData[:8], encryptionKey) + if err != nil { + return nil, nil, err + } + encryptedData, err := h.chunkEncryption.dataEncryption.Encrypt(chunkData[8:], encryptionKey) + if err != nil { + return nil, nil, err + } + c := make(ChunkData, len(encryptedSpan)+len(encryptedData)) + copy(c[:8], encryptedSpan) + copy(c[8:], encryptedData) + return c, encryptionKey, nil +} + +func (h *FileHasherStore) decryptChunkData(chunkData ChunkData, encryptionKey encryption.Key) (ChunkData, error) { + if len(chunkData) < 8 { + return nil, fmt.Errorf("Invalid ChunkData, min length 8 got %v", len(chunkData)) + } + + decryptedSpan, err := h.chunkEncryption.spanEncryption.Decrypt(chunkData[:8], encryptionKey) + if err != nil { + return nil, err + } + + decryptedData, err := h.chunkEncryption.dataEncryption.Decrypt(chunkData[8:], encryptionKey) + if err != nil { + return nil, err + } + + // removing extra bytes which were just added for padding + length := ChunkData(decryptedSpan).Size() + for length > DefaultChunkSize { + length = length + (DefaultChunkSize - 1) + length = length / DefaultChunkSize + length *= h.refSize + } + + c := make(ChunkData, length+8) + copy(c[:8], decryptedSpan) + copy(c[8:], decryptedData[:length]) + + return c[:length+8], nil +} + +func (h *FileHasherStore) RefSize() int64 { + return h.refSize +} + +func (h *FileHasherStore) storeChunk(chunk *Chunk) { + h.wg.Add(1) + go func() { + <-chunk.dbStoredC + h.wg.Done() + }() + h.store.Put(chunk) +} + +func parseReference(ref Reference, hashSize int) (Address, encryption.Key, error) { + encryptedKeyLength := hashSize + encryption.KeyLength + switch len(ref) { + case KeyLength: + return Address(ref), nil, nil + case encryptedKeyLength: + encKeyIdx := len(ref) - encryption.KeyLength + return Address(ref[:encKeyIdx]), encryption.Key(ref[encKeyIdx:]), nil + default: + return nil, nil, fmt.Errorf("Invalid reference length, expected %v or %v got %v", hashSize, encryptedKeyLength, len(ref)) + } + +} From 66d7071edab659a315b829d277f1b373ab923b69 Mon Sep 17 00:00:00 2001 From: lash Date: Mon, 16 Jul 2018 10:16:00 +0200 Subject: [PATCH 03/50] swarm/storage: WIP Create splitter test for FileHasher --- cmd/swarm/list.go | 1 + swarm/storage/chunkhasherstore.go | 278 ---------------------- swarm/storage/filehasher.go | 373 ++++++++++++++---------------- swarm/storage/split.go | 87 ++++--- swarm/storage/split_test.go | 99 ++++++-- 5 
files changed, 304 insertions(+), 534 deletions(-) delete mode 100644 swarm/storage/chunkhasherstore.go diff --git a/cmd/swarm/list.go b/cmd/swarm/list.go index 5d35154a57..6344da4dc8 100644 --- a/cmd/swarm/list.go +++ b/cmd/swarm/list.go @@ -37,6 +37,7 @@ var listCommand = cli.Command{ } func list(ctx *cli.Context) { + fmt.Println("foo\n", ctx.GlobalString("password")) args := ctx.Args() if len(args) < 1 { diff --git a/swarm/storage/chunkhasherstore.go b/swarm/storage/chunkhasherstore.go deleted file mode 100644 index 681538a8d9..0000000000 --- a/swarm/storage/chunkhasherstore.go +++ /dev/null @@ -1,278 +0,0 @@ -// Copyright 2017 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package storage - -import ( - "context" - "io" -) - - -type chunkEncryption struct { - spanEncryption encryption.Encryption - dataEncryption encryption.Encryption -} - -type FileHasherStore struct { - store ChunkStore - hashFunc SwarmHasher - chunkEncryption *chunkEncryption - hashSize int // content hash size - refSize int64 // reference size (content hash + possibly encryption key) - wg *sync.WaitGroup - closed chan struct{} -} - -func newChunkEncryption(chunkSize, refSize int64) *chunkEncryption { - return &chunkEncryption{ - spanEncryption: encryption.New(0, uint32(chunkSize/refSize), sha3.NewKeccak256), - dataEncryption: encryption.New(int(chunkSize), 0, sha3.NewKeccak256), - } -} - -// NewFileHasherStore creates a FileHasherStore object, which implements Putter and Getter interfaces. 
-// With the FileHasherStore you can put and get chunk data (which is just []byte) into a ChunkStore -// and the FileHasherStore will take core of encryption/decryption of data if necessary -func NewFileHasherStore(chunkStore ChunkStore, hashFunc SectionHasherFunc, toEncrypt bool, erasure bool) *FileHasherStore { - var chunkEncryption *chunkEncryption - f := func(children []byte) SectionHasher { - return hashFunc() - } - if erasure { - f = func(children []byte) SectionHasher { - - } - } - hashSize := hashFunc().Size() - refSize := int64(hashSize) - if toEncrypt { - refSize += encryption.KeyLength - chunkEncryption = newChunkEncryption(DefaultChunkSize, refSize) - } - - return &FileHasherStore{ - store: chunkStore, - hashFunc: hashFunc, - hashSize: hashSize, - refSize: refSize, - wg: &sync.WaitGroup{}, - closed: make(chan struct{}), - } -} - - -// extensions of the base chunk hasher (SectionHasher interface) - -// wrapper that completes a batch of child chunks using CRS erasure coding -// -type redundanteChunkHasher struct { - SectionHasher - // erasure -} - -// -type encryptedChunkHasher struct { - chunkEncryption *chunkEncryption - SectionHasher -} - -type storeChunkHasher struct { - SectionHasher - put func(Address, ChunkData) error -} - -// New is the function called by the splitter/filehasher when creating a node -func (fhs *FileHasherStore) NewChunkHasher() SwarmHash { - return &encryptedChunkHasherStorer{ - hasher: fhs.hashFunc() - chunkEncryption: chunkEncryption, - getChunkData: func(int) ChunkData, - } -} - -func (e *encryptedChunkHasherStorer) Write(i int, b []byte) { - // call encrypt - e.hasher.Write(i, b) -} - -func (e *encryptedChunkHasherStorer) Sum(b []byte, length int, meta []byte) { - // length == e.DataSize() - - length = e.complete(length, e.hasher.DataSize(), e.getChunkData, e.hasher.Write) - // pan and encrypt - return e.hasher.Sum(b, length, meta) -} - - -// Put stores the chunkData into the ChunkStore of the FileHasherStore and returns the reference. -// If FileHasherStore has a chunkEncryption object, the data will be encrypted. -// Asynchronous function, the data will not necessarily be stored when it returns. -func (h *FileHasherStore) Put(chunkData ChunkData) (Reference, error) { - c := chunkData - size := chunkData.Size() - var encryptionKey encryption.Key - if h.chunkEncryption != nil { - var err error - c, encryptionKey, err = h.encryptChunkData(chunkData) - if err != nil { - return nil, err - } - } - chunk := h.createChunk(c, size) - - h.storeChunk(chunk) - - return Reference(append(chunk.Addr, encryptionKey...)), nil -} - -// Get returns data of the chunk with the given reference (retrieved from the ChunkStore of FileHasherStore). -// If the data is encrypted and the reference contains an encryption key, it will be decrypted before -// return. -func (h *FileHasherStore) Get(ref Reference) (ChunkData, error) { - key, encryptionKey, err := parseReference(ref, h.hashSize) - if err != nil { - return nil, err - } - toDecrypt := (encryptionKey != nil) - - chunk, err := h.store.Get(key) - if err != nil { - return nil, err - } - - chunkData := chunk.SData - if toDecrypt { - var err error - chunkData, err = h.decryptChunkData(chunkData, encryptionKey) - if err != nil { - return nil, err - } - } - return chunkData, nil -} - -// Close indicates that no more chunks will be put with the FileHasherStore, so the Wait -// function can return when all the previously put chunks has been stored. 
-func (h *FileHasherStore) Close() { - close(h.closed) -} - -// Wait returns when -// 1) the Close() function has been called and -// 2) all the chunks which has been Put has been stored -func (h *FileHasherStore) Wait(ctx context.Context) error { - <-h.closed - h.wg.Wait() - return nil -} - -func (h *FileHasherStore) createHash(chunkData ChunkData) Address { - hasher := h.hashFunc() - hasher.ResetWithLength(chunkData[:8]) // 8 bytes of length - hasher.Write(chunkData[8:]) // minus 8 []byte length - return hasher.Sum(nil) -} - -func (h *FileHasherStore) createChunk(chunkData ChunkData, chunkSize int64) *Chunk { - hash := h.createHash(chunkData) - chunk := NewChunk(hash, nil) - chunk.SData = chunkData - chunk.Size = chunkSize - - return chunk -} - -func (h *FileHasherStore) encryptChunkData(chunkData ChunkData) (ChunkData, encryption.Key, error) { - if len(chunkData) < 8 { - return nil, nil, fmt.Errorf("Invalid ChunkData, min length 8 got %v", len(chunkData)) - } - - encryptionKey, err := encryption.GenerateRandomKey() - if err != nil { - return nil, nil, err - } - - encryptedSpan, err := h.chunkEncryption.spanEncryption.Encrypt(chunkData[:8], encryptionKey) - if err != nil { - return nil, nil, err - } - encryptedData, err := h.chunkEncryption.dataEncryption.Encrypt(chunkData[8:], encryptionKey) - if err != nil { - return nil, nil, err - } - c := make(ChunkData, len(encryptedSpan)+len(encryptedData)) - copy(c[:8], encryptedSpan) - copy(c[8:], encryptedData) - return c, encryptionKey, nil -} - -func (h *FileHasherStore) decryptChunkData(chunkData ChunkData, encryptionKey encryption.Key) (ChunkData, error) { - if len(chunkData) < 8 { - return nil, fmt.Errorf("Invalid ChunkData, min length 8 got %v", len(chunkData)) - } - - decryptedSpan, err := h.chunkEncryption.spanEncryption.Decrypt(chunkData[:8], encryptionKey) - if err != nil { - return nil, err - } - - decryptedData, err := h.chunkEncryption.dataEncryption.Decrypt(chunkData[8:], encryptionKey) - if err != nil { - return nil, err - } - - // removing extra bytes which were just added for padding - length := ChunkData(decryptedSpan).Size() - for length > DefaultChunkSize { - length = length + (DefaultChunkSize - 1) - length = length / DefaultChunkSize - length *= h.refSize - } - - c := make(ChunkData, length+8) - copy(c[:8], decryptedSpan) - copy(c[8:], decryptedData[:length]) - - return c[:length+8], nil -} - -func (h *FileHasherStore) RefSize() int64 { - return h.refSize -} - -func (h *FileHasherStore) storeChunk(chunk *Chunk) { - h.wg.Add(1) - go func() { - <-chunk.dbStoredC - h.wg.Done() - }() - h.store.Put(chunk) -} - -func parseReference(ref Reference, hashSize int) (Address, encryption.Key, error) { - encryptedKeyLength := hashSize + encryption.KeyLength - switch len(ref) { - case KeyLength: - return Address(ref), nil, nil - case encryptedKeyLength: - encKeyIdx := len(ref) - encryption.KeyLength - return Address(ref[:encKeyIdx]), encryption.Key(ref[encKeyIdx:]), nil - default: - return nil, nil, fmt.Errorf("Invalid reference length, expected %v or %v got %v", hashSize, encryptedKeyLength, len(ref)) - } - -} diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index 6fe7f90a7b..2f4c2c562b 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -28,26 +28,23 @@ import ( type SectionHasher interface { Reset() Write(idx int, section []byte) - SectionSize() int + Size() int + BlockSize() int + ChunkSize() int Sum(b []byte, length int, meta []byte) []byte } // FileHasher is instantiated each time a 
file is swarm hashed // itself implements the ChunkHasher interface type FileHasher struct { - mtx sync.Mutex // RW lock to add/read levels push and unshift batches - pool sync.Pool // batch resource pool - levels []*level // levels of the swarm hash tree - secsize int // section size - chunks int // number of chunks read - offset int // byte offset (cursor) within chunk - read int // length of input data read - length int // known length of input data - branches int // branching factor - hasherFunc func() SectionHasher // hasher constructor - result chan []byte // channel to put hash asynchronously - lastSection []byte // last section to record - lastSecPos int // pos of section within last section + mtx sync.Mutex // RW lock to add/read levels push and unshift batches + pool sync.Pool // batch resource pool + levels []*level // levels of the swarm hash tree + secsize int // section size + branches int // branching factor + hasherFunc func() SectionHasher // hasher constructor + result chan []byte // channel to put hash asynchronously + size int } func New(hasherFunc func() SectionHasher, branches int) *FileHasher { @@ -60,15 +57,17 @@ func New(hasherFunc func() SectionHasher, branches int) *FileHasher { return sh.newBatch() }, } + sh.size = hasherFunc().Size() return sh } // level captures one level of chunks in the swarm hash tree // singletons are attached to the lowest level type level struct { - lev int // which level of the swarm hash tree - batches []*batch // active batches on the level - *FileHasher // pointer to the underlying hasher + levelIndex int // which level of the swarm hash tree + //batches []*batch // active batches on the level + batches sync.Map + *FileHasher // pointer to the underlying hasher } // batch records chunks subsumed under the same parent intermediate chunk @@ -76,16 +75,16 @@ type batch struct { nodes []*node // nodes of the batches index int // offset of the node parent *node // pointer to containing - *level // pointer to containing level + buffer *bytes.Buffer + *level // pointer to containing level } // node represent a chunk and embeds an async interface to the chunk hash used type node struct { - hasher SectionHasher // async hasher - pos int // index of the node chunk within its batch - secCnt int32 // number of sections written - maxSecCnt int32 // maximum number of sections written - *batch // pointer to containing batch + hasher SectionHasher // async hasher + pos int // index of the node chunk within its batch + secCnt int32 // number of sections written + *batch // pointer to containing batch } // getParentLevel retrieves or creates the next level up from a node/batch/level @@ -95,59 +94,29 @@ func (lev *level) getLevel(pl int) (par *level) { return lev.levels[pl] } par = &level{ - lev: pl, + levelIndex: pl, } lev.levels = append(lev.levels, par) return par } -// getParent retrieves the parent node for the batch, creating a new batch if needed -// allownil set to true will return a nil if parent -func (b *batch) getParent(allowNil bool) (n *node) { - b.mtx.Lock() - defer b.mtx.Unlock() - if b.parent != nil || allowNil { - return b.parent +func (lev *level) getBatch(index int) *batch { + pbi, ok := lev.batches.Load(index) + if !ok { + return nil } - b.parent = b.getParentNode() - return b.parent + return pbi.(*batch) } -// getBatch looks up the parent batch on the next level up -// caller must hold the lock -func (lev *level) getBatch(index int) (pb *batch) { - // parent batch is memoised and typically expect 1 or 2 batches - // so this simple 
way of getting the appropriate batch is ok - for _, pb = range lev.batches { - if pb.index == index { - return pb - } +// retrieve the batch within a level corresponding to the given index +// if it does not currently exist, create it +func (lev *level) getOrCreateBatch(index int) *batch { + pb := lev.getBatch(index) + if pb == nil { + pb = lev.pool.Get().(*batch) + lev.batches.Store(index, pb) } - return nil -} - -// getParentNode retrieves the parent node based on the batch indexes -// if a new level or batch is required it creates them -// caller must hold the lock -func (b *batch) getParentNode() *node { - pos := b.index % b.branches - pi := 0 - if b.index > 0 { - pi = (b.index - 1) / b.branches - } - b.mtx.Lock() - defer b.mtx.Unlock() - pl := b.getLevel(b.lev + 1) - pb := pl.getBatch(pi) - if pb != nil { - return pb.nodes[pos] - } - pb = b.pool.Get().(*batch) - pb.level = pl - pb.index = b.index / b.branches - - pl.batches = append(pl.batches, pb) - return pb.nodes[pos] + return pb } // delink unshifts the levels batches @@ -157,167 +126,163 @@ func (b *batch) getParentNode() *node { func (b *batch) delink() { b.mtx.Lock() defer b.mtx.Unlock() - first := b.batches[0] - if first.index != b.index { - panic("non-initial batch finished first") - } - b.pool.Put(first) - b.batches = b.batches[1:] + b.batches.Delete(b.index) + b.pool.Put(b) +} + +// returns the digest size of the underlying hasher +func (fh *FileHasher) Size() int { + return fh.size } // newBatch constructs a reuseable batch -func (sh *FileHasher) newBatch() *batch { +func (sh *FileHasher) newBatch() (bt *batch) { nodes := make([]*node, sh.branches) - for i, _ := range nodes { + chunkSize := sh.ChunkSize() + bt = &batch{ + buffer: make([]byte, sh.branches*chunkSize), + //buffer: bytes.NewBuffer(make([]byte, 0, sh.branches*sh.ChunkSize())), + } + for i := range nodes { + offset := chunkSize * i nodes[i] = &node{ pos: i, hasher: sh.hasherFunc(), + buffer: batch[offset : offset+chunkSize], } } - return &batch{ - nodes: nodes, - } + batch.nodes = nodes + return bt } -// dataSpan returns the -func (n *node) dataSpan() int64 { - secsize := n.hasher.SectionSize() - span := int64(4096 / secsize) - for l := 0; l < n.lev; l++ { - span *= int64(n.branches) +func (sh *FileHasher) getNodeSectionBuffer(globalCount int) ([]byte, func()) { + batchIndex := globalCount / sh.branches * sh.ChunkSize() + batchPos := globalCount % sh.branches * sh.ChunkSize() + batchNodeIndex := batchPos / sh.ChunkSize() + batchNodePos := batchPosIndex % sh.ChunkSize() + return sh.batches[batchIndex].nodes[batchNodeIndex].getSectionBuffer(batchNodePos) +} + +func (n *node) getSectionBuffer(p int) (int, func()) { + currentCount := atomic.AddInt32(&n.secCnt, 1) + nodeSectionByteOffset := (batchNodePos / sh.BlockSize()) * sh.BlockSize() + var doneFunc func() + if currentCount == int32(n.branches) { + doneFunc = n.done } - return span + return n.buffer[nodeSectionByteOffset : nodeSectionByteOffset+sh.BlockSize()], batchNodeIndex, doneFunc } -// SimpleSplitter implements the hash.Hash interface for synchronous read from data -// as data is written to it, it chops the input stream to section size buffers -// and calls the section write on the SectionHasher +// dataSpan returns the size of data encoded under the current node, serialized as big endian uint64 +func (n *node) dataSpan() []byte { + //secsize := n.hasher.BlockSize() + span := uint64(n.hasher.ChunkSize()) + for l := 0; l < n.levelIndex; l++ { + span *= uint64(n.branches) + } + meta := make([]byte, 8) + 
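+	// Illustrative numbers: with 128 branches and 4096-byte chunks, a node at
+	// levelIndex 1 spans 128 * 4096 = 524288 bytes, so the big-endian encoding
+	// below produces 00 00 00 00 00 08 00 00.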
binary.BigEndian.PutUint64(meta, span) + return meta +} -// Reset puts FileHasher in a (re)useable state -func (sh *FileHasher) Reset() { - sh.mtx.Lock() - defer sh.mtx.Unlock() - sh.levels = nil +func (n *node) Write(sectionIndex int, section []byte) { + n.write(sectionIndex, section) } -// // -// func (sh *FileHasher) Write(buf []byte) { -// chunkSize := sh.secsize * sh.branches -// start := sh.offset / sh.secsize -// pos := sh.sections % sh.branches -// n := sh.getLevel(0).getBatch(sh.chunks).nodes[pos] -// read := chunkSize - sh.offset -// copy(n.chunk[sh.offset:], buf) -// var canBeFinal, isFinal bool -// // assuming input never exceeds set length -// if len(buf) <= read { -// read = len(buf) -// canBeFinal = true -// sh.mtx.Lock() -// sizeKnown := sh.length > 0 -// if sizeKnown { -// isFinal = sh.chunks*chunkSize-sh.length <= chunkSize -// } else { -// canBeFinal = false -// sh.mtx.Unlock() -// } -// } -// end := start + (sh.offset%sh.secsize+read)/sh.secsize - 1 -// // if current chunk reaches the end -// // write the final section -// if canBeFinal { -// end-- -// lastSecSize := (sh.offset + read) % sh.secsize -// lastSecOffset := end * sh.secsize -// sh.lastSection = n.chunk[lastSecOffset : lastSecOffset+lastSecSize] -// sh.lastSecPos = end -// // lock should be kept until lastSection and -// sh.mtx.Unlock() -// if isFinal { -// n.write(end, sh.lastSection, true) -// } -// } -// f := func() { -// for i := start; i < end; i++ { -// n.write(i, n.chunk[i*sh.secsize:(i+1)*sh.secsize], false) -// } -// } -// -// sh.offset = (sh.offset + read) % sh.secsize * sh.branches -// rest := buf[read:] -// if len(rest) == 0 { -// go f() -// return -// } -// sh.Write(rest) -// } - -// Sum -func (sh *FileHasher) Sum(b []byte, length int, meta []byte) []byte { - chunkSize := sh.secsize * sh.branches - sh.mtx.Lock() - if sh.read >= sh.length { - n := sh.getNode(sh.lastSecPos) - n.write(sh.lastSecPos, sh.lastSection, true) +func (n *node) write(sectionIndex int, section []byte) { + currentCount := atomic.AddInt32(&n.secCnt, 1) + n.hasher.Write(sectionIndex, section) + if currentCount == int32(n.branches) { + n.node() } - sh.mtx.Unlock() - return <-sh.result } -// write writes the section to the node at section idx -// the final parameter indicates that the section is final -// i.e., the read input buffer has been consumed -func (n *node) write(idx int, section []byte, final bool) { - // write the section to the hasher - n.hasher.Write(idx, section) - var inferred bool - var maxSecCnt int32 - if final { - // set number of chunks based on last index and save it - maxSecCnt = int32(idx + 1) - atomic.StoreInt32(&n.maxSecCnt, maxSecCnt) +func (n *node) done() { + go func() { + parentBatchIndex := n.index / n.branches + parentBatch := n.levels[n.levelIndex+1].getBatch(parentBatchIndex) + parentNodeIndex := n.index % n.branches + parentNode := parentBatch.nodes[parentNodeIndex] + parentNode.write(n.pos, n.hasher.Sum(nil, n.hasher.ChunkSize(), parentNode.dataSpan())) + }() +} + +// length is global length +func (n *node) sum(length int, nodeSpan int) { + + // nodeSpan is the total byte size of a complete tree under the current node + nodeSpan *= n.branches + + // if a new batch would be started + batchSpan := nodeSpan * n.branches + nodeIndex := length % batchSpan + var parentNode *node + if nodeIndex == 0 && len(n.levels) > n.levelIndex+1 { + batchIndex := (length-1)/batchSpan + 1 + parentNode = n.levels[n.levelIndex+1].getBatch(batchIndex).nodes[nodeIndex] + parentNode.sum(length, nodeSpan) + return + } + + 
// dataLength is the actual length of data under the current node + dataLength := uint64(length % nodeSpan) + + // meta is the length of actual data in the nodespan + meta := make([]byte, 8) + binary.BigEndian.PutUint64(meta, dataLength) + + // bmtLength is the actual length of bytes in the chunk + // if the node is an intermediate node (level != 0 && len(levels) > 1), bmtLength will be a multiple 32 bytes + var bmtLength uint64 + if n.levelIndex == 0 { + bmtLength = dataLength } else { - // load max number of sections (known from a previous call to final or hash) - maxSecCnt = atomic.LoadInt32(&n.maxSecCnt) - if maxSecCnt == 0 { - inferred = true - maxSecCnt = int32(n.branches) - } + bmtLength = ((dataLength - 1) / uint64((nodeSpan/n.branches+1)*n.hasher.BlockSize())) } - // another section is written, increment secCnt - secCnt := atomic.AddInt32(&n.secCnt, 1) + hash := n.hasher.Sum(nil, int(bmtLength), meta) - // if all branches been written do sum - // since secCnt is > 0 by now, the condition is not satisfied iff - // * maxSecCnt is set and reached or - // * secCnt is n.branches - if secCnt%maxSecCnt > 0 { + // are we on the root level? + if parentNode != nil { + parentNode.sum(length, nodeSpan) return } - // final flag either because - // * argument explicit about it OR - // * was set earlier by a call to final - go func() { - defer n.batch.delink() - final = final || !inferred - corr := n.hasher.SectionSize() - len(section) - length := int(maxSecCnt)*n.hasher.SectionSize() - corr - // can subtract corr directly from span assuming that shorter sections can only occur on level 0 - span := n.dataSpan()*int64(maxSecCnt) - int64(corr) - meta := make([]byte, 8) - binary.BigEndian.PutUint64(meta, uint64(span)) - // blocking call to Sum (releases resource, so node hasher is reusable) - hash := n.hasher.Sum(nil, length, meta) - // before return, delink the batch - defer n.delink() - // if the final section is batch 0 / pos 0 then it is - allowNil := final && n.index == 0 && n.pos == 0 - pn := n.getParent(allowNil) - if pn == nil { - n.result <- hash - return - } - pn.write(n.pos, hash, final) - }() + + n.result <- hash +} + +func (fh *FileHasher) ChunkSize() int { + return fh.branches * fh.secsize +} + +// Louis note to self: secsize is the same as the size of the reference +// Invoked after we know the actual length of the file +// Will create the last node on the data level of the hash tree matching the length +func (fh *FileHasher) Sum(b []byte, length int, meta []byte) []byte { + + // handle edge case where the file is empty + if length == 0 { + return fh.hasherFunc().Sum(nil, 0, make([]byte, 8)) + } + + // calculate the index the last batch + lastBatchIndexInFile := (length - 1) / fh.ChunkSize() * fh.branches + + // calculate the node index within the last batch + byteIndexInLastBatch := length - lastBatchIndexInFile*fh.ChunkSize()*fh.branches + nodeIndexInLastBatch := (byteIndexInLastBatch - 1) / fh.ChunkSize() + + // get the last node + lastNode := fh.levels[0].getBatch(lastBatchIndexInFile).nodes[nodeIndexInLastBatch] + + // asynchronously call sum on this node and wait for the final result + go lastNode.sum(length, fh.ChunkSize()) + return <-fh.result +} + +// Reset puts FileHasher in a (re)useable state +func (sh *FileHasher) Reset() { + sh.mtx.Lock() + defer sh.mtx.Unlock() + sh.levels = nil } diff --git a/swarm/storage/split.go b/swarm/storage/split.go index 2bdf423e72..377bb5863b 100644 --- a/swarm/storage/split.go +++ b/swarm/storage/split.go @@ -17,57 +17,88 @@ package storage 
import ( + "bytes" "context" + "encoding/binary" "io" + + "github.com/ethereum/go-ethereum/swarm/log" ) // SimpleSplitter implements the io.ReaderFrom interface for synchronous read from data // as data is written to it, it chops the input stream to section size buffers // and calls the section write on the SectionHasher type SimpleSplitter struct { - hasher Hash - bufsize int - result chan []byte + hasher SectionHasher + sectionCount int + count int64 + result chan []byte + readBuffer []byte + writeBuffer []byte } -func (s *SimpleSplitter) Hash(ctx context.Context, r io.Reader) ([]byte, error) { - errc := make(chan error) - go func() { - select { - case errc <- s.ReadFrom(r): - return nil, err - case <-ctx.Done(): - return nil, ctx.Err() - } - }() +// +func NewSimpleSplitter(h SectionHasher, bufferSize int) *SimpleSplitter { + writeBufferBytes := make([]byte, 0, h.BlockSize()) + return &SimpleSplitter{ + hasher: h, + result: make(chan []byte), + readBuffer: make([]byte, bufferSize), + writeBuffer: bytes.NewBuffer(writeBufferBytes), + } +} +func (s *SimpleSplitter) Write(buf []byte) (int, error) { + for len(buf) > 0 { + sectionOffset := s.section - s.hasher.BlockSize() + writeBuffer := s.hasher.getBuffer(s.count) + c := len(buf) + if c > len(s.hasher.BlockSize()) { + c = len(s.hasher.BlockSize()) + } + s.hasher.Write(s.sectionCount, s.writeBuffer.Bytes()) + s.count += c + s.sectionCount++ + log.Debug("writer", "c", c) + buf = buf[c:] + s.sectionCount++ + } + return int(s.count), nil } -// -func NewSimpleSplitter(h Hash, bufsize int) *SimpleSplitter { - return &SimpleSplitter{ - hasher: h, - bufsize: bufsize, - result: make(chan []byte), +func (s *SimpleSplitter) Close() error { + if s.writeBuffer.Len() > 0 { + log.Debug("writer flush on close", "c", s.writeBuffer.Len()) + s.hasher.Write(s.sectionCount, s.writeBuffer.Bytes()) } + s.count = 0 + return nil } -// -func (s *SimpleSplitter) ReadFrom(r io.Reader) error { - var read int64 - buf := make([]byte, s.bufsize) +func (s *SimpleSplitter) ReadFrom(r io.Reader) (int64, error) { + //lastChunkIndex := -1 + var buf []byte for { + //chunkIndex := (s.count - 1) / s.hasher.ChunkSize() + //if lastChunkIndex != chunkIndex { + buf = s.hasher.getBuffer(s.count) + //} n, err := r.Read(buf) if err != nil && err != io.EOF { - return err + return s.count, err } - s.hasher.Write(buf[:n]) - read += int64(n) + //s.Write(s.readBuffer[:n]) + s.count += n + s.sectionCount++ + log.Debug("readfrom", "c", n) if err == io.EOF { + s.Close() go func() { - s.result <- s.hasher.Sum(read) + meta := make([]byte, 8) + binary.BigEndian.PutUint64(meta, uint64(s.count)) + s.result <- s.hasher.Sum(nil, int(s.count), meta) }() - return nil + return s.count, nil } } } diff --git a/swarm/storage/split_test.go b/swarm/storage/split_test.go index bc27ae0589..6ed595df67 100644 --- a/swarm/storage/split_test.go +++ b/swarm/storage/split_test.go @@ -17,45 +17,96 @@ package storage import ( + "bytes" "context" + crand "crypto/rand" + "fmt" "io" + "testing" ) const DefaultChunkCount = 2 + var MaxExcessSize = DefaultChunkCount -func TestAsyncWriteFromReaderCorrectness(t *testing.T) { - data := make([]byte, DefaultChunkSize*DefaultChunkCount+rand.Intn(MaxExcessSize)) - reader := bytes.NewReader(b) - fh := &fakeHasher{} - splitter := NewSimpleSplitter(fh, bufsize) +func TestFakeHasher(t *testing.T) { + sectionSize := 32 + sizes := []int{0, sectionSize - 1, sectionSize, sectionSize + 1, sectionSize * 4, sectionSize*4 + 1} + bufSizes := []int{7, sectionSize / 2, sectionSize, sectionSize + 
1, sectionSize*4 + 1} + for _, bsz := range bufSizes { + for _, sz := range sizes { + t.Run(fmt.Sprintf("fh-buffersize%d-bytesize%d", bsz, sz), func(t *testing.T) { + fh := newFakeHasher(bsz, sectionSize, 2*sectionSize) + s := NewSimpleSplitter(fh, bsz) + buf := make([]byte, bsz) + _, err := io.ReadFull(crand.Reader, buf) + if err != nil { + t.Fatal(err.Error()) + } + r := bytes.NewReader(buf) + _, err = s.ReadFrom(r) + if err != nil { + t.Fatal(err.Error()) + } + h, err := s.Sum(context.TODO()) + if err != nil { + t.Fatal(err.Error()) + } + if !bytes.Equal(h, fh.output) { + t.Fatalf("no match, daddyo, expected %x, got %x", fh.output, h) + } + }) + } + + } +} + +type fakeHasher struct { + output []byte + sectionSize int + chunkSize int + count int + doneC chan struct{} +} - n, err := io.Copy(splitter, reader) - if err != nil { - if err == io.EOF { - got = <-fh.result - } +func newFakeHasher(byteSize int, sectionSize int, chunkSize int) *fakeHasher { + count := 0 + if byteSize > 0 { + count = ((byteSize - 1) / sectionSize) + 1 + } + return &fakeHasher{ + sectionSize: sectionSize, + output: make([]byte, byteSize), + count: count, + chunkSize: chunkSize, + doneC: make(chan struct{}, count), + } } -type fakeBaseHasherJoiner struct { - input []byte +func (fh *fakeHasher) ChunkSize() int { + return fh.chunkSize } -func (fh *fakeBaseHasherJoiner) Reset() { fh.input = nil; return} -func (fh *fakeBaseHasherJoiner) Write(b []byte) { fh.input = append(fh.input, b...) } -func (fh *fakeBaseHasherJoiner) Sum([]byte) []byte { return fh.input } -func (fh *fakeBaseHasherJoiner) BlockSize() int { return 64 } -func (fh *fakeBaseHasherJoiner) Size() int { return 32 } +func (fh *fakeHasher) Reset() { fh.output = nil; return } -type fakeHasher struct { - input []byte - output []byte +func (fh *fakeHasher) Write(section int, data []byte) { + pos := section * fh.sectionSize + copy(fh.output[pos:], data) + fh.doneC <- struct{}{} } -func newFakeHasher() *fakeHasher { - return &fakeHasher{} +func (fh *fakeHasher) Size() int { + return 42 } -func (fh *fakeHasher) Reset() { fh.input = nil; return} -func (fh *fakeHasher) Write([]byte) +func (fh *fakeHasher) BlockSize() int { + return fh.sectionSize +} + +func (fh *fakeHasher) Sum(hash []byte, length int, meta []byte) []byte { + for i := 0; i < fh.count; i++ { + <-fh.doneC + } + return fh.output +} From b23cbca7412e928cfe16be7ecb16b14db7fee5c3 Mon Sep 17 00:00:00 2001 From: lash Date: Wed, 25 Jul 2018 19:48:19 +0200 Subject: [PATCH 04/50] swarm/storage: Refactor with read direct into node buffer --- swarm/storage/filehasher.go | 88 +++++++++++++++++++++++-------------- swarm/storage/split.go | 62 ++++++++++++-------------- swarm/storage/split_test.go | 27 +++++++++--- 3 files changed, 104 insertions(+), 73 deletions(-) diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index 2f4c2c562b..2dd3be526a 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -18,8 +18,12 @@ package storage import ( "encoding/binary" + "errors" + "math" "sync" "sync/atomic" + + "github.com/ethereum/go-ethereum/swarm/log" ) // SectionHasher is an asynchronous writer interface to a hash @@ -31,6 +35,7 @@ type SectionHasher interface { Size() int BlockSize() int ChunkSize() int + GetBuffer(count int64) ([]byte, error) Sum(b []byte, length int, meta []byte) []byte } @@ -45,20 +50,24 @@ type FileHasher struct { hasherFunc func() SectionHasher // hasher constructor result chan []byte // channel to put hash asynchronously size int + lnBranches float64 } -func 
New(hasherFunc func() SectionHasher, branches int) *FileHasher { - sh := &FileHasher{ +func NewFileHasher(hasherFunc func() SectionHasher, branches int, secSize int) *FileHasher { + fh := &FileHasher{ hasherFunc: hasherFunc, result: make(chan []byte), + branches: branches, + secsize: secSize, } - sh.pool = sync.Pool{ + fh.lnBranches = math.Log(float64(branches)) + fh.pool = sync.Pool{ New: func() interface{} { - return sh.newBatch() + return fh.newBatch() }, } - sh.size = hasherFunc().Size() - return sh + fh.size = hasherFunc().Size() + return fh } // level captures one level of chunks in the swarm hash tree @@ -72,19 +81,20 @@ type level struct { // batch records chunks subsumed under the same parent intermediate chunk type batch struct { - nodes []*node // nodes of the batches - index int // offset of the node - parent *node // pointer to containing - buffer *bytes.Buffer - *level // pointer to containing level + nodes []*node // nodes of the batches + index int // offset of the node + parent *node // pointer to containing + batchBuffer []byte + *level // pointer to containing level } // node represent a chunk and embeds an async interface to the chunk hash used type node struct { - hasher SectionHasher // async hasher - pos int // index of the node chunk within its batch - secCnt int32 // number of sections written - *batch // pointer to containing batch + hasher SectionHasher // async hasher + pos int // index of the node chunk within its batch + secCnt int32 // number of sections written + nodeBuffer []byte + *batch // pointer to containing batch } // getParentLevel retrieves or creates the next level up from a node/batch/level @@ -140,37 +150,45 @@ func (sh *FileHasher) newBatch() (bt *batch) { nodes := make([]*node, sh.branches) chunkSize := sh.ChunkSize() bt = &batch{ - buffer: make([]byte, sh.branches*chunkSize), + batchBuffer: make([]byte, sh.branches*chunkSize), //buffer: bytes.NewBuffer(make([]byte, 0, sh.branches*sh.ChunkSize())), } for i := range nodes { offset := chunkSize * i nodes[i] = &node{ - pos: i, - hasher: sh.hasherFunc(), - buffer: batch[offset : offset+chunkSize], + pos: i, + hasher: sh.hasherFunc(), + nodeBuffer: bt.batchBuffer[offset : offset+chunkSize], } } - batch.nodes = nodes + bt.nodes = nodes return bt } -func (sh *FileHasher) getNodeSectionBuffer(globalCount int) ([]byte, func()) { - batchIndex := globalCount / sh.branches * sh.ChunkSize() - batchPos := globalCount % sh.branches * sh.ChunkSize() - batchNodeIndex := batchPos / sh.ChunkSize() - batchNodePos := batchPosIndex % sh.ChunkSize() - return sh.batches[batchIndex].nodes[batchNodeIndex].getSectionBuffer(batchNodePos) +// \TODO if translate to sections, they must also be expd not only sections +func (fh *FileHasher) OffsetToLevel(c int) int { + chunkCount := c / fh.ChunkSize() + log.Warn("chunksize", "offset", c, "c", fh.ChunkSize(), "b", fh.branches, "s", fh.secsize, "count", chunkCount) + return int(math.Log(float64(chunkCount)) / fh.lnBranches) } -func (n *node) getSectionBuffer(p int) (int, func()) { - currentCount := atomic.AddInt32(&n.secCnt, 1) - nodeSectionByteOffset := (batchNodePos / sh.BlockSize()) * sh.BlockSize() - var doneFunc func() - if currentCount == int32(n.branches) { - doneFunc = n.done +func (fh *FileHasher) GetBuffer(globalCount int) ([]byte, error) { + batchIndex := globalCount / fh.branches * fh.ChunkSize() + batchPos := globalCount % fh.branches * fh.ChunkSize() + batchNodeIndex := batchPos / fh.ChunkSize() + batchNodePos := batchPos % fh.ChunkSize() + lvl := 
fh.OffsetToLevel(globalCount) + bt, ok := fh.levels[lvl].batches.Load(batchIndex) + if !ok { + return nil, errors.New("count out of bounds") } - return n.buffer[nodeSectionByteOffset : nodeSectionByteOffset+sh.BlockSize()], batchNodeIndex, doneFunc + return bt.(*batch).nodes[batchNodeIndex].getSectionBuffer(batchNodePos), nil +} + +func (n *node) getSectionBuffer(p int) []byte { + //currentCount := atomic.AddInt32(&n.secCnt, 1) + nodeSectionByteOffset := (p / n.secsize) * n.secsize + return n.nodeBuffer[nodeSectionByteOffset : nodeSectionByteOffset+n.secsize] } // dataSpan returns the size of data encoded under the current node, serialized as big endian uint64 @@ -193,7 +211,7 @@ func (n *node) write(sectionIndex int, section []byte) { currentCount := atomic.AddInt32(&n.secCnt, 1) n.hasher.Write(sectionIndex, section) if currentCount == int32(n.branches) { - n.node() + n.done() } } @@ -205,6 +223,7 @@ func (n *node) done() { parentNode := parentBatch.nodes[parentNodeIndex] parentNode.write(n.pos, n.hasher.Sum(nil, n.hasher.ChunkSize(), parentNode.dataSpan())) }() + } // length is global length @@ -265,6 +284,7 @@ func (fh *FileHasher) Sum(b []byte, length int, meta []byte) []byte { return fh.hasherFunc().Sum(nil, 0, make([]byte, 8)) } + log.Debug("fh sum", "length", length) // calculate the index the last batch lastBatchIndexInFile := (length - 1) / fh.ChunkSize() * fh.branches diff --git a/swarm/storage/split.go b/swarm/storage/split.go index 377bb5863b..c5a8f2c6d7 100644 --- a/swarm/storage/split.go +++ b/swarm/storage/split.go @@ -17,12 +17,10 @@ package storage import ( - "bytes" "context" "encoding/binary" - "io" - "github.com/ethereum/go-ethereum/swarm/log" + "io" ) // SimpleSplitter implements the io.ReaderFrom interface for synchronous read from data @@ -34,65 +32,61 @@ type SimpleSplitter struct { count int64 result chan []byte readBuffer []byte - writeBuffer []byte } // func NewSimpleSplitter(h SectionHasher, bufferSize int) *SimpleSplitter { - writeBufferBytes := make([]byte, 0, h.BlockSize()) return &SimpleSplitter{ - hasher: h, - result: make(chan []byte), - readBuffer: make([]byte, bufferSize), - writeBuffer: bytes.NewBuffer(writeBufferBytes), + hasher: h, + result: make(chan []byte), + readBuffer: make([]byte, bufferSize), } } func (s *SimpleSplitter) Write(buf []byte) (int, error) { for len(buf) > 0 { - sectionOffset := s.section - s.hasher.BlockSize() - writeBuffer := s.hasher.getBuffer(s.count) - c := len(buf) - if c > len(s.hasher.BlockSize()) { - c = len(s.hasher.BlockSize()) - } - s.hasher.Write(s.sectionCount, s.writeBuffer.Bytes()) - s.count += c - s.sectionCount++ - log.Debug("writer", "c", c) - buf = buf[c:] - s.sectionCount++ + // sectionOffset := s.sectionCount - s.hasher.BlockSize() + // writeBuffer := s.hasher.getBuffer(s.count) + // c := len(buf) + // if c > len(s.hasher.BlockSize()) { + // c = len(s.hasher.BlockSize()) + // } + // s.hasher.Write(s.sectionCount, s.writeBuffer.Bytes()) + // s.count += c + // s.sectionCount++ + // log.Debug("writer", "c", c) + // buf = buf[c:] + // s.sectionCount++ } return int(s.count), nil } func (s *SimpleSplitter) Close() error { - if s.writeBuffer.Len() > 0 { - log.Debug("writer flush on close", "c", s.writeBuffer.Len()) - s.hasher.Write(s.sectionCount, s.writeBuffer.Bytes()) - } - s.count = 0 + // if s.writeBuffer.Len() > 0 { + // log.Debug("writer flush on close", "c", s.writeBuffer.Len()) + // s.hasher.Write(s.sectionCount, s.writeBuffer.Bytes()) + // } + // s.count = 0 return nil } func (s *SimpleSplitter) ReadFrom(r 
io.Reader) (int64, error) { //lastChunkIndex := -1 - var buf []byte for { - //chunkIndex := (s.count - 1) / s.hasher.ChunkSize() - //if lastChunkIndex != chunkIndex { - buf = s.hasher.getBuffer(s.count) - //} + buf, err := s.hasher.GetBuffer(s.count) + if err != nil { + return s.count, err + } n, err := r.Read(buf) if err != nil && err != io.EOF { return s.count, err } - //s.Write(s.readBuffer[:n]) - s.count += n + s.count += int64(n) s.sectionCount++ log.Debug("readfrom", "c", n) if err == io.EOF { - s.Close() + log.Debug("have eof") + //s.Close() go func() { meta := make([]byte, 8) binary.BigEndian.PutUint64(meta, uint64(s.count)) diff --git a/swarm/storage/split_test.go b/swarm/storage/split_test.go index 6ed595df67..701a9cfbfb 100644 --- a/swarm/storage/split_test.go +++ b/swarm/storage/split_test.go @@ -23,6 +23,8 @@ import ( "fmt" "io" "testing" + + "github.com/ethereum/go-ethereum/swarm/log" ) const DefaultChunkCount = 2 @@ -32,7 +34,7 @@ var MaxExcessSize = DefaultChunkCount func TestFakeHasher(t *testing.T) { sectionSize := 32 sizes := []int{0, sectionSize - 1, sectionSize, sectionSize + 1, sectionSize * 4, sectionSize*4 + 1} - bufSizes := []int{7, sectionSize / 2, sectionSize, sectionSize + 1, sectionSize*4 + 1} + bufSizes := []int{32, 7, sectionSize / 2, sectionSize, sectionSize + 1, sectionSize*4 + 1} for _, bsz := range bufSizes { for _, sz := range sizes { t.Run(fmt.Sprintf("fh-buffersize%d-bytesize%d", bsz, sz), func(t *testing.T) { @@ -66,21 +68,33 @@ type fakeHasher struct { sectionSize int chunkSize int count int + cap int doneC chan struct{} } func newFakeHasher(byteSize int, sectionSize int, chunkSize int) *fakeHasher { - count := 0 + var count int if byteSize > 0 { count = ((byteSize - 1) / sectionSize) + 1 } - return &fakeHasher{ + fh := &fakeHasher{ sectionSize: sectionSize, output: make([]byte, byteSize), - count: count, + cap: count, chunkSize: chunkSize, doneC: make(chan struct{}, count), } + log.Debug("fakehasher create", "cap", count) + return fh +} + +func (fh *fakeHasher) GetBuffer(p int64) ([]byte, error) { + if fh.count < fh.cap { + log.Debug("fakehasher cc", "cap", fh.cap, "count", fh.count) + fh.doneC <- struct{}{} + } + fh.count++ + return make([]byte, fh.sectionSize), nil } @@ -91,6 +105,7 @@ func (fh *fakeHasher) ChunkSize() int { func (fh *fakeHasher) Reset() { fh.output = nil; return } func (fh *fakeHasher) Write(section int, data []byte) { + log.Warn("wrigint to hasher", "src", section, "data", data) pos := section * fh.sectionSize copy(fh.output[pos:], data) fh.doneC <- struct{}{} @@ -105,7 +120,9 @@ func (fh *fakeHasher) BlockSize() int { } func (fh *fakeHasher) Sum(hash []byte, length int, meta []byte) []byte { - for i := 0; i < fh.count; i++ { + for i := 0; i < fh.cap; i++ { + + log.Debug("sum", "count", fh.count, "length", length, "i", i) <-fh.doneC } return fh.output From b79bc23e98227a634b51d45105f0f5117ff4d22a Mon Sep 17 00:00:00 2001 From: lash Date: Thu, 2 Aug 2018 11:34:47 +0200 Subject: [PATCH 05/50] swarm/storage: Add GetBuffer test, level 0 add on fh init --- swarm/storage/filehasher.go | 241 ++++++++++++++++++++++++------------ swarm/storage/split.go | 9 +- swarm/storage/split_test.go | 14 ++- 3 files changed, 174 insertions(+), 90 deletions(-) diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index 2dd3be526a..2d66849ef1 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -18,7 +18,8 @@ package storage import ( "encoding/binary" - "errors" + "fmt" + "io" "math" "sync" "sync/atomic" @@ 
-31,29 +32,32 @@ import ( // Sum can be called once the final length is known potentially before all sections are complete type SectionHasher interface { Reset() - Write(idx int, section []byte) + WriteSection(idx int64, section []byte) int Size() int BlockSize() int ChunkSize() int - GetBuffer(count int64) ([]byte, error) + WriteBuffer(count int64, r io.Reader) (int, error) + SetLength(length int64) Sum(b []byte, length int, meta []byte) []byte } // FileHasher is instantiated each time a file is swarm hashed // itself implements the ChunkHasher interface type FileHasher struct { - mtx sync.Mutex // RW lock to add/read levels push and unshift batches - pool sync.Pool // batch resource pool - levels []*level // levels of the swarm hash tree - secsize int // section size - branches int // branching factor - hasherFunc func() SectionHasher // hasher constructor - result chan []byte // channel to put hash asynchronously - size int + mtx sync.Mutex // RW lock to add/read levels push and unshift batches + pool sync.Pool // batch resource pool + levels []*level // levels of the swarm hash tree + secsize int // section size + branches int // branching factor + hasherFunc func() SwarmHash // SectionHasher // hasher constructor + result chan []byte // channel to put hash asynchronously + digestSize int + dataLength int64 lnBranches float64 } -func NewFileHasher(hasherFunc func() SectionHasher, branches int, secSize int) *FileHasher { +//func NewFileHasher(hasherFunc func() SectionHasher, branches int, secSize int) *FileHasher { +func NewFileHasher(hasherFunc func() SwarmHash, branches int, secSize int) *FileHasher { fh := &FileHasher{ hasherFunc: hasherFunc, result: make(chan []byte), @@ -66,7 +70,12 @@ func NewFileHasher(hasherFunc func() SectionHasher, branches int, secSize int) * return fh.newBatch() }, } - fh.size = hasherFunc().Size() + fh.digestSize = hasherFunc().Size() + + fh.levels = append(fh.levels, &level{ + FileHasher: fh, + levelIndex: 0, + }) return fh } @@ -81,20 +90,23 @@ type level struct { // batch records chunks subsumed under the same parent intermediate chunk type batch struct { - nodes []*node // nodes of the batches - index int // offset of the node - parent *node // pointer to containing - batchBuffer []byte - *level // pointer to containing level + nodes []*node // nodes of the batches + parent *node // pointer to containing + nodeCompleteCount int + batchBuffer []byte + index int // offset of the node + *level // pointer to containing level } // node represent a chunk and embeds an async interface to the chunk hash used type node struct { - hasher SectionHasher // async hasher - pos int // index of the node chunk within its batch - secCnt int32 // number of sections written - nodeBuffer []byte - *batch // pointer to containing batch + hasher SwarmHash // SectionHasher // async hasher + pos int // index of the node chunk within its batch + secCnt int32 // number of sections written + nodeBuffer []byte + nodeIndex int + writeComplete chan struct{} + *batch // pointer to containing batch } // getParentLevel retrieves or creates the next level up from a node/batch/level @@ -105,6 +117,7 @@ func (lev *level) getLevel(pl int) (par *level) { } par = &level{ levelIndex: pl, + FileHasher: lev.FileHasher, } lev.levels = append(lev.levels, par) return par @@ -124,6 +137,8 @@ func (lev *level) getOrCreateBatch(index int) *batch { pb := lev.getBatch(index) if pb == nil { pb = lev.pool.Get().(*batch) + pb.index = index + pb.level = lev lev.batches.Store(index, pb) } return pb @@ -140,67 
+155,106 @@ func (b *batch) delink() { b.pool.Put(b) } +func (fh *FileHasher) BlockSize() int { + return fh.secsize +} + // returns the digest size of the underlying hasher func (fh *FileHasher) Size() int { - return fh.size + return fh.digestSize +} + +func (fh *FileHasher) WriteSection(idx int64, data []byte) int { + return 0 } // newBatch constructs a reuseable batch -func (sh *FileHasher) newBatch() (bt *batch) { - nodes := make([]*node, sh.branches) - chunkSize := sh.ChunkSize() +func (fh *FileHasher) newBatch() (bt *batch) { + nodes := make([]*node, fh.branches) + chunkSize := fh.ChunkSize() bt = &batch{ - batchBuffer: make([]byte, sh.branches*chunkSize), - //buffer: bytes.NewBuffer(make([]byte, 0, sh.branches*sh.ChunkSize())), + batchBuffer: make([]byte, fh.branches*chunkSize), } for i := range nodes { offset := chunkSize * i nodes[i] = &node{ - pos: i, - hasher: sh.hasherFunc(), - nodeBuffer: bt.batchBuffer[offset : offset+chunkSize], + pos: i, + hasher: fh.hasherFunc(), + nodeBuffer: bt.batchBuffer[offset : offset+chunkSize], + batch: bt, + writeComplete: make(chan struct{}), } } bt.nodes = nodes return bt } -// \TODO if translate to sections, they must also be expd not only sections -func (fh *FileHasher) OffsetToLevel(c int) int { - chunkCount := c / fh.ChunkSize() - log.Warn("chunksize", "offset", c, "c", fh.ChunkSize(), "b", fh.branches, "s", fh.secsize, "count", chunkCount) - return int(math.Log(float64(chunkCount)) / fh.lnBranches) +// level depth is index of level ascending from data level towards tree root +func (fh *FileHasher) OffsetToLevelDepth(c int64) int { + chunkCount := c / int64(fh.ChunkSize()) + level := int(math.Log(float64(chunkCount)) / fh.lnBranches) + log.Warn("chunksize", "offset", c, "c", fh.ChunkSize(), "b", fh.branches, "s", fh.secsize, "count", chunkCount, "level", level) + return level } -func (fh *FileHasher) GetBuffer(globalCount int) ([]byte, error) { - batchIndex := globalCount / fh.branches * fh.ChunkSize() - batchPos := globalCount % fh.branches * fh.ChunkSize() +// returns data level buffer position for offset globalCount +func (fh *FileHasher) WriteBuffer(globalCount int, r io.Reader) (int, error) { + + // writes are only valid on section thresholds + if globalCount%fh.BlockSize() > 0 { + return 0, fmt.Errorf("offset must be multiples of blocksize %d", fh.BlockSize()) + } + + // retrieve the node we are writing to + batchIndex := globalCount / (fh.branches * fh.ChunkSize()) + batchPos := globalCount % (fh.branches * fh.ChunkSize()) batchNodeIndex := batchPos / fh.ChunkSize() batchNodePos := batchPos % fh.ChunkSize() - lvl := fh.OffsetToLevel(globalCount) - bt, ok := fh.levels[lvl].batches.Load(batchIndex) - if !ok { - return nil, errors.New("count out of bounds") + //log.Debug("batch", "nodepos", batchNodePos, "node", batchNodeIndex, "global", globalCount, "batchindex", batchIndex, "batchpos", batchPos, "blockSize", fh.BlockSize()) + bt := fh.levels[0].getOrCreateBatch(batchIndex) + nod := bt.nodes[batchNodeIndex] + + // Make sure there is a pointer to the data level on the node + if nod.level == nil { + nod.level = fh.levels[0] + } + buf := nod.nodeBuffer[batchNodePos : batchNodePos+fh.BlockSize()] + c, err := r.Read(buf) + if err != nil { + return 0, err + } else if c < fh.BlockSize() { + return 0, io.ErrUnexpectedEOF } - return bt.(*batch).nodes[batchNodeIndex].getSectionBuffer(batchNodePos), nil + currentCount := atomic.AddInt32(&nod.secCnt, 1) + if currentCount == int32(nod.branches) { + nod.done() + //nod.writeComplete <- struct{}{} + } + 
return fh.BlockSize(), nil } -func (n *node) getSectionBuffer(p int) []byte { - //currentCount := atomic.AddInt32(&n.secCnt, 1) - nodeSectionByteOffset := (p / n.secsize) * n.secsize - return n.nodeBuffer[nodeSectionByteOffset : nodeSectionByteOffset+n.secsize] +// called when the final length of the data is known +func (fh *FileHasher) SetLength(l int64) { + fh.dataLength = l + + // fill out missing levels in the filehasher + levelDepth := fh.OffsetToLevelDepth(l) + for i := len(fh.levels) - 1; i < levelDepth; i++ { + fh.levels = append(fh.levels, &level{ + levelIndex: i, + FileHasher: fh, + }) + } + log.Debug("levels", "c", len(fh.levels)) } -// dataSpan returns the size of data encoded under the current node, serialized as big endian uint64 -func (n *node) dataSpan() []byte { - //secsize := n.hasher.BlockSize() - span := uint64(n.hasher.ChunkSize()) +// dataSpan returns the size of data encoded under the current node +func (n *node) span() uint64 { + span := uint64(n.ChunkSize()) for l := 0; l < n.levelIndex; l++ { span *= uint64(n.branches) } - meta := make([]byte, 8) - binary.BigEndian.PutUint64(meta, span) - return meta + return span } func (n *node) Write(sectionIndex int, section []byte) { @@ -209,7 +263,12 @@ func (n *node) Write(sectionIndex int, section []byte) { func (n *node) write(sectionIndex int, section []byte) { currentCount := atomic.AddInt32(&n.secCnt, 1) - n.hasher.Write(sectionIndex, section) + //n.hasher.Write(sectionIndex, section) + n.hasher.Reset() + n.hasher.Write(section) + sum := n.hasher.Sum(nil) + log.Debug("writing", "pos", n.pos, "section", sectionIndex, "data", sum, "level", n.levelIndex) + copy(n.nodeBuffer[sectionIndex:sectionIndex+n.BlockSize()], sum) if currentCount == int32(n.branches) { n.done() } @@ -218,27 +277,38 @@ func (n *node) write(sectionIndex int, section []byte) { func (n *node) done() { go func() { parentBatchIndex := n.index / n.branches - parentBatch := n.levels[n.levelIndex+1].getBatch(parentBatchIndex) + parentBatch := n.getLevel(n.levelIndex + 1).getOrCreateBatch(parentBatchIndex) parentNodeIndex := n.index % n.branches parentNode := parentBatch.nodes[parentNodeIndex] - parentNode.write(n.pos, n.hasher.Sum(nil, n.hasher.ChunkSize(), parentNode.dataSpan())) + serializedLength := make([]byte, 8) + binary.LittleEndian.PutUint64(serializedLength, parentNode.span()) + //n.hasher.ResetWithLength(serializedLength) + //n.hasher.Write(n.nodeBuffer) + //sum := n.hasher.Sum(nil) + //log.Debug("sum", "s", sum, "index", n.index, "nodepos", n.pos, "buf", n.nodeBuffer, "parentNode", parentNode.pos, "levelindex", n.levelIndex) + parentNode.write(n.pos*n.BlockSize(), n.nodeBuffer) }() } // length is global length -func (n *node) sum(length int, nodeSpan int) { +func (n *node) sum(length int64, nodeSpan int64) { + + select { + case <-n.writeComplete: + } + log.Debug("node sum", "l", length, "span", nodeSpan) // nodeSpan is the total byte size of a complete tree under the current node - nodeSpan *= n.branches + nodeSpan *= int64(n.branches) // if a new batch would be started - batchSpan := nodeSpan * n.branches - nodeIndex := length % batchSpan + batchSpan := nodeSpan * int64(n.branches) + nodeIndex := length % int64(batchSpan) var parentNode *node if nodeIndex == 0 && len(n.levels) > n.levelIndex+1 { - batchIndex := (length-1)/batchSpan + 1 - parentNode = n.levels[n.levelIndex+1].getBatch(batchIndex).nodes[nodeIndex] + batchIndex := (length-1)/int64(batchSpan) + 1 + parentNode = n.levels[n.levelIndex+1].getBatch(int(batchIndex)).nodes[nodeIndex] 
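// To make the index arithmetic above concrete, a worked example (illustrative
// only, assuming the parameters used in the tests: secsize = 32, branches = 128,
// so ChunkSize() = 4096). For a call arriving from the data level with
// nodeSpan = 4096:
//
//	nodeSpan *= 128              // -> 524288: bytes subsumed under one parent node
//	batchSpan = 524288 * 128     // -> 67108864: bytes subsumed under one parent batch
//
// Only when length is an exact multiple of batchSpan (and a higher level
// exists) is summing delegated to the parent node selected above; the
// recursion then repeats one level up per call until the root chunk is reached.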
 		parentNode.sum(length, nodeSpan)
 		return
 	}
@@ -250,19 +320,23 @@
 	meta := make([]byte, 8)
 	binary.BigEndian.PutUint64(meta, dataLength)
+	log.Debug("underlen", "l", dataLength)
 	// bmtLength is the actual length of bytes in the chunk
 	// if the node is an intermediate node (level != 0 && len(levels) > 1), bmtLength will be a multiple 32 bytes
-	var bmtLength uint64
-	if n.levelIndex == 0 {
-		bmtLength = dataLength
-	} else {
-		bmtLength = ((dataLength - 1) / uint64((nodeSpan/n.branches+1)*n.hasher.BlockSize()))
-	}
+	//var bmtLength uint64
+	// if n.levelIndex == 0 {
+	// 	bmtLength = dataLength
+	// } else {
+	// 	bmtLength = ((dataLength - 1) / uint64((nodeSpan/n.branches+1)*n.hasher.BlockSize()))
+	// }

-	hash := n.hasher.Sum(nil, int(bmtLength), meta)
+	n.hasher.ResetWithLength(meta)
+	n.hasher.Write(n.nodeBuffer)
+	hash := n.hasher.Sum(nil) //, int(bmtLength), meta)

 	// are we on the root level?
 	if parentNode != nil {
+		log.Warn("continue")
 		parentNode.sum(length, nodeSpan)
 		return
 	}
@@ -277,26 +351,31 @@ func (fh *FileHasher) ChunkSize() int {
 // Louis note to self: secsize is the same as the size of the reference
 // Invoked after we know the actual length of the file
 // Will create the last node on the data level of the hash tree matching the length
-func (fh *FileHasher) Sum(b []byte, length int, meta []byte) []byte {
+//func (fh *FileHasher) Sum(b []byte, length int, meta []byte) []byte {
+func (fh *FileHasher) Sum(b []byte) []byte {

 	// handle edge case where the file is empty
-	if length == 0 {
-		return fh.hasherFunc().Sum(nil, 0, make([]byte, 8))
+	if fh.dataLength == 0 {
+		h := fh.hasherFunc()
+		zero := [8]byte{}
+		h.ResetWithLength(zero[:])
+		return h.Sum(b) //fh.hasherFunc().Sum(nil, 0, make([]byte, 8))
 	}

-	log.Debug("fh sum", "length", length)
+	log.Debug("fh sum", "length", fh.dataLength)
 	// calculate the index the last batch
-	lastBatchIndexInFile := (length - 1) / fh.ChunkSize() * fh.branches
+	lastBatchIndexInFile := (fh.dataLength - 1) / int64(fh.ChunkSize()*fh.branches)

 	// calculate the node index within the last batch
-	byteIndexInLastBatch := length - lastBatchIndexInFile*fh.ChunkSize()*fh.branches
-	nodeIndexInLastBatch := (byteIndexInLastBatch - 1) / fh.ChunkSize()
+	byteIndexInLastBatch := fh.dataLength - lastBatchIndexInFile*int64(fh.ChunkSize()*fh.branches)
+	nodeIndexInLastBatch := (int(byteIndexInLastBatch) - 1) / fh.ChunkSize()

 	// get the last node
-	lastNode := fh.levels[0].getBatch(lastBatchIndexInFile).nodes[nodeIndexInLastBatch]
+	lastNode := fh.levels[0].getBatch(int(lastBatchIndexInFile)).nodes[nodeIndexInLastBatch]
+	log.Debug("lastnode", "batchindex", lastBatchIndexInFile, "nodeindex", nodeIndexInLastBatch)

 	// asynchronously call sum on this node and wait for the final result
-	go lastNode.sum(length, fh.ChunkSize())
+	go func() {
+		lastNode.sum(fh.dataLength, int64(fh.ChunkSize()))
+	}()

 	return <-fh.result
 }
diff --git a/swarm/storage/split.go b/swarm/storage/split.go
index c5a8f2c6d7..4a9006c24f 100644
--- a/swarm/storage/split.go
+++ b/swarm/storage/split.go
@@ -73,17 +73,12 @@ func (s *SimpleSplitter) Close() error {
 func (s *SimpleSplitter) ReadFrom(r io.Reader) (int64, error) {
 	//lastChunkIndex := -1
 	for {
-		buf, err := s.hasher.GetBuffer(s.count)
+		c, err := s.hasher.WriteBuffer(s.count, r)
 		if err != nil {
 			return s.count, err
 		}
-		n, err := r.Read(buf)
-		if err != nil && err != io.EOF {
-			return s.count, err
-		}
-		s.count += int64(n)
+		s.count += int64(c)
 		s.sectionCount++
-		log.Debug("readfrom", "c", n)
 		if err == io.EOF {
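// On io.EOF the total byte count is final, so the splitter can finalize out
// of band: the goroutine below serializes s.count as an 8-byte big-endian
// span and hands it to the hasher's Sum. A minimal sketch of that metadata
// (illustrative only), for an input of 8192 bytes:
//
//	meta := make([]byte, 8)
//	binary.BigEndian.PutUint64(meta, 8192) // meta = 00 00 00 00 00 00 20 00
//
// This is what allows Sum to be called as soon as the length is known, even
// while some section writes are still in flight.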
log.Debug("have eof") //s.Close() diff --git a/swarm/storage/split_test.go b/swarm/storage/split_test.go index 701a9cfbfb..76c8347688 100644 --- a/swarm/storage/split_test.go +++ b/swarm/storage/split_test.go @@ -69,6 +69,7 @@ type fakeHasher struct { chunkSize int count int cap int + length int64 doneC chan struct{} } @@ -102,13 +103,22 @@ func (fh *fakeHasher) ChunkSize() int { return fh.chunkSize } +func (fh *fakeHasher) SetLength(c int64) { + fh.length = c +} + func (fh *fakeHasher) Reset() { fh.output = nil; return } -func (fh *fakeHasher) Write(section int, data []byte) { +func (fh *fakeHasher) WriteBuffer(offset int64, r io.Reader) (int, error) { + return 0, nil +} + +func (fh *fakeHasher) WriteSection(section int64, data []byte) int { log.Warn("wrigint to hasher", "src", section, "data", data) - pos := section * fh.sectionSize + pos := section * int64(fh.sectionSize) copy(fh.output[pos:], data) fh.doneC <- struct{}{} + return len(data) } func (fh *fakeHasher) Size() int { From e73f7ccd2f4151174b6c4e4ce9160f9d5e34dca7 Mon Sep 17 00:00:00 2001 From: lash Date: Wed, 8 Aug 2018 11:14:55 +0200 Subject: [PATCH 06/50] swarm/storage: Use and wrap bmt.SectionWriter for use in filehasher --- swarm/bmt/bmt.go | 2 + swarm/storage/filehasher.go | 88 ++++++++++++------------- swarm/storage/filehasher_test.go | 106 +++++++++++++++++++++++++++++++ swarm/storage/split.go | 3 +- swarm/storage/split_test.go | 9 ++- 5 files changed, 160 insertions(+), 48 deletions(-) create mode 100644 swarm/storage/filehasher_test.go diff --git a/swarm/bmt/bmt.go b/swarm/bmt/bmt.go index a85d4369e5..a854b73925 100644 --- a/swarm/bmt/bmt.go +++ b/swarm/bmt/bmt.go @@ -417,6 +417,8 @@ type SectionWriter interface { Write(index int, data []byte) // write into section of index Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer SectionSize() int // size of the async section unit to use + Size() int + BlockSize() int } // AsyncHasher extends BMT Hasher with an asynchronous segment/section writer interface diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index 2d66849ef1..140e94e286 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -24,40 +24,46 @@ import ( "sync" "sync/atomic" + "github.com/ethereum/go-ethereum/swarm/bmt" "github.com/ethereum/go-ethereum/swarm/log" ) -// SectionHasher is an asynchronous writer interface to a hash +// SectionWriter is an asynchronous writer interface to a hash // it allows for concurrent and out-of-order writes of sections of the hash's input buffer // Sum can be called once the final length is known potentially before all sections are complete +//type SectionWriter interface { +// Reset() +// WriteSection(idx int64, section []byte) int +// Size() int +// BlockSize() int +// ChunkSize() int +// WriteBuffer(count int64, r io.Reader) (int, error) +// SetLength(length int64) +// Sum(b []byte, length int, meta []byte) []byte +//} + type SectionHasher interface { - Reset() - WriteSection(idx int64, section []byte) int - Size() int - BlockSize() int - ChunkSize() int - WriteBuffer(count int64, r io.Reader) (int, error) - SetLength(length int64) - Sum(b []byte, length int, meta []byte) []byte + bmt.SectionWriter + WriteBuffer(globalCount int64, r io.Reader) (int, error) } // FileHasher is instantiated each time a file is swarm hashed // itself implements the ChunkHasher interface type FileHasher struct { - mtx sync.Mutex // RW lock to add/read levels push and unshift batches - pool sync.Pool // batch resource pool - 
levels []*level // levels of the swarm hash tree - secsize int // section size - branches int // branching factor - hasherFunc func() SwarmHash // SectionHasher // hasher constructor - result chan []byte // channel to put hash asynchronously + mtx sync.Mutex // RW lock to add/read levels push and unshift batches + pool sync.Pool // batch resource pool + levels []*level // levels of the swarm hash tree + secsize int // section size + branches int // branching factor + hasherFunc func() bmt.SectionWriter // SectionWriter // hasher constructor + result chan []byte // channel to put hash asynchronously digestSize int dataLength int64 lnBranches float64 } -//func NewFileHasher(hasherFunc func() SectionHasher, branches int, secSize int) *FileHasher { -func NewFileHasher(hasherFunc func() SwarmHash, branches int, secSize int) *FileHasher { +//func NewFileHasher(hasherFunc func() SectionWriter, branches int, secSize int) *FileHasher { +func NewFileHasher(hasherFunc func() bmt.SectionWriter, branches int, secSize int) *FileHasher { fh := &FileHasher{ hasherFunc: hasherFunc, result: make(chan []byte), @@ -100,9 +106,9 @@ type batch struct { // node represent a chunk and embeds an async interface to the chunk hash used type node struct { - hasher SwarmHash // SectionHasher // async hasher - pos int // index of the node chunk within its batch - secCnt int32 // number of sections written + hasher bmt.SectionWriter // async hasher + pos int // index of the node chunk within its batch + secCnt int32 // number of sections written nodeBuffer []byte nodeIndex int writeComplete chan struct{} @@ -263,12 +269,9 @@ func (n *node) Write(sectionIndex int, section []byte) { func (n *node) write(sectionIndex int, section []byte) { currentCount := atomic.AddInt32(&n.secCnt, 1) - //n.hasher.Write(sectionIndex, section) - n.hasher.Reset() - n.hasher.Write(section) - sum := n.hasher.Sum(nil) - log.Debug("writing", "pos", n.pos, "section", sectionIndex, "data", sum, "level", n.levelIndex) - copy(n.nodeBuffer[sectionIndex:sectionIndex+n.BlockSize()], sum) + n.hasher.Write(sectionIndex, section) + log.Debug("writing", "pos", n.pos, "section", sectionIndex, "level", n.levelIndex) + copy(n.nodeBuffer[sectionIndex:sectionIndex+n.BlockSize()], section) if currentCount == int32(n.branches) { n.done() } @@ -282,10 +285,6 @@ func (n *node) done() { parentNode := parentBatch.nodes[parentNodeIndex] serializedLength := make([]byte, 8) binary.LittleEndian.PutUint64(serializedLength, parentNode.span()) - //n.hasher.ResetWithLength(serializedLength) - //n.hasher.Write(n.nodeBuffer) - //sum := n.hasher.Sum(nil) - //log.Debug("sum", "s", sum, "index", n.index, "nodepos", n.pos, "buf", n.nodeBuffer, "parentNode", parentNode.pos, "levelindex", n.levelIndex) parentNode.write(n.pos*n.BlockSize(), n.nodeBuffer) }() @@ -323,16 +322,16 @@ func (n *node) sum(length int64, nodeSpan int64) { log.Debug("underlen", "l", dataLength) // bmtLength is the actual length of bytes in the chunk // if the node is an intermediate node (level != 0 && len(levels) > 1), bmtLength will be a multiple 32 bytes - //var bmtLength uint64 - // if n.levelIndex == 0 { - // bmtLength = dataLength - // } else { - // bmtLength = ((dataLength - 1) / uint64((nodeSpan/n.branches+1)*n.hasher.BlockSize())) - // } + var bmtLength uint64 + if n.levelIndex == 0 { + bmtLength = dataLength + } else { + bmtLength = ((dataLength - 1) / uint64((nodeSpan/int64(n.branches)+1)*int64(n.hasher.BlockSize()))) + } - n.hasher.ResetWithLength(meta) - n.hasher.Write(n.nodeBuffer) - hash := 
n.hasher.Sum(nil) //, int(bmtLength), meta) + //n.hasher.ResetWithLength(meta) + //n.hasher.Write(n.nodeBuffer) + hash := n.hasher.Sum(nil, int(bmtLength), meta) // are we on the root level? if parentNode != nil { @@ -356,10 +355,11 @@ func (fh *FileHasher) Sum(b []byte) []byte { // handle edge case where the file is empty if fh.dataLength == 0 { - h := fh.hasherFunc() - zero := [8]byte{} - h.ResetWithLength(zero[:]) - return h.Sum(b) //fh.hasherFunc().Sum(nil, 0, make([]byte, 8)) + // h := fh.hasherFunc() + // zero := [8]byte{} + // h.ResetWithLength(zero[:]) + // return h.Sum(b) + return fh.hasherFunc().Sum(nil, 0, make([]byte, 8)) } log.Debug("fh sum", "length", fh.dataLength) diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go new file mode 100644 index 0000000000..cf598c34a1 --- /dev/null +++ b/swarm/storage/filehasher_test.go @@ -0,0 +1,106 @@ +package storage + +import ( + "bytes" + crand "crypto/rand" + "io" + "math/rand" + "testing" + "time" + + "github.com/ethereum/go-ethereum/crypto/sha3" + "github.com/ethereum/go-ethereum/swarm/bmt" + "github.com/ethereum/go-ethereum/swarm/log" +) + +func newAsyncHasher() bmt.SectionWriter { + tp := bmt.NewTreePool(sha3.NewKeccak256, 128, 64) + return &bmt.AsyncHasher{ + Hasher: bmt.New(tp), + } +} + +func TestLevelFromOffset(t *testing.T) { + fh := NewFileHasher(newAsyncHasher, 128, 32) + sizes := []int{64, 127, 128, 129, 128*128 - 1, 128 * 128, 128 * 128 * 128 * 20} + expects := []int{0, 0, 1, 1, 1, 2, 3} + for i, sz := range sizes { + offset := fh.ChunkSize() * sz + lvl := fh.OffsetToLevelDepth(int64(offset)) + if lvl != expects[i] { + t.Fatalf("offset %d (chunkcount %d), expected level %d, got %d", offset, sz, expects[i], lvl) + } + } +} + +func TestWriteBuffer(t *testing.T) { + data := []byte("0123456789abcdef") + fh := NewFileHasher(newAsyncHasher, 2, 2) + offsets := []int{12, 8, 4, 2, 6, 10, 0, 14} + r := bytes.NewReader(data) + for _, o := range offsets { + log.Debug("writing", "o", o) + r.Seek(int64(o), io.SeekStart) + _, err := fh.WriteBuffer(o, r) + if err != nil { + t.Fatal(err) + } + //copy(buf, data[o:o+2]) + } + + batchone := fh.levels[0].getBatch(0) + if !bytes.Equal(batchone.batchBuffer, data[:8]) { + t.Fatalf("expected batch one data %x, got %x", data[:8], batchone.batchBuffer) + } + + batchtwo := fh.levels[0].getBatch(1) + if !bytes.Equal(batchtwo.batchBuffer, data[8:]) { + t.Fatalf("expected batch two data %x, got %x", data[8:], batchtwo.batchBuffer) + } + + time.Sleep(time.Second) +} + +func TestSum(t *testing.T) { + + fh := NewFileHasher(newAsyncHasher, 128, 32) + data := make([]byte, 258*fh.ChunkSize()) + c, err := crand.Read(data) + if err != nil { + t.Fatal(err) + } else if c != len(data) { + t.Fatalf("short read %d", c) + } + + var offsets []int + for i := 0; i < len(data)/32; i++ { + offsets = append(offsets, i*32) + } + + r := bytes.NewReader(data) + for { + if len(offsets) == 0 { + break + } + lastIndex := len(offsets) - 1 + var c int + if len(offsets) > 1 { + c = rand.Intn(lastIndex) + } + offset := offsets[c] + if c != lastIndex { + offsets[c] = offsets[lastIndex] + } + offsets = offsets[:lastIndex] + + r.Seek(int64(offset), io.SeekStart) + _, err := fh.WriteBuffer(offset, r) + if err != nil { + t.Fatal(err) + } + } + + fh.SetLength(int64(len(data))) + h := fh.Sum(nil) + t.Logf("hash: %x", h) +} diff --git a/swarm/storage/split.go b/swarm/storage/split.go index 4a9006c24f..f3666cd057 100644 --- a/swarm/storage/split.go +++ b/swarm/storage/split.go @@ -19,8 +19,9 @@ package storage 
 import (
 	"context"
 	"encoding/binary"
-	"github.com/ethereum/go-ethereum/swarm/log"
 	"io"
+
+	"github.com/ethereum/go-ethereum/swarm/log"
 )

 // SimpleSplitter implements the io.ReaderFrom interface for synchronous read from data
diff --git a/swarm/storage/split_test.go b/swarm/storage/split_test.go
index 76c8347688..147316779d 100644
--- a/swarm/storage/split_test.go
+++ b/swarm/storage/split_test.go
@@ -113,12 +113,11 @@ func (fh *fakeHasher) WriteBuffer(offset int64, r io.Reader) (int, error) {
 	return 0, nil
 }

-func (fh *fakeHasher) WriteSection(section int64, data []byte) int {
+func (fh *fakeHasher) Write(section int, data []byte) {
 	log.Warn("wrigint to hasher", "src", section, "data", data)
-	pos := section * int64(fh.sectionSize)
+	pos := section * fh.sectionSize
 	copy(fh.output[pos:], data)
 	fh.doneC <- struct{}{}
-	return len(data)
 }

 func (fh *fakeHasher) Size() int {
@@ -137,3 +136,7 @@ func (fh *fakeHasher) Sum(hash []byte, length int, meta []byte) []byte {
 	}
 	return fh.output
 }
+
+func (fh *fakeHasher) SectionSize() int {
+	return fh.sectionSize
+}

From 20a7ae98df4530397c7152c7d9046df7962d6cad Mon Sep 17 00:00:00 2001
From: lash
Date: Wed, 8 Aug 2018 15:55:50 +0200
Subject: [PATCH 07/50] swarm/storage: Make Filehasher TestSum pass on complete
 batch

returns 5fcbddf3030d1a261b80f5a069b731f1f5e90c52df4b18036b43434cda8f3305
regardless of data
---
 swarm/bmt/bmt.go                 | 14 ++++++++++++--
 swarm/bmt/bmt_test.go            |  9 +++++++++
 swarm/storage/filehasher.go      | 26 +++++---------------------
 swarm/storage/filehasher_test.go | 13 ++++++------7
 4 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/swarm/bmt/bmt.go b/swarm/bmt/bmt.go
index a854b73925..b76af205b5 100644
--- a/swarm/bmt/bmt.go
+++ b/swarm/bmt/bmt.go
@@ -23,6 +23,8 @@ import (
 	"strings"
 	"sync"
 	"sync/atomic"
+
+	"github.com/ethereum/go-ethereum/log"
 )

 /*
@@ -401,6 +403,7 @@ func (h *Hasher) NewAsyncWriter(double bool) *AsyncHasher {
 		secsize *= 2
 	}
 	write := func(i int, section []byte, final bool) {
+		log.Debug("bmt write sub", "i", i, "final", final, "s", len(section))
 		h.writeSection(i, section, double, final)
 	}
 	return &AsyncHasher{
@@ -459,6 +462,7 @@ func (sw *AsyncHasher) Write(i int, section []byte) {
 	t := sw.getTree()
 	// cursor keeps track of the rightmost section written so far
 	// if index is lower than cursor then just write non-final section as is
+	log.Debug("write bmt", "w", i)
 	if i < t.cursor {
 		// if index is not the rightmost, safe to write section
 		go sw.write(i, section, false)
@@ -564,8 +568,11 @@ func (h *Hasher) writeNode(n *node, bh hash.Hash, isLeft bool, s []byte) {
 	level := 1
 	for {
 		// at the root of the bmt just write the result to the result channel
+		//log.Debug("nodewrite", "s", len(s))
 		if n == nil {
-			h.getTree().result <- s
+			tr := h.getTree()
+			log.Debug("writenode tree", "t", tr)
+			tr.result <- s
 			return
 		}
 		// otherwise assign child hash to left or right segment
@@ -595,10 +602,13 @@ func (h *Hasher) writeNode(n *node, bh hash.Hash, isLeft bool, s []byte) {

 func (h *Hasher) writeFinalNode(level int, n *node, bh hash.Hash, isLeft bool, s []byte) {

 	for {
+		log.Debug("writefinalnode", "n", n, "s", len(s))
 		// at the root of the bmt just write the result to the result channel
 		if n == nil {
+			tr := h.getTree()
+			log.Debug("writefinalnode final tree", "t", tr, "s", len(s))
 			if s != nil {
-				h.getTree().result <- s
+				tr.result <- s
 			}
 			return
 		}
diff --git a/swarm/bmt/bmt_test.go b/swarm/bmt/bmt_test.go
index 760aa11d8b..bef437cfd6 100644
--- a/swarm/bmt/bmt_test.go
+++ b/swarm/bmt/bmt_test.go
@@ -23,14 +23,23 @@ import (
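// The init added in this hunk wires the go-ethereum logger to stderr for the
// tests; assuming the log15-style levels used by that package (Crit=0 ...
// Debug=4, Trace=5), LvlFilterHandler(4, ...) passes everything up to debug
// level through, so the log.Debug traces added in this patch are visible
// while running the suite.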
"fmt" "io" "math/rand" + "os" "sync" "sync/atomic" "testing" "time" "github.com/ethereum/go-ethereum/crypto/sha3" + "github.com/ethereum/go-ethereum/log" ) +func init() { + hs := log.StreamHandler(os.Stderr, log.TerminalFormat(true)) + hf := log.LvlFilterHandler(4, hs) + h := log.CallerFileHandler(hf) + log.Root().SetHandler(h) +} + // the actual data length generated (could be longer than max datalength of the BMT) const BufferSize = 4128 diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index 140e94e286..18a965db00 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -28,20 +28,6 @@ import ( "github.com/ethereum/go-ethereum/swarm/log" ) -// SectionWriter is an asynchronous writer interface to a hash -// it allows for concurrent and out-of-order writes of sections of the hash's input buffer -// Sum can be called once the final length is known potentially before all sections are complete -//type SectionWriter interface { -// Reset() -// WriteSection(idx int64, section []byte) int -// Size() int -// BlockSize() int -// ChunkSize() int -// WriteBuffer(count int64, r io.Reader) (int, error) -// SetLength(length int64) -// Sum(b []byte, length int, meta []byte) []byte -//} - type SectionHasher interface { bmt.SectionWriter WriteBuffer(globalCount int64, r io.Reader) (int, error) @@ -231,6 +217,8 @@ func (fh *FileHasher) WriteBuffer(globalCount int, r io.Reader) (int, error) { } else if c < fh.BlockSize() { return 0, io.ErrUnexpectedEOF } + //log.Debug("fh writbuf", "c", globalCount, "s", globalCount/fh.BlockSize()) + nod.hasher.Write(globalCount/fh.BlockSize(), buf) currentCount := atomic.AddInt32(&nod.secCnt, 1) if currentCount == int32(nod.branches) { nod.done() @@ -269,8 +257,9 @@ func (n *node) Write(sectionIndex int, section []byte) { func (n *node) write(sectionIndex int, section []byte) { currentCount := atomic.AddInt32(&n.secCnt, 1) - n.hasher.Write(sectionIndex, section) + log.Debug("writing", "pos", n.pos, "section", sectionIndex, "level", n.levelIndex) + n.hasher.Write(sectionIndex/n.BlockSize(), section) copy(n.nodeBuffer[sectionIndex:sectionIndex+n.BlockSize()], section) if currentCount == int32(n.branches) { n.done() @@ -293,10 +282,6 @@ func (n *node) done() { // length is global length func (n *node) sum(length int64, nodeSpan int64) { - select { - case <-n.writeComplete: - } - log.Debug("node sum", "l", length, "span", nodeSpan) // nodeSpan is the total byte size of a complete tree under the current node nodeSpan *= int64(n.branches) @@ -329,8 +314,7 @@ func (n *node) sum(length int64, nodeSpan int64) { bmtLength = ((dataLength - 1) / uint64((nodeSpan/int64(n.branches)+1)*int64(n.hasher.BlockSize()))) } - //n.hasher.ResetWithLength(meta) - //n.hasher.Write(n.nodeBuffer) + log.Debug("summing", "l", bmtLength, "dl", dataLength) hash := n.hasher.Sum(nil, int(bmtLength), meta) // are we on the root level? 
diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index cf598c34a1..cb9dcef643 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -14,10 +14,9 @@ import ( ) func newAsyncHasher() bmt.SectionWriter { - tp := bmt.NewTreePool(sha3.NewKeccak256, 128, 64) - return &bmt.AsyncHasher{ - Hasher: bmt.New(tp), - } + tp := bmt.NewTreePool(sha3.NewKeccak256, 128*128, 32) + h := bmt.New(tp) + return h.NewAsyncWriter(false) } func TestLevelFromOffset(t *testing.T) { @@ -64,7 +63,8 @@ func TestWriteBuffer(t *testing.T) { func TestSum(t *testing.T) { fh := NewFileHasher(newAsyncHasher, 128, 32) - data := make([]byte, 258*fh.ChunkSize()) + //data := make([]byte, 258*fh.ChunkSize()) + data := make([]byte, 128*fh.ChunkSize()) c, err := crand.Read(data) if err != nil { t.Fatal(err) @@ -76,7 +76,6 @@ func TestSum(t *testing.T) { for i := 0; i < len(data)/32; i++ { offsets = append(offsets, i*32) } - r := bytes.NewReader(data) for { if len(offsets) == 0 { @@ -99,8 +98,8 @@ func TestSum(t *testing.T) { t.Fatal(err) } } - fh.SetLength(int64(len(data))) h := fh.Sum(nil) t.Logf("hash: %x", h) + } From 02e5b86368b4dd27a0367555aa49c52946e31d04 Mon Sep 17 00:00:00 2001 From: lash Date: Wed, 22 Aug 2018 12:03:19 +0200 Subject: [PATCH 08/50] swarm/storage: Remove race condition in nodebuffer --- swarm/bmt/bmt.go | 55 +++++++--- swarm/bmt/bmt_test.go | 2 +- swarm/storage/filehasher.go | 169 ++++++++++++++++++------------- swarm/storage/filehasher_test.go | 94 +++++++++++------ 4 files changed, 198 insertions(+), 122 deletions(-) diff --git a/swarm/bmt/bmt.go b/swarm/bmt/bmt.go index b76af205b5..d33262bb28 100644 --- a/swarm/bmt/bmt.go +++ b/swarm/bmt/bmt.go @@ -77,8 +77,9 @@ type BaseHasherFunc func() hash.Hash // the tree and itself in a state reusable for hashing a new chunk // - generates and verifies segment inclusion proofs (TODO:) type Hasher struct { - pool *TreePool // BMT resource pool - bmt *tree // prebuilt BMT resource for flowcontrol and proofs + pool *TreePool // BMT resource pool + bmt *tree // prebuilt BMT resource for flowcontrol and proofs + lock sync.Mutex // concurrent access to bmt member } // New creates a reusable BMT Hasher that @@ -173,7 +174,7 @@ func (p *TreePool) release(t *tree) { type tree struct { leaves []*node // leaf nodes of the tree, other nodes accessible via parent links cursor int // index of rightmost currently open segment - offset int // offset (cursor position) within currently open segment + offset int // byte offset (cursor position) within currently open segment section []byte // the rightmost open section (double segment) result chan []byte // result channel span []byte // The span of the data subsumed under the chunk @@ -378,11 +379,11 @@ func (h *Hasher) ResetWithLength(span []byte) { // releaseTree gives back the Tree to the pool whereby it unlocks // it resets tree, segment and index func (h *Hasher) releaseTree() { - t := h.bmt + t := h.GetBmt() if t == nil { return } - h.bmt = nil + h.SetBmt(nil) go func() { t.cursor = 0 t.offset = 0 @@ -396,6 +397,18 @@ func (h *Hasher) releaseTree() { }() } +func (h *Hasher) GetBmt() *tree { + h.lock.Lock() + defer h.lock.Unlock() + return h.bmt +} + +func (h *Hasher) SetBmt(t *tree) { + h.lock.Lock() + defer h.lock.Unlock() + h.bmt = t +} + // NewAsyncWriter extends Hasher with an interface for concurrent segment/section writes func (h *Hasher) NewAsyncWriter(double bool) *AsyncHasher { secsize := h.pool.SegmentSize @@ -403,7 +416,7 @@ func (h *Hasher) 
NewAsyncWriter(double bool) *AsyncHasher { secsize *= 2 } write := func(i int, section []byte, final bool) { - log.Debug("bmt write sub", "i", i, "final", final, "s", len(section)) + //log.Debug("bmt write sub", "i", i, "final", final, "s", len(section)) h.writeSection(i, section, double, final) } return &AsyncHasher{ @@ -462,29 +475,36 @@ func (sw *AsyncHasher) Write(i int, section []byte) { t := sw.getTree() // cursor keeps track of the rightmost section written so far // if index is lower than cursor then just write non-final section as is - log.Debug("write bmt", "w", i) + + log.Debug("writenote", "offset", t.offset, "i", i, "sectionlen", len(section), "cur", t.cursor, "data", section) if i < t.cursor { // if index is not the rightmost, safe to write section go sw.write(i, section, false) return } + // if there is a previous rightmost section safe to write section if t.offset > 0 { if i == t.cursor { + // i==cursor implies cursor was set by Hash call so we can write section as final one // since it can be shorter, first we copy it to the padded buffer t.section = make([]byte, sw.secsize) copy(t.section, section) go sw.write(i, t.section, true) return + } + // the rightmost section just changed, so we write the previous one as non-final go sw.write(t.cursor, t.section, false) } // set i as the index of the righmost section written so far // set t.offset to cursor*secsize+1 + t.cursor = i - t.offset = i*sw.secsize + 1 + //t.offset = i*sw.secsize + 1 + t.offset = (i + 1) * sw.secsize t.section = make([]byte, sw.secsize) copy(t.section, section) } @@ -499,6 +519,7 @@ func (sw *AsyncHasher) Write(i int, section []byte) { // meta: metadata to hash together with BMT root for the final digest // e.g., span for protection against existential forgery func (sw *AsyncHasher) Sum(b []byte, length int, meta []byte) (s []byte) { + //log.Warn("bmt sum", "l", length) sw.mtx.Lock() t := sw.getTree() if length == 0 { @@ -508,6 +529,8 @@ func (sw *AsyncHasher) Sum(b []byte, length int, meta []byte) (s []byte) { // for non-zero input the rightmost section is written to the tree asynchronously // if the actual last section has been written (t.cursor == length/t.secsize) maxsec := (length - 1) / sw.secsize + + //log.Debug("sum->write", "c", t.cursor, "offset", t.offset, "meta", meta, "maxsec", maxsec) if t.offset > 0 { go sw.write(t.cursor, t.section, maxsec == t.cursor) } @@ -526,6 +549,7 @@ func (sw *AsyncHasher) Sum(b []byte, length int, meta []byte) (s []byte) { return append(b, s...) 
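// Note: the early return above hands back the bare BMT root. Otherwise the
// root is hashed once more together with the 8-byte meta below; a sketch of
// what that final step amounts to (illustrative only, mirroring the
// meta-then-root order used by the reference check in split_test.go):
//
//	h := sw.pool.hasher()
//	h.Write(meta) // 8-byte span of the data subsumed under the chunk
//	h.Write(s)    // BMT root
//	digest := h.Sum(nil)
//
// Folding the span into the digest is the protection against existential
// forgery mentioned in the Sum documentation above.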
} // hash together meta and BMT root hash using the pools + //log.Debug("dosum", "s", s, "b", b, "m", meta) return doSum(sw.pool.hasher(), b, meta, s) } @@ -550,6 +574,7 @@ func (h *Hasher) writeSection(i int, section []byte, double bool, final bool) { hasher = n.hasher isLeft = i%2 == 0 } + // write hash into parent node if final { // for the last segment use writeFinalNode @@ -568,10 +593,9 @@ func (h *Hasher) writeNode(n *node, bh hash.Hash, isLeft bool, s []byte) { level := 1 for { // at the root of the bmt just write the result to the result channel - //log.Debug("nodewrite", "s", len(s)) if n == nil { tr := h.getTree() - log.Debug("writenode tree", "t", tr) + //log.Debug("writenode tree", "t", tr) tr.result <- s return } @@ -602,11 +626,11 @@ func (h *Hasher) writeNode(n *node, bh hash.Hash, isLeft bool, s []byte) { func (h *Hasher) writeFinalNode(level int, n *node, bh hash.Hash, isLeft bool, s []byte) { for { - log.Debug("writefinalnode", "n", n, "s", len(s)) + //log.Debug("writefinalnode", "s", len(s)) // at the root of the bmt just write the result to the result channel if n == nil { tr := h.getTree() - log.Debug("writefinalnode final tree", "t", tr, "s", len(s)) + //log.Debug("writefinalnode final tree", "t", tr, "s", len(s)) if s != nil { tr.result <- s } @@ -659,11 +683,12 @@ func (h *Hasher) writeFinalNode(level int, n *node, bh hash.Hash, isLeft bool, s // getTree obtains a BMT resource by reserving one from the pool and assigns it to the bmt field func (h *Hasher) getTree() *tree { - if h.bmt != nil { - return h.bmt + b := h.GetBmt() + if b != nil { + return b } t := h.pool.reserve() - h.bmt = t + h.SetBmt(t) return t } diff --git a/swarm/bmt/bmt_test.go b/swarm/bmt/bmt_test.go index bef437cfd6..d664da59d8 100644 --- a/swarm/bmt/bmt_test.go +++ b/swarm/bmt/bmt_test.go @@ -35,7 +35,7 @@ import ( func init() { hs := log.StreamHandler(os.Stderr, log.TerminalFormat(true)) - hf := log.LvlFilterHandler(4, hs) + hf := log.LvlFilterHandler(1, hs) h := log.CallerFileHandler(hf) log.Root().SetHandler(h) } diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index 18a965db00..632287df30 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -62,7 +62,7 @@ func NewFileHasher(hasherFunc func() bmt.SectionWriter, branches int, secSize in return fh.newBatch() }, } - fh.digestSize = hasherFunc().Size() + fh.digestSize = secSize //hasherFunc().Size() fh.levels = append(fh.levels, &level{ FileHasher: fh, @@ -82,23 +82,32 @@ type level struct { // batch records chunks subsumed under the same parent intermediate chunk type batch struct { - nodes []*node // nodes of the batches - parent *node // pointer to containing - nodeCompleteCount int - batchBuffer []byte - index int // offset of the node - *level // pointer to containing level + nodes []*node // nodes of the batches + parent *node // pointer to containing + batchBuffer []byte // data buffer for batch (divided between nodes) + index int // offset of the batch + *level // pointer to containing level } // node represent a chunk and embeds an async interface to the chunk hash used type node struct { - hasher bmt.SectionWriter // async hasher - pos int // index of the node chunk within its batch - secCnt int32 // number of sections written - nodeBuffer []byte - nodeIndex int - writeComplete chan struct{} - *batch // pointer to containing batch + lock sync.Mutex + hasher bmt.SectionWriter // async hasher + pos int // index of the node chunk within its batch + secCnt int32 // number of sections written + 
size int + nodeBuffer []byte + //writeComplete chan struct{} + *batch // pointer to containing batch +} + +// for logging purposes +func (n *node) getBuffer() []byte { + n.lock.Lock() + defer n.lock.Unlock() + b := make([]byte, len(n.nodeBuffer)) + copy(b, n.nodeBuffer) + return b } // getParentLevel retrieves or creates the next level up from a node/batch/level @@ -127,6 +136,7 @@ func (lev *level) getBatch(index int) *batch { // if it does not currently exist, create it func (lev *level) getOrCreateBatch(index int) *batch { pb := lev.getBatch(index) + log.Warn("getbatch", "b", fmt.Sprintf("%p", pb)) if pb == nil { pb = lev.pool.Get().(*batch) pb.index = index @@ -167,16 +177,19 @@ func (fh *FileHasher) newBatch() (bt *batch) { bt = &batch{ batchBuffer: make([]byte, fh.branches*chunkSize), } + log.Debug("newbatch", "bufat", fmt.Sprintf("%p", bt.batchBuffer)) for i := range nodes { offset := chunkSize * i nodes[i] = &node{ - pos: i, - hasher: fh.hasherFunc(), - nodeBuffer: bt.batchBuffer[offset : offset+chunkSize], - batch: bt, - writeComplete: make(chan struct{}), + pos: i, + hasher: fh.hasherFunc(), + nodeBuffer: bt.batchBuffer[offset : offset+chunkSize], + batch: bt, + //writeComplete: make(chan struct{}), } } + + log.Debug("newbatch node", "bufat", fmt.Sprintf("%p", nodes[0].batchBuffer), "node frst bufat", fmt.Sprintf("%p", nodes[0].nodeBuffer), "node last bufat", fmt.Sprintf("%p", nodes[len(nodes)-1].nodeBuffer)) bt.nodes = nodes return bt } @@ -189,7 +202,7 @@ func (fh *FileHasher) OffsetToLevelDepth(c int64) int { return level } -// returns data level buffer position for offset globalCount +// writes data to offset count position func (fh *FileHasher) WriteBuffer(globalCount int, r io.Reader) (int, error) { // writes are only valid on section thresholds @@ -202,27 +215,24 @@ func (fh *FileHasher) WriteBuffer(globalCount int, r io.Reader) (int, error) { batchPos := globalCount % (fh.branches * fh.ChunkSize()) batchNodeIndex := batchPos / fh.ChunkSize() batchNodePos := batchPos % fh.ChunkSize() - //log.Debug("batch", "nodepos", batchNodePos, "node", batchNodeIndex, "global", globalCount, "batchindex", batchIndex, "batchpos", batchPos, "blockSize", fh.BlockSize()) + log.Debug("batch", "nodepos", batchNodePos, "node", batchNodeIndex, "global", globalCount, "batchindex", batchIndex, "batchpos", batchPos, "blockSize", fh.BlockSize()) bt := fh.levels[0].getOrCreateBatch(batchIndex) nod := bt.nodes[batchNodeIndex] - // Make sure there is a pointer to the data level on the node - if nod.level == nil { - nod.level = fh.levels[0] - } + nod.lock.Lock() buf := nod.nodeBuffer[batchNodePos : batchNodePos+fh.BlockSize()] c, err := r.Read(buf) + nod.lock.Unlock() if err != nil { return 0, err } else if c < fh.BlockSize() { return 0, io.ErrUnexpectedEOF } - //log.Debug("fh writbuf", "c", globalCount, "s", globalCount/fh.BlockSize()) - nod.hasher.Write(globalCount/fh.BlockSize(), buf) + nod.hasher.Write(batchNodePos/fh.BlockSize(), buf) currentCount := atomic.AddInt32(&nod.secCnt, 1) + log.Debug("fh writebuf", "c", globalCount, "s", globalCount/fh.BlockSize(), "seccnt", nod.secCnt, "branches", nod.branches, "buflen", len(buf), "buf", buf[:]) if currentCount == int32(nod.branches) { nod.done() - //nod.writeComplete <- struct{}{} } return fh.BlockSize(), nil } @@ -230,16 +240,7 @@ func (fh *FileHasher) WriteBuffer(globalCount int, r io.Reader) (int, error) { // called when the final length of the data is known func (fh *FileHasher) SetLength(l int64) { fh.dataLength = l - - // fill out missing levels 
in the filehasher - levelDepth := fh.OffsetToLevelDepth(l) - for i := len(fh.levels) - 1; i < levelDepth; i++ { - fh.levels = append(fh.levels, &level{ - levelIndex: i, - FileHasher: fh, - }) - } - log.Debug("levels", "c", len(fh.levels)) + return } // dataSpan returns the size of data encoded under the current node @@ -251,79 +252,103 @@ func (n *node) span() uint64 { return span } -func (n *node) Write(sectionIndex int, section []byte) { - n.write(sectionIndex, section) -} - func (n *node) write(sectionIndex int, section []byte) { currentCount := atomic.AddInt32(&n.secCnt, 1) - log.Debug("writing", "pos", n.pos, "section", sectionIndex, "level", n.levelIndex) + log.Debug("writing", "pos", n.pos, "section", sectionIndex, "level", n.levelIndex, "data", section, "buffer", fmt.Sprintf("%p", n.nodeBuffer), "batchbuffer", fmt.Sprintf("%p", n.batchBuffer), "barch", fmt.Sprintf("%p", n.batch), "level", fmt.Sprintf("%p", n.getLevel(n.levelIndex))) n.hasher.Write(sectionIndex/n.BlockSize(), section) - copy(n.nodeBuffer[sectionIndex:sectionIndex+n.BlockSize()], section) + bytePos := sectionIndex * n.BlockSize() + n.lock.Lock() + copy(n.nodeBuffer[bytePos:bytePos+n.BlockSize()], section) + n.lock.Unlock() if currentCount == int32(n.branches) { n.done() } } func (n *node) done() { - go func() { - parentBatchIndex := n.index / n.branches - parentBatch := n.getLevel(n.levelIndex + 1).getOrCreateBatch(parentBatchIndex) + parentBatchIndex := n.index / n.branches + parentBatch := n.getLevel(n.levelIndex + 1).getOrCreateBatch(parentBatchIndex) + go func(parentBatch *batch) { parentNodeIndex := n.index % n.branches parentNode := parentBatch.nodes[parentNodeIndex] serializedLength := make([]byte, 8) - binary.LittleEndian.PutUint64(serializedLength, parentNode.span()) - parentNode.write(n.pos*n.BlockSize(), n.nodeBuffer) - }() + binary.BigEndian.PutUint64(serializedLength, parentNode.span()) + h := n.hasher.Sum(nil, n.ChunkSize(), serializedLength) + parentNode.write(n.pos, h) + }(parentBatch) } // length is global length func (n *node) sum(length int64, nodeSpan int64) { - log.Debug("node sum", "l", length, "span", nodeSpan) - // nodeSpan is the total byte size of a complete tree under the current node - nodeSpan *= int64(n.branches) - - // if a new batch would be started - batchSpan := nodeSpan * int64(n.branches) - nodeIndex := length % int64(batchSpan) - var parentNode *node - if nodeIndex == 0 && len(n.levels) > n.levelIndex+1 { - batchIndex := (length-1)/int64(batchSpan) + 1 - parentNode = n.levels[n.levelIndex+1].getBatch(int(batchIndex)).nodes[nodeIndex] - parentNode.sum(length, nodeSpan) + if length == 0 { + n.result <- n.hasher.Sum(nil, 0, nil) return } + log.Warn("node sum 0", "l", length, "span", nodeSpan) + // nodeSpan is the total byte size of a complete tree under the current node + levelMul := int64(n.levelIndex * n.ChunkSize()) + if levelMul > 0 { + nodeSpan *= levelMul + } // dataLength is the actual length of data under the current node - dataLength := uint64(length % nodeSpan) + var dataLength uint64 + dataLength = uint64(length) % uint64(nodeSpan) + if n.levelIndex == 0 && dataLength == 0 { + dataLength = uint64(n.ChunkSize()) + } // meta is the length of actual data in the nodespan meta := make([]byte, 8) binary.BigEndian.PutUint64(meta, dataLength) - log.Debug("underlen", "l", dataLength) + log.Debug("underlen", "l", dataLength, "nextlevel", n.levelIndex+1) + // bmtLength is the actual length of bytes in the chunk // if the node is an intermediate node (level != 0 && len(levels) > 1), 
bmtLength will be a multiple 32 bytes var bmtLength uint64 if n.levelIndex == 0 { bmtLength = dataLength } else { - bmtLength = ((dataLength - 1) / uint64((nodeSpan/int64(n.branches)+1)*int64(n.hasher.BlockSize()))) + bmtLength = (dataLength - 1) / (uint64((nodeSpan/int64(n.branches) + 1) * int64(n.secsize))) } - log.Debug("summing", "l", bmtLength, "dl", dataLength) - hash := n.hasher.Sum(nil, int(bmtLength), meta) + // if a new batch would be started + + var parentNode *node + if n.levelIndex != len(n.levels)-1 { + batchSpan := nodeSpan * int64(n.branches) + nodeIndex := ((length % int64(batchSpan)) - 1) / int64(n.ChunkSize()) + nodeBatchIndex := ((length % int64(n.branches)) - 1) / int64(n.branches*n.ChunkSize()) + batchIndex := (length - 1) / int64(batchSpan) // + 1 + + //parentLevel := n.getLevel(n.levelIndex + 1) + parentLevel := n.levels[n.levelIndex+1] + parentBatch := parentLevel.getBatch(int(batchIndex)) + if parentBatch != nil { + parentNode = parentBatch.nodes[nodeBatchIndex] + } + + log.Warn("node sum 1", "b", n.branches, "lv", len(n.levels), "nln", n.levelIndex, "nidx", nodeIndex, "parentnode", fmt.Sprintf("%p", parentNode), "parentlevel", parentLevel) + + if parentBatch != nil { + b := parentBatch.nodes[0].getBuffer() + log.Warn("node sum 2", "batchindex", batchIndex, "buf", b) + } + } // are we on the root level? if parentNode != nil { - log.Warn("continue") + log.Warn("continue", "hasher", fmt.Sprintf("%p", n.hasher), "parent", fmt.Sprintf("%p", parentNode), "this", fmt.Sprintf("%p", n)) parentNode.sum(length, nodeSpan) return } + log.Debug("summing", "l", length, "dl", dataLength, "meta", meta, "bmtlength", bmtLength, "hasher", fmt.Sprintf("%p", n.hasher), "this", fmt.Sprintf("%p", n)) + hash := n.hasher.Sum(nil, int(dataLength), meta) n.result <- hash } @@ -339,10 +364,6 @@ func (fh *FileHasher) Sum(b []byte) []byte { // handle edge case where the file is empty if fh.dataLength == 0 { - // h := fh.hasherFunc() - // zero := [8]byte{} - // h.ResetWithLength(zero[:]) - // return h.Sum(b) return fh.hasherFunc().Sum(nil, 0, make([]byte, 8)) } @@ -359,7 +380,9 @@ func (fh *FileHasher) Sum(b []byte) []byte { log.Debug("lastnode", "batchindex", lastBatchIndexInFile, "nodeindex", nodeIndexInLastBatch) // asynchronously call sum on this node and wait for the final result - go lastNode.sum(fh.dataLength, int64(fh.ChunkSize())) + go func() { + lastNode.sum(fh.dataLength, int64(fh.ChunkSize())) + }() return <-fh.result } diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index cb9dcef643..8f01cd2fbf 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -2,9 +2,11 @@ package storage import ( "bytes" - crand "crypto/rand" + //crand "crypto/rand" + "encoding/binary" "io" - "math/rand" + //"math/rand" + "hash" "testing" "time" @@ -14,7 +16,7 @@ import ( ) func newAsyncHasher() bmt.SectionWriter { - tp := bmt.NewTreePool(sha3.NewKeccak256, 128*128, 32) + tp := bmt.NewTreePool(sha3.NewKeccak256, 128, 1) h := bmt.New(tp) return h.NewAsyncWriter(false) } @@ -38,13 +40,11 @@ func TestWriteBuffer(t *testing.T) { offsets := []int{12, 8, 4, 2, 6, 10, 0, 14} r := bytes.NewReader(data) for _, o := range offsets { - log.Debug("writing", "o", o) r.Seek(int64(o), io.SeekStart) _, err := fh.WriteBuffer(o, r) if err != nil { t.Fatal(err) } - //copy(buf, data[o:o+2]) } batchone := fh.levels[0].getBatch(0) @@ -56,50 +56,78 @@ func TestWriteBuffer(t *testing.T) { if !bytes.Equal(batchtwo.batchBuffer, data[8:]) { t.Fatalf("expected batch two 
data %x, got %x", data[8:], batchtwo.batchBuffer) } - - time.Sleep(time.Second) } func TestSum(t *testing.T) { fh := NewFileHasher(newAsyncHasher, 128, 32) - //data := make([]byte, 258*fh.ChunkSize()) - data := make([]byte, 128*fh.ChunkSize()) - c, err := crand.Read(data) - if err != nil { - t.Fatal(err) - } else if c != len(data) { - t.Fatalf("short read %d", c) + dataLength := 2 * fh.ChunkSize() + data := make([]byte, dataLength) + //c, err := crand.Read(data) + // if err != nil { + // t.Fatal(err) + // } else if c != len(data) { + // t.Fatalf("short read %d", c) + // } + for i := 0; i < len(data); i++ { + data[i] = byte(i % 256) } - var offsets []int for i := 0; i < len(data)/32; i++ { offsets = append(offsets, i*32) } r := bytes.NewReader(data) - for { - if len(offsets) == 0 { - break - } - lastIndex := len(offsets) - 1 - var c int - if len(offsets) > 1 { - c = rand.Intn(lastIndex) - } - offset := offsets[c] - if c != lastIndex { - offsets[c] = offsets[lastIndex] - } - offsets = offsets[:lastIndex] - + // for { + // if len(offsets) == 0 { + // break + // } + // lastIndex := len(offsets) - 1 + // var c int + // if len(offsets) > 1 { + // c = rand.Intn(lastIndex) + // } + // offset := offsets[c] + // if c != lastIndex { + // offsets[c] = offsets[lastIndex] + // } + // offsets = offsets[:lastIndex] + // + // r.Seek(int64(offset), io.SeekStart) + // _, err := fh.WriteBuffer(offset, r) + // if err != nil { + // t.Fatal(err) + // } + // } + for i := 0; i < len(offsets); i++ { + //offset := offsets[i] + offset := i * 32 r.Seek(int64(offset), io.SeekStart) - _, err := fh.WriteBuffer(offset, r) + log.Warn("write", "o", offset) + c, err := fh.WriteBuffer(offset, r) if err != nil { t.Fatal(err) + } else if c < fh.BlockSize() { + t.Fatalf("short read %d", c) } } - fh.SetLength(int64(len(data))) + + hasher := func() hash.Hash { + return sha3.NewKeccak256() + } + rb := bmt.NewRefHasher(hasher, dataLength) + meta := make([]byte, 8) + binary.BigEndian.PutUint64(meta, uint64(dataLength)) + res := rb.Hash(data) + shasher := hasher() + shasher.Reset() + shasher.Write(meta) + shasher.Write(res) + x := shasher.Sum(nil) + + time.Sleep(time.Second) + t.Logf("hash ref raw: %x", res) + t.Logf("hash ref dosum: %x", x) + fh.SetLength(int64(dataLength)) h := fh.Sum(nil) t.Logf("hash: %x", h) - } From 81403c6044f11cfaa61ec45c6705572ceec88bf8 Mon Sep 17 00:00:00 2001 From: lash Date: Fri, 31 Aug 2018 21:17:09 +0200 Subject: [PATCH 09/50] swarm/storage: Hashing completes with both 1 and 2 batches --- swarm/bmt/bmt.go | 59 +++------------- swarm/storage/filehasher.go | 115 +++++++++++++++++-------------- swarm/storage/filehasher_test.go | 72 +++++++++++-------- 3 files changed, 116 insertions(+), 130 deletions(-) diff --git a/swarm/bmt/bmt.go b/swarm/bmt/bmt.go index d33262bb28..a85d4369e5 100644 --- a/swarm/bmt/bmt.go +++ b/swarm/bmt/bmt.go @@ -23,8 +23,6 @@ import ( "strings" "sync" "sync/atomic" - - "github.com/ethereum/go-ethereum/log" ) /* @@ -77,9 +75,8 @@ type BaseHasherFunc func() hash.Hash // the tree and itself in a state reusable for hashing a new chunk // - generates and verifies segment inclusion proofs (TODO:) type Hasher struct { - pool *TreePool // BMT resource pool - bmt *tree // prebuilt BMT resource for flowcontrol and proofs - lock sync.Mutex // concurrent access to bmt member + pool *TreePool // BMT resource pool + bmt *tree // prebuilt BMT resource for flowcontrol and proofs } // New creates a reusable BMT Hasher that @@ -174,7 +171,7 @@ func (p *TreePool) release(t *tree) { type tree struct { 
leaves []*node // leaf nodes of the tree, other nodes accessible via parent links cursor int // index of rightmost currently open segment - offset int // byte offset (cursor position) within currently open segment + offset int // offset (cursor position) within currently open segment section []byte // the rightmost open section (double segment) result chan []byte // result channel span []byte // The span of the data subsumed under the chunk @@ -379,11 +376,11 @@ func (h *Hasher) ResetWithLength(span []byte) { // releaseTree gives back the Tree to the pool whereby it unlocks // it resets tree, segment and index func (h *Hasher) releaseTree() { - t := h.GetBmt() + t := h.bmt if t == nil { return } - h.SetBmt(nil) + h.bmt = nil go func() { t.cursor = 0 t.offset = 0 @@ -397,18 +394,6 @@ func (h *Hasher) releaseTree() { }() } -func (h *Hasher) GetBmt() *tree { - h.lock.Lock() - defer h.lock.Unlock() - return h.bmt -} - -func (h *Hasher) SetBmt(t *tree) { - h.lock.Lock() - defer h.lock.Unlock() - h.bmt = t -} - // NewAsyncWriter extends Hasher with an interface for concurrent segment/section writes func (h *Hasher) NewAsyncWriter(double bool) *AsyncHasher { secsize := h.pool.SegmentSize @@ -416,7 +401,6 @@ func (h *Hasher) NewAsyncWriter(double bool) *AsyncHasher { secsize *= 2 } write := func(i int, section []byte, final bool) { - //log.Debug("bmt write sub", "i", i, "final", final, "s", len(section)) h.writeSection(i, section, double, final) } return &AsyncHasher{ @@ -433,8 +417,6 @@ type SectionWriter interface { Write(index int, data []byte) // write into section of index Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer SectionSize() int // size of the async section unit to use - Size() int - BlockSize() int } // AsyncHasher extends BMT Hasher with an asynchronous segment/section writer interface @@ -475,36 +457,28 @@ func (sw *AsyncHasher) Write(i int, section []byte) { t := sw.getTree() // cursor keeps track of the rightmost section written so far // if index is lower than cursor then just write non-final section as is - - log.Debug("writenote", "offset", t.offset, "i", i, "sectionlen", len(section), "cur", t.cursor, "data", section) if i < t.cursor { // if index is not the rightmost, safe to write section go sw.write(i, section, false) return } - // if there is a previous rightmost section safe to write section if t.offset > 0 { if i == t.cursor { - // i==cursor implies cursor was set by Hash call so we can write section as final one // since it can be shorter, first we copy it to the padded buffer t.section = make([]byte, sw.secsize) copy(t.section, section) go sw.write(i, t.section, true) return - } - // the rightmost section just changed, so we write the previous one as non-final go sw.write(t.cursor, t.section, false) } // set i as the index of the righmost section written so far // set t.offset to cursor*secsize+1 - t.cursor = i - //t.offset = i*sw.secsize + 1 - t.offset = (i + 1) * sw.secsize + t.offset = i*sw.secsize + 1 t.section = make([]byte, sw.secsize) copy(t.section, section) } @@ -519,7 +493,6 @@ func (sw *AsyncHasher) Write(i int, section []byte) { // meta: metadata to hash together with BMT root for the final digest // e.g., span for protection against existential forgery func (sw *AsyncHasher) Sum(b []byte, length int, meta []byte) (s []byte) { - //log.Warn("bmt sum", "l", length) sw.mtx.Lock() t := sw.getTree() if length == 0 { @@ -529,8 +502,6 @@ func (sw *AsyncHasher) Sum(b []byte, length int, meta []byte) (s []byte) { // for non-zero 
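The offset assignment being reverted here (back to i*sw.secsize + 1) reads best with offset treated as a flag as much as a position: both Write and Sum only test t.offset > 0 to learn whether a rightmost section is buffered, and the +1 keeps that flag set even when the rightmost section is section 0. A reduced sketch of the bookkeeping under that reading, with our own names:

package main

import "fmt"

// cursorState models the async writer's rightmost-section bookkeeping:
// offset stays strictly positive whenever a section is buffered, so a
// zero offset means "nothing pending".
type cursorState struct {
	cursor  int    // index of the rightmost section written so far
	offset  int    // non-zero iff a rightmost section is buffered
	section []byte // copy of that section, padded to secsize
}

func (c *cursorState) buffer(i, secsize int, section []byte) {
	c.cursor = i
	c.offset = i*secsize + 1 // +1 keeps the flag set even for i == 0
	c.section = make([]byte, secsize)
	copy(c.section, section)
}

func main() {
	var c cursorState
	c.buffer(0, 32, []byte("first"))
	fmt.Println(c.offset > 0) // true: a rightmost section is pending
}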
input the rightmost section is written to the tree asynchronously // if the actual last section has been written (t.cursor == length/t.secsize) maxsec := (length - 1) / sw.secsize - - //log.Debug("sum->write", "c", t.cursor, "offset", t.offset, "meta", meta, "maxsec", maxsec) if t.offset > 0 { go sw.write(t.cursor, t.section, maxsec == t.cursor) } @@ -549,7 +520,6 @@ func (sw *AsyncHasher) Sum(b []byte, length int, meta []byte) (s []byte) { return append(b, s...) } // hash together meta and BMT root hash using the pools - //log.Debug("dosum", "s", s, "b", b, "m", meta) return doSum(sw.pool.hasher(), b, meta, s) } @@ -574,7 +544,6 @@ func (h *Hasher) writeSection(i int, section []byte, double bool, final bool) { hasher = n.hasher isLeft = i%2 == 0 } - // write hash into parent node if final { // for the last segment use writeFinalNode @@ -594,9 +563,7 @@ func (h *Hasher) writeNode(n *node, bh hash.Hash, isLeft bool, s []byte) { for { // at the root of the bmt just write the result to the result channel if n == nil { - tr := h.getTree() - //log.Debug("writenode tree", "t", tr) - tr.result <- s + h.getTree().result <- s return } // otherwise assign child hash to left or right segment @@ -626,13 +593,10 @@ func (h *Hasher) writeNode(n *node, bh hash.Hash, isLeft bool, s []byte) { func (h *Hasher) writeFinalNode(level int, n *node, bh hash.Hash, isLeft bool, s []byte) { for { - //log.Debug("writefinalnode", "s", len(s)) // at the root of the bmt just write the result to the result channel if n == nil { - tr := h.getTree() - //log.Debug("writefinalnode final tree", "t", tr, "s", len(s)) if s != nil { - tr.result <- s + h.getTree().result <- s } return } @@ -683,12 +647,11 @@ func (h *Hasher) writeFinalNode(level int, n *node, bh hash.Hash, isLeft bool, s // getTree obtains a BMT resource by reserving one from the pool and assigns it to the bmt field func (h *Hasher) getTree() *tree { - b := h.GetBmt() - if b != nil { - return b + if h.bmt != nil { + return h.bmt } t := h.pool.reserve() - h.SetBmt(t) + h.bmt = t return t } diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index 632287df30..1139c6f665 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -42,6 +42,7 @@ type FileHasher struct { secsize int // section size branches int // branching factor hasherFunc func() bmt.SectionWriter // SectionWriter // hasher constructor + batchSize int // byte length of a batch result chan []byte // channel to put hash asynchronously digestSize int dataLength int64 @@ -55,6 +56,7 @@ func NewFileHasher(hasherFunc func() bmt.SectionWriter, branches int, secSize in result: make(chan []byte), branches: branches, secsize: secSize, + batchSize: branches * branches * secSize, } fh.lnBranches = math.Log(float64(branches)) fh.pool = sync.Pool{ @@ -198,7 +200,7 @@ func (fh *FileHasher) newBatch() (bt *batch) { func (fh *FileHasher) OffsetToLevelDepth(c int64) int { chunkCount := c / int64(fh.ChunkSize()) level := int(math.Log(float64(chunkCount)) / fh.lnBranches) - log.Warn("chunksize", "offset", c, "c", fh.ChunkSize(), "b", fh.branches, "s", fh.secsize, "count", chunkCount, "level", level) + //log.Warn("chunksize", "offset", c, "c", fh.ChunkSize(), "b", fh.branches, "s", fh.secsize, "count", chunkCount, "level", level) return level } @@ -230,9 +232,9 @@ func (fh *FileHasher) WriteBuffer(globalCount int, r io.Reader) (int, error) { } nod.hasher.Write(batchNodePos/fh.BlockSize(), buf) currentCount := atomic.AddInt32(&nod.secCnt, 1) - log.Debug("fh writebuf", "c", 
globalCount, "s", globalCount/fh.BlockSize(), "seccnt", nod.secCnt, "branches", nod.branches, "buflen", len(buf), "buf", buf[:]) + log.Debug("fh writebuf", "c", globalCount, "s", globalCount/fh.BlockSize(), "seccnt", nod.secCnt, "branches", nod.branches, "buflen", len(buf), "node", fmt.Sprintf("%p", nod), "buf", buf[:]) if currentCount == int32(nod.branches) { - nod.done() + go nod.done(nod.ChunkSize()) } return fh.BlockSize(), nil } @@ -255,100 +257,103 @@ func (n *node) span() uint64 { func (n *node) write(sectionIndex int, section []byte) { currentCount := atomic.AddInt32(&n.secCnt, 1) - log.Debug("writing", "pos", n.pos, "section", sectionIndex, "level", n.levelIndex, "data", section, "buffer", fmt.Sprintf("%p", n.nodeBuffer), "batchbuffer", fmt.Sprintf("%p", n.batchBuffer), "barch", fmt.Sprintf("%p", n.batch), "level", fmt.Sprintf("%p", n.getLevel(n.levelIndex))) - n.hasher.Write(sectionIndex/n.BlockSize(), section) + log.Debug("writing", "pos", n.pos, "section", sectionIndex, "level", n.levelIndex, "data", section, "buffer", fmt.Sprintf("%p", n.nodeBuffer), "batchbuffer", fmt.Sprintf("%p", n.batchBuffer), "barch", fmt.Sprintf("%p", n.batch), "level", fmt.Sprintf("%p", n.getLevel(n.levelIndex)), "node", fmt.Sprintf("%p", n)) + n.hasher.Write(sectionIndex, section) bytePos := sectionIndex * n.BlockSize() n.lock.Lock() copy(n.nodeBuffer[bytePos:bytePos+n.BlockSize()], section) n.lock.Unlock() if currentCount == int32(n.branches) { - n.done() + go n.done(n.ChunkSize()) } } -func (n *node) done() { +func (n *node) done(l int) { parentBatchIndex := n.index / n.branches parentBatch := n.getLevel(n.levelIndex + 1).getOrCreateBatch(parentBatchIndex) - go func(parentBatch *batch) { - parentNodeIndex := n.index % n.branches - parentNode := parentBatch.nodes[parentNodeIndex] - serializedLength := make([]byte, 8) - binary.BigEndian.PutUint64(serializedLength, parentNode.span()) - h := n.hasher.Sum(nil, n.ChunkSize(), serializedLength) - parentNode.write(n.pos, h) - }(parentBatch) - + parentNodeIndex := n.index % n.branches + parentNode := parentBatch.nodes[parentNodeIndex] + serializedLength := make([]byte, 8) + binary.BigEndian.PutUint64(serializedLength, parentNode.span()) + h := n.hasher.Sum(nil, l, serializedLength) + parentNode.write(n.pos, h) } // length is global length -func (n *node) sum(length int64, nodeSpan int64) { +func (n *node) sum(length int64, span int64) { if length == 0 { n.result <- n.hasher.Sum(nil, 0, nil) return } - log.Warn("node sum 0", "l", length, "span", nodeSpan) - // nodeSpan is the total byte size of a complete tree under the current node - levelMul := int64(n.levelIndex * n.ChunkSize()) - if levelMul > 0 { - nodeSpan *= levelMul - } + // span is the total byte size of a complete tree under the current node + span *= int64(n.branches) // dataLength is the actual length of data under the current node var dataLength uint64 - dataLength = uint64(length) % uint64(nodeSpan) - if n.levelIndex == 0 && dataLength == 0 { - dataLength = uint64(n.ChunkSize()) - } + dataLength = uint64(length) % uint64(span) // meta is the length of actual data in the nodespan meta := make([]byte, 8) binary.BigEndian.PutUint64(meta, dataLength) - log.Debug("underlen", "l", dataLength, "nextlevel", n.levelIndex+1) - // bmtLength is the actual length of bytes in the chunk // if the node is an intermediate node (level != 0 && len(levels) > 1), bmtLength will be a multiple 32 bytes var bmtLength uint64 if n.levelIndex == 0 { bmtLength = dataLength } else { - bmtLength = (dataLength - 1) / 
(uint64((nodeSpan/int64(n.branches) + 1) * int64(n.secsize))) + denom := float64(span / int64(n.branches)) + div := float64(dataLength) + bmtLength = uint64(div/denom) * uint64(n.secsize) + log.Debug("bmtlengthcalc", "denom", denom, "div", div, "bmtl", bmtLength) } // if a new batch would be started - var parentNode *node - if n.levelIndex != len(n.levels)-1 { - batchSpan := nodeSpan * int64(n.branches) - nodeIndex := ((length % int64(batchSpan)) - 1) / int64(n.ChunkSize()) - nodeBatchIndex := ((length % int64(n.branches)) - 1) / int64(n.branches*n.ChunkSize()) - batchIndex := (length - 1) / int64(batchSpan) // + 1 - - //parentLevel := n.getLevel(n.levelIndex + 1) - parentLevel := n.levels[n.levelIndex+1] - parentBatch := parentLevel.getBatch(int(batchIndex)) - if parentBatch != nil { - parentNode = parentBatch.nodes[nodeBatchIndex] + nextLevel := n.levelIndex + 1 + if nextLevel != len(n.levels) { + var levelBytePos = length + for i := 0; i < nextLevel; i++ { + levelBytePos /= int64(n.branches) } - - log.Warn("node sum 1", "b", n.branches, "lv", len(n.levels), "nln", n.levelIndex, "nidx", nodeIndex, "parentnode", fmt.Sprintf("%p", parentNode), "parentlevel", parentLevel) - - if parentBatch != nil { - b := parentBatch.nodes[0].getBuffer() - log.Warn("node sum 2", "batchindex", batchIndex, "buf", b) + parentBatchIndex := levelBytePos / int64(n.branches*n.ChunkSize()) + parentNodeIndex := (levelBytePos % int64(n.branches*n.ChunkSize()) / int64(n.ChunkSize())) + log.Debug("next", "parentbatchindex", parentBatchIndex, "parentnodeindex", parentNodeIndex, "levelbytepos", levelBytePos) + //if levelBytePos < int64(n.ChunkSize()) { + if levelBytePos > 0 { + parentLevel := n.levels[nextLevel] + parentBatch := parentLevel.getBatch(int(parentBatchIndex)) + log.Debug("parentbatch", "b", fmt.Sprintf("%p", parentBatch), "l", parentLevel) + if parentBatch != nil { + parentNode = parentBatch.nodes[parentNodeIndex] + } } + //parentBatchSpan := span * int64(n.branches) + //parentNodeIndex := ((length % int64(parentBatchSpan)) - 1) / int64(n.ChunkSize()) + //nodeIndex := (length%span - 1) / int64(n.ChunkSize()) + //nodeBatchIndex := ((length % int64(n.branches)) - 1) / int64(n.branches*n.ChunkSize()) + // parentBatchIndex := (length - 1) / int64(parentBatchSpan) // + 1 + // + // parentLevel := n.levels[n.levelIndex+1] + // parentBatch := parentLevel.getBatch(int(parentBatchIndex)) + // if parentBatch != nil { + // parentNode = parentBatch.nodes[nodeIndex] + // } + // log.Warn("node sum 1", "batchindex", batchIndex, "b", n.branches, "lv", len(n.levels), "nln", n.levelIndex, "nidx", nodeIndex, "parentnode", fmt.Sprintf("%p", parentNode), "parentlevel", parentLevel) + } // are we on the root level? 
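Both the removed and the added formula here chase the same quantity: how many bytes of an intermediate chunk are occupied, at one reference per started child subtree. The floor division introduced in this hunk undercounts for partially filled children (4097 bytes under 4096-byte children yields one reference instead of two); a stand-alone worked sketch of the ceiling form the later patches in this series settle on (names and sample lengths are ours; 128 branches and 32-byte sections as in the tests):

package main

import "fmt"

// occupied computes how many bytes of an intermediate chunk are in
// use when dataLength bytes (> 0) sit under it and each child subtree
// spans childSpan bytes: one secsize-byte reference per started child.
func occupied(dataLength, childSpan, secsize uint64) uint64 {
	refs := (dataLength-1)/childSpan + 1 // ceiling division
	return refs * secsize
}

func main() {
	const secsize, branches = 32, 128
	const chunkSize = secsize * branches // 4096
	for _, dl := range []uint64{4096, 4097, 8192, 8224, chunkSize * branches} {
		fmt.Printf("dataLength %7d -> %4d bytes of the parent chunk\n",
			dl, occupied(dl, chunkSize, secsize))
	}
}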
if parentNode != nil { - log.Warn("continue", "hasher", fmt.Sprintf("%p", n.hasher), "parent", fmt.Sprintf("%p", parentNode), "this", fmt.Sprintf("%p", n)) - parentNode.sum(length, nodeSpan) + log.Warn("continue", "hasher", fmt.Sprintf("%p", n.hasher), "parent", fmt.Sprintf("%p", parentNode), "thisnode", fmt.Sprintf("%p", n)) + parentNode.sum(length, span) return } - log.Debug("summing", "l", length, "dl", dataLength, "meta", meta, "bmtlength", bmtLength, "hasher", fmt.Sprintf("%p", n.hasher), "this", fmt.Sprintf("%p", n)) - hash := n.hasher.Sum(nil, int(dataLength), meta) + log.Debug("summing", "l", length, "dl", dataLength, "meta", meta, "bmtlength", bmtLength, "hasher", fmt.Sprintf("%p", n.hasher), "thisnode", fmt.Sprintf("%p", n), "l", n.levelIndex, "span", span) + log.Debug("nodebuffer", "b", n.nodeBuffer) + hash := n.hasher.Sum(nil, int(bmtLength), meta) n.result <- hash } @@ -381,7 +386,11 @@ func (fh *FileHasher) Sum(b []byte) []byte { // asynchronously call sum on this node and wait for the final result go func() { - lastNode.sum(fh.dataLength, int64(fh.ChunkSize())) + chunkDataLength := int(fh.dataLength) % fh.ChunkSize() + if chunkDataLength > 0 && fh.dataLength != 0 { + lastNode.done(chunkDataLength) + } + lastNode.sum(fh.dataLength, int64(fh.BlockSize())) }() return <-fh.result } diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 8f01cd2fbf..c4bbfc9595 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -2,11 +2,11 @@ package storage import ( "bytes" - //crand "crypto/rand" - "encoding/binary" + crand "crypto/rand" + //"encoding/binary" "io" //"math/rand" - "hash" + "fmt" "testing" "time" @@ -58,25 +58,40 @@ func TestWriteBuffer(t *testing.T) { } } +func newSerialData(l int) ([]byte, error) { + data := make([]byte, l) + for i := 0; i < len(data); i++ { + data[i] = byte(i % 255) + } + return data, nil +} + +func newRandomData(l int) ([]byte, error) { + data := make([]byte, l) + c, err := crand.Read(data) + if err != nil { + return nil, err + } else if c != len(data) { + return nil, fmt.Errorf("short read (%d)", c) + } + return data, nil +} + func TestSum(t *testing.T) { + dataFunc := newSerialData fh := NewFileHasher(newAsyncHasher, 128, 32) - dataLength := 2 * fh.ChunkSize() - data := make([]byte, dataLength) - //c, err := crand.Read(data) - // if err != nil { - // t.Fatal(err) - // } else if c != len(data) { - // t.Fatalf("short read %d", c) - // } - for i := 0; i < len(data); i++ { - data[i] = byte(i % 256) + dataLength := fh.ChunkSize() * 127 + data, err := dataFunc(dataLength) + if err != nil { + t.Fatal(err) } + r := bytes.NewReader(data) var offsets []int for i := 0; i < len(data)/32; i++ { offsets = append(offsets, i*32) } - r := bytes.NewReader(data) + // for { // if len(offsets) == 0 { // break @@ -111,22 +126,21 @@ func TestSum(t *testing.T) { } } - hasher := func() hash.Hash { - return sha3.NewKeccak256() - } - rb := bmt.NewRefHasher(hasher, dataLength) - meta := make([]byte, 8) - binary.BigEndian.PutUint64(meta, uint64(dataLength)) - res := rb.Hash(data) - shasher := hasher() - shasher.Reset() - shasher.Write(meta) - shasher.Write(res) - x := shasher.Sum(nil) + // rb := bmt.NewRefHasher(sha3.NewKeccak256, 128) + // meta := make([]byte, 8) + // binary.BigEndian.PutUint64(meta, uint64(dataLength)) + // res := make([]byte, 64) + // copy(res, rb.Hash(data[:fh.ChunkSize()])) + // copy(res[32:], rb.Hash(data[fh.ChunkSize():])) + // t.Logf("data length %d chunksize %d res %x", dataLength, 
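The commented-out oracle around this point rebuilds the root by hand: BMT-hash each chunk, concatenate the two roots, BMT-hash that, then hash an 8-byte span prefix together with the result. A stand-alone sketch of that last step, keccak256(span || root), using the big-endian span encoding this revision uses (later patches in the series switch the span to little-endian):

package main

import (
	"encoding/binary"
	"fmt"

	"github.com/ethereum/go-ethereum/crypto/sha3"
)

// chunkRef sketches the outer digest the commented-out test code
// builds with shasher: an 8-byte span prepended to a 32-byte BMT root.
// Computing the root itself is left to bmt.NewRefHasher in the test.
func chunkRef(span uint64, root []byte) []byte {
	meta := make([]byte, 8)
	binary.BigEndian.PutUint64(meta, span)
	h := sha3.NewKeccak256()
	h.Write(meta)
	h.Write(root)
	return h.Sum(nil)
}

func main() {
	fmt.Printf("%x\n", chunkRef(8192, make([]byte, 32)))
}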
fh.ChunkSize(), res) + // root := rb.Hash(res) + // shasher := sha3.NewKeccak256() + // shasher.Write(meta) + // shasher.Write(root) + // x := shasher.Sum(nil) - time.Sleep(time.Second) - t.Logf("hash ref raw: %x", res) - t.Logf("hash ref dosum: %x", x) + time.Sleep(time.Second * 1) + //t.Logf("hash ref dosum: %x", x) fh.SetLength(int64(dataLength)) h := fh.Sum(nil) t.Logf("hash: %x", h) From d559e3cc673b97a06552538468eeacfafd7e3737 Mon Sep 17 00:00:00 2001 From: lash Date: Fri, 7 Sep 2018 10:55:32 +0200 Subject: [PATCH 10/50] swarm/storage: Filehasher < 1 * batch correct --- swarm/storage/filehasher.go | 249 ++++++++++++++++++++----------- swarm/storage/filehasher_test.go | 155 +++++++++---------- 2 files changed, 232 insertions(+), 172 deletions(-) diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index 1139c6f665..7e4f7572c9 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -138,7 +138,6 @@ func (lev *level) getBatch(index int) *batch { // if it does not currently exist, create it func (lev *level) getOrCreateBatch(index int) *batch { pb := lev.getBatch(index) - log.Warn("getbatch", "b", fmt.Sprintf("%p", pb)) if pb == nil { pb = lev.pool.Get().(*batch) pb.index = index @@ -179,7 +178,6 @@ func (fh *FileHasher) newBatch() (bt *batch) { bt = &batch{ batchBuffer: make([]byte, fh.branches*chunkSize), } - log.Debug("newbatch", "bufat", fmt.Sprintf("%p", bt.batchBuffer)) for i := range nodes { offset := chunkSize * i nodes[i] = &node{ @@ -191,7 +189,6 @@ func (fh *FileHasher) newBatch() (bt *batch) { } } - log.Debug("newbatch node", "bufat", fmt.Sprintf("%p", nodes[0].batchBuffer), "node frst bufat", fmt.Sprintf("%p", nodes[0].nodeBuffer), "node last bufat", fmt.Sprintf("%p", nodes[len(nodes)-1].nodeBuffer)) bt.nodes = nodes return bt } @@ -200,12 +197,12 @@ func (fh *FileHasher) newBatch() (bt *batch) { func (fh *FileHasher) OffsetToLevelDepth(c int64) int { chunkCount := c / int64(fh.ChunkSize()) level := int(math.Log(float64(chunkCount)) / fh.lnBranches) - //log.Warn("chunksize", "offset", c, "c", fh.ChunkSize(), "b", fh.branches, "s", fh.secsize, "count", chunkCount, "level", level) return level } // writes data to offset count position -func (fh *FileHasher) WriteBuffer(globalCount int, r io.Reader) (int, error) { +//func (fh *FileHasher) WriteBuffer(globalCount int, r io.Reader) (int, error) { +func (fh *FileHasher) WriteBuffer(globalCount int, buf []byte) (int, error) { // writes are only valid on section thresholds if globalCount%fh.BlockSize() > 0 { @@ -217,24 +214,14 @@ func (fh *FileHasher) WriteBuffer(globalCount int, r io.Reader) (int, error) { batchPos := globalCount % (fh.branches * fh.ChunkSize()) batchNodeIndex := batchPos / fh.ChunkSize() batchNodePos := batchPos % fh.ChunkSize() - log.Debug("batch", "nodepos", batchNodePos, "node", batchNodeIndex, "global", globalCount, "batchindex", batchIndex, "batchpos", batchPos, "blockSize", fh.BlockSize()) bt := fh.levels[0].getOrCreateBatch(batchIndex) nod := bt.nodes[batchNodeIndex] - nod.lock.Lock() - buf := nod.nodeBuffer[batchNodePos : batchNodePos+fh.BlockSize()] - c, err := r.Read(buf) - nod.lock.Unlock() - if err != nil { - return 0, err - } else if c < fh.BlockSize() { - return 0, io.ErrUnexpectedEOF - } nod.hasher.Write(batchNodePos/fh.BlockSize(), buf) currentCount := atomic.AddInt32(&nod.secCnt, 1) - log.Debug("fh writebuf", "c", globalCount, "s", globalCount/fh.BlockSize(), "seccnt", nod.secCnt, "branches", nod.branches, "buflen", len(buf), "node", fmt.Sprintf("%p", nod), 
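This revision changes WriteBuffer to take the caller's byte slice directly, but the way a global byte offset is mapped onto the level-0 tree is unchanged. A stand-alone sketch of that decomposition (names are ours; 128 branches and 32-byte sections as in the tests):

package main

import "fmt"

// decompose mirrors the arithmetic at the top of WriteBuffer: a global
// byte offset is split into the level-0 batch it falls in, the chunk
// (node) within that batch, and the byte position within the chunk.
func decompose(globalCount, branches, secsize int) (batch, node, pos int) {
	chunkSize := branches * secsize
	batchSize := branches * chunkSize
	batch = globalCount / batchSize
	batchPos := globalCount % batchSize
	node = batchPos / chunkSize
	pos = batchPos % chunkSize
	return batch, node, pos
}

func main() {
	for _, o := range []int{0, 32, 4096, 4128, 524288, 524320} {
		b, n, p := decompose(o, 128, 32)
		fmt.Printf("offset %6d -> batch %d node %3d bytepos %4d\n", o, b, n, p)
	}
}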
"buf", buf[:]) + log.Trace("fh writebuf", "c", globalCount, "s", globalCount/fh.BlockSize(), "seccnt", nod.secCnt, "branches", nod.branches, "buflen", len(buf), "node", fmt.Sprintf("%p", nod), "buf", buf[:]) if currentCount == int32(nod.branches) { - go nod.done(nod.ChunkSize()) + go nod.done(nod.ChunkSize(), nod.ChunkSize()) } return fh.BlockSize(), nil } @@ -246,121 +233,210 @@ func (fh *FileHasher) SetLength(l int64) { } // dataSpan returns the size of data encoded under the current node -func (n *node) span() uint64 { +func (n *node) span(l uint64) uint64 { span := uint64(n.ChunkSize()) - for l := 0; l < n.levelIndex; l++ { + var lev int + for lev = 0; lev < n.levelIndex; lev++ { span *= uint64(n.branches) } + if l < span && lev == 0 { + return l + } return span } func (n *node) write(sectionIndex int, section []byte) { currentCount := atomic.AddInt32(&n.secCnt, 1) - log.Debug("writing", "pos", n.pos, "section", sectionIndex, "level", n.levelIndex, "data", section, "buffer", fmt.Sprintf("%p", n.nodeBuffer), "batchbuffer", fmt.Sprintf("%p", n.batchBuffer), "barch", fmt.Sprintf("%p", n.batch), "level", fmt.Sprintf("%p", n.getLevel(n.levelIndex)), "node", fmt.Sprintf("%p", n)) + log.Debug("write intermediate", "pos", n.pos, "section", sectionIndex, "level", n.levelIndex, "data", section, "buffer", fmt.Sprintf("%p", n.nodeBuffer), "batchbuffer", fmt.Sprintf("%p", n.batchBuffer), "barch", fmt.Sprintf("%p", n.batch), "level", fmt.Sprintf("%p", n.getLevel(n.levelIndex)), "node", fmt.Sprintf("%p", n)) n.hasher.Write(sectionIndex, section) bytePos := sectionIndex * n.BlockSize() - n.lock.Lock() copy(n.nodeBuffer[bytePos:bytePos+n.BlockSize()], section) - n.lock.Unlock() if currentCount == int32(n.branches) { - go n.done(n.ChunkSize()) + go n.done(n.ChunkSize(), n.ChunkSize()) } } -func (n *node) done(l int) { +func (n *node) done(nodeLength int, spanLength int) { parentBatchIndex := n.index / n.branches parentBatch := n.getLevel(n.levelIndex + 1).getOrCreateBatch(parentBatchIndex) parentNodeIndex := n.index % n.branches parentNode := parentBatch.nodes[parentNodeIndex] serializedLength := make([]byte, 8) - binary.BigEndian.PutUint64(serializedLength, parentNode.span()) - h := n.hasher.Sum(nil, l, serializedLength) + binary.LittleEndian.PutUint64(serializedLength, uint64(spanLength)) //n.span(uint64(totalLength))) + //log.Debug("node done", "n", fmt.Sprintf("%p", n), "serl", serializedLength, "parent", fmt.Sprintf("%p", parentNode)) + log.Debug("node done", "n", fmt.Sprintf("%p", n), "serl", serializedLength, "parent", fmt.Sprintf("%p", parentNode), "l", nodeLength) + h := n.hasher.Sum(nil, nodeLength, serializedLength) parentNode.write(n.pos, h) } // length is global length -func (n *node) sum(length int64, span int64) { +func (n *node) sum(length int64, potentialSpan int64) { if length == 0 { n.result <- n.hasher.Sum(nil, 0, nil) return } // span is the total byte size of a complete tree under the current node - span *= int64(n.branches) + potentialSpan *= int64(n.branches) // dataLength is the actual length of data under the current node + // bmtLength is the actual length of bytes in the chunk to be summed + // if the node is an intermediate node (level != 0 && len(levels) > 1), bmtLength will be a multiple 32 bytes var dataLength uint64 - dataLength = uint64(length) % uint64(span) - - // meta is the length of actual data in the nodespan + //var bmtLength int + //if n.levelIndex == 0 { + // dataLength = uint64(length) + // bmtLength = int(dataLength) + //} else { + dataLength = uint64(length) 
% uint64(potentialSpan) + //denom := float64(span / int64(n.branches)) + //div := float64(dataLength) + //bmtLength = int(uint64(div/denom) * uint64(n.secsize)) + //} + + // meta is the length of actual data in the nodespan serialized little-endian meta := make([]byte, 8) - binary.BigEndian.PutUint64(meta, dataLength) + if dataLength == 0 { + binary.LittleEndian.PutUint64(meta, uint64(length)) + } else { + binary.LittleEndian.PutUint64(meta, dataLength) + } - // bmtLength is the actual length of bytes in the chunk - // if the node is an intermediate node (level != 0 && len(levels) > 1), bmtLength will be a multiple 32 bytes - var bmtLength uint64 + // we already checked on top if length is 0. If it is 0 here, it's on span threshold and a full chunk write + // otherwise we do not have a full chunk write, and need to make the underlying hash sum + if dataLength == 0 { + //dataLength = uint64(potentialSpan) + // get the parent node if it exists + parentNode := n.getParent(length) + parentNode.sum(length, potentialSpan) + return + } + + var bmtLength int if n.levelIndex == 0 { - bmtLength = dataLength + bmtLength = int(dataLength) } else { - denom := float64(span / int64(n.branches)) - div := float64(dataLength) - bmtLength = uint64(div/denom) * uint64(n.secsize) - log.Debug("bmtlengthcalc", "denom", denom, "div", div, "bmtl", bmtLength) + log.Debug("calc bmtl", "dl", dataLength, "span", potentialSpan) + bmtLength = int(((dataLength-1)/uint64((potentialSpan/int64(n.branches))) + 1) * uint64(n.BlockSize())) } - // if a new batch would be started - var parentNode *node - nextLevel := n.levelIndex + 1 - if nextLevel != len(n.levels) { - var levelBytePos = length - for i := 0; i < nextLevel; i++ { - levelBytePos /= int64(n.branches) - } - parentBatchIndex := levelBytePos / int64(n.branches*n.ChunkSize()) - parentNodeIndex := (levelBytePos % int64(n.branches*n.ChunkSize()) / int64(n.ChunkSize())) - log.Debug("next", "parentbatchindex", parentBatchIndex, "parentnodeindex", parentNodeIndex, "levelbytepos", levelBytePos) - //if levelBytePos < int64(n.ChunkSize()) { - if levelBytePos > 0 { - parentLevel := n.levels[nextLevel] - parentBatch := parentLevel.getBatch(int(parentBatchIndex)) - log.Debug("parentbatch", "b", fmt.Sprintf("%p", parentBatch), "l", parentLevel) - if parentBatch != nil { - parentNode = parentBatch.nodes[parentNodeIndex] - } + log.Debug("bmtl", "l", bmtLength, "dl", dataLength, "n", fmt.Sprintf("%p", n), "pos", n.pos, "seccnt", n.secCnt) + if n.secCnt > 1 { + n.done(int(bmtLength), int(dataLength)) + parentNode := n.getParent(length) + parentNode.sum(length, potentialSpan) + return + } + + // if we're already at batch index one, the total data is a single data section + //if n.index == 0 { + if n.pos == 0 { + // if it's on data level, we have to make the hash + // otherwise it's already hashed + if n.levelIndex == 0 { + n.result <- n.hasher.Sum(nil, bmtLength, meta) //nodeBuffer[:n.BlockSize()] + } else { + n.result <- n.nodeBuffer[:n.BlockSize()] } - //parentBatchSpan := span * int64(n.branches) - //parentNodeIndex := ((length % int64(parentBatchSpan)) - 1) / int64(n.ChunkSize()) - //nodeIndex := (length%span - 1) / int64(n.ChunkSize()) - //nodeBatchIndex := ((length % int64(n.branches)) - 1) / int64(n.branches*n.ChunkSize()) - // parentBatchIndex := (length - 1) / int64(parentBatchSpan) // + 1 - // - // parentLevel := n.levels[n.levelIndex+1] - // parentBatch := parentLevel.getBatch(int(parentBatchIndex)) - // if parentBatch != nil { - // parentNode = parentBatch.nodes[nodeIndex] 
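The little-endian length prefix built here has one wrinkle worth isolating: when the total length lands exactly on the node's span, the modulo yields zero and the code falls back to the full length. A stand-alone sketch of just that encoding step (names are ours):

package main

import (
	"encoding/binary"
	"fmt"
)

// spanMeta sketches the 8-byte prefix sum() serializes: the amount of
// data under the node, with the full length substituted when the
// modulo comes out zero on an exact span boundary.
func spanMeta(length, span uint64) []byte {
	dataLength := length % span
	if dataLength == 0 {
		dataLength = length
	}
	meta := make([]byte, 8)
	binary.LittleEndian.PutUint64(meta, dataLength)
	return meta
}

func main() {
	fmt.Printf("% x\n", spanMeta(8224, 524288)) // two chunks and one section
	fmt.Printf("% x\n", spanMeta(4096, 4096))   // exact boundary: falls back to 4096
}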
- // } - // log.Warn("node sum 1", "batchindex", batchIndex, "b", n.branches, "lv", len(n.levels), "nln", n.levelIndex, "nidx", nodeIndex, "parentnode", fmt.Sprintf("%p", parentNode), "parentlevel", parentLevel) + return + } + var levelCount int + prevIdx := n.index + for i := prevIdx; i > 0; i /= n.branches { + prevIdx = i + levelCount++ } - // are we on the root level? - if parentNode != nil { - log.Warn("continue", "hasher", fmt.Sprintf("%p", n.hasher), "parent", fmt.Sprintf("%p", parentNode), "thisnode", fmt.Sprintf("%p", n)) - parentNode.sum(length, span) - return + // get the top node. This will always have free capacity + topRoot := n.levels[len(n.levels)-1].getBatch(0).nodes[0] + danglingTop := n.levelIndex + levelCount + log.Debug("levelcount", "l", levelCount, "previdx", prevIdx) + var nodeToWrite *node + // if there is a tree unconnected to the root, append to this and write result to root + if danglingTop == len(n.levels) { + nodeToWrite := n.levels[danglingTop].getBatch(0).nodes[prevIdx%n.branches] + log.Debug("have dangling", "n", nodeToWrite) + nodeToWrite.write(int(nodeToWrite.secCnt), n.hasher.Sum(nil, n.BlockSize(), meta)) + + } else { + nodeToWrite = n } - log.Debug("summing", "l", length, "dl", dataLength, "meta", meta, "bmtlength", bmtLength, "hasher", fmt.Sprintf("%p", n.hasher), "thisnode", fmt.Sprintf("%p", n), "l", n.levelIndex, "span", span) - log.Debug("nodebuffer", "b", n.nodeBuffer) - hash := n.hasher.Sum(nil, int(bmtLength), meta) - n.result <- hash + topRoot.write(int(topRoot.secCnt), nodeToWrite.hasher.Sum(nil, int(nodeToWrite.secCnt)*n.BlockSize(), meta)) + binary.LittleEndian.PutUint64(meta, uint64(length)) + log.Debug("top", "n", topRoot.nodeBuffer) + n.result <- topRoot.hasher.Sum(nil, int(topRoot.secCnt)*n.BlockSize(), meta) + return + + // move up levels, when the node position is non-zero, then write to the parent (do what done does) + // batch index with be i + // nodeindex will be i mod branches + + //}else if parentNode != nil { + // find out what the equivalent length is on the current level + //for i := 0; i < n.levelIndex; i++ { + //dataLength = uint64(math.Ceil(float64(dataLength / uint64(n.branches)))) + //} + log.Debug("intermediate sum", "l", dataLength) + // sum the node (writes to the parent) + //n.done(int(dataLength), ) + //} + + // log.Debug("sum", "dl", dataLength, "l", length, "s", potentialSpan, "n", fmt.Sprintf("%p", n), "pos", n.pos, "index", n.index, "secnt", n.secCnt, "parentnode", fmt.Sprintf("%p", parentNode)) + // + // // if this is the first node in the first batch we skip to the next level + // //if n.index == 0 && n.pos == 0 { + // // however, if we are on the top level, check if we only have one section written. 
If so, the hash is already done + // if parentNode == nil { + // if n.secCnt == 1 { + // // if it's on data level we need to + // log.Debug("directly return buffer", "n", n.nodeBuffer) + // n.result <- n.nodeBuffer[:n.BlockSize()] + // return + // } + // //topLength := int(n.secCnt * int32(n.BlockSize())) + // log.Debug("hash top", "tl", bmtLength, "buf", n.nodeBuffer) + // n.result <- n.hasher.Sum(nil, bmtLength, meta) + // return + // } else { + // log.Debug("parent") + // parentNode.sum(length, potentialSpan) + // return + // } + // + // log.Debug("done") + + //hash := n.hasher.Sum(nil, int(bmtLength), meta) + //n.result <- hash } func (fh *FileHasher) ChunkSize() int { return fh.branches * fh.secsize } +func (n *node) getParent(length int64) *node { + nextLevel := n.levelIndex + 1 + if len(n.levels) > nextLevel { + var levelBytePos = length + for i := 0; i < nextLevel; i++ { + levelBytePos /= int64(n.branches) + } + parentBatchIndex := levelBytePos / int64(n.branches*n.ChunkSize()) + parentNodeIndex := (levelBytePos % int64(n.branches*n.ChunkSize()) / int64(n.ChunkSize())) + //if levelBytePos > 0 { + parentLevel := n.levels[nextLevel] + parentBatch := parentLevel.getBatch(int(parentBatchIndex)) + log.Debug("parentbatch", "b", fmt.Sprintf("%p", parentBatch), "level", nextLevel) + if parentBatch != nil { + return parentBatch.nodes[parentNodeIndex] + } + //} + } + return nil +} + // Louis note to self: secsize is the same as the size of the reference // Invoked after we know the actual length of the file // Will create the last node on the data level of the hash tree matching the length @@ -372,7 +448,6 @@ func (fh *FileHasher) Sum(b []byte) []byte { return fh.hasherFunc().Sum(nil, 0, make([]byte, 8)) } - log.Debug("fh sum", "length", fh.dataLength) // calculate the index the last batch lastBatchIndexInFile := (fh.dataLength - 1) / int64(fh.ChunkSize()*fh.branches) @@ -380,16 +455,14 @@ func (fh *FileHasher) Sum(b []byte) []byte { byteIndexInLastBatch := fh.dataLength - lastBatchIndexInFile*int64(fh.ChunkSize()*fh.branches) nodeIndexInLastBatch := (int(byteIndexInLastBatch) - 1) / fh.ChunkSize() - // get the last node + // get the last node on the data level lastNode := fh.levels[0].getBatch(int(lastBatchIndexInFile)).nodes[nodeIndexInLastBatch] - log.Debug("lastnode", "batchindex", lastBatchIndexInFile, "nodeindex", nodeIndexInLastBatch) - // asynchronously call sum on this node and wait for the final result go func() { - chunkDataLength := int(fh.dataLength) % fh.ChunkSize() - if chunkDataLength > 0 && fh.dataLength != 0 { - lastNode.done(chunkDataLength) - } + // nodeDataLength := fh.dataLength % int64(fh.ChunkSize()) + // if nodeDataLength > 0 { + // lastNode.done(int(nodeDataLength)) + // } lastNode.sum(fh.dataLength, int64(fh.BlockSize())) }() return <-fh.result diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index c4bbfc9595..171551baa0 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -6,6 +6,7 @@ import ( //"encoding/binary" "io" //"math/rand" + "context" "fmt" "testing" "time" @@ -15,9 +16,14 @@ import ( "github.com/ethereum/go-ethereum/swarm/log" ) +var pool *bmt.TreePool + +func init() { + pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize*32) +} + func newAsyncHasher() bmt.SectionWriter { - tp := bmt.NewTreePool(sha3.NewKeccak256, 128, 1) - h := bmt.New(tp) + h := bmt.New(pool) return h.NewAsyncWriter(false) } @@ -34,29 +40,30 @@ func TestLevelFromOffset(t *testing.T) { } } -func TestWriteBuffer(t 
*testing.T) { - data := []byte("0123456789abcdef") - fh := NewFileHasher(newAsyncHasher, 2, 2) - offsets := []int{12, 8, 4, 2, 6, 10, 0, 14} - r := bytes.NewReader(data) - for _, o := range offsets { - r.Seek(int64(o), io.SeekStart) - _, err := fh.WriteBuffer(o, r) - if err != nil { - t.Fatal(err) - } - } - - batchone := fh.levels[0].getBatch(0) - if !bytes.Equal(batchone.batchBuffer, data[:8]) { - t.Fatalf("expected batch one data %x, got %x", data[:8], batchone.batchBuffer) - } - - batchtwo := fh.levels[0].getBatch(1) - if !bytes.Equal(batchtwo.batchBuffer, data[8:]) { - t.Fatalf("expected batch two data %x, got %x", data[8:], batchtwo.batchBuffer) - } -} +// +//func TestWriteBuffer(t *testing.T) { +// data := []byte("0123456789abcdef") +// fh := NewFileHasher(newAsyncHasher, 2, 2) +// offsets := []int{12, 8, 4, 2, 6, 10, 0, 14} +// r := bytes.NewReader(data) +// for _, o := range offsets { +// r.Seek(int64(o), io.SeekStart) +// _, err := fh.WriteBuffer(o, r) +// if err != nil { +// t.Fatal(err) +// } +// } +// +// batchone := fh.levels[0].getBatch(0) +// if !bytes.Equal(batchone.batchBuffer, data[:8]) { +// t.Fatalf("expected batch one data %x, got %x", data[:8], batchone.batchBuffer) +// } +// +// batchtwo := fh.levels[0].getBatch(1) +// if !bytes.Equal(batchtwo.batchBuffer, data[8:]) { +// t.Fatalf("expected batch two data %x, got %x", data[8:], batchtwo.batchBuffer) +// } +//} func newSerialData(l int) ([]byte, error) { data := make([]byte, l) @@ -79,69 +86,49 @@ func newRandomData(l int) ([]byte, error) { func TestSum(t *testing.T) { + var mismatch int dataFunc := newSerialData - fh := NewFileHasher(newAsyncHasher, 128, 32) - dataLength := fh.ChunkSize() * 127 - data, err := dataFunc(dataLength) - if err != nil { - t.Fatal(err) - } - r := bytes.NewReader(data) - var offsets []int - for i := 0; i < len(data)/32; i++ { - offsets = append(offsets, i*32) - } + chunkSize := 128 * 32 + dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize * 129, chunkSize * 130} + //dataLengths := []int{chunkSize * 2} //, chunkSize*128 + 32} - // for { - // if len(offsets) == 0 { - // break - // } - // lastIndex := len(offsets) - 1 - // var c int - // if len(offsets) > 1 { - // c = rand.Intn(lastIndex) - // } - // offset := offsets[c] - // if c != lastIndex { - // offsets[c] = offsets[lastIndex] - // } - // offsets = offsets[:lastIndex] - // - // r.Seek(int64(offset), io.SeekStart) - // _, err := fh.WriteBuffer(offset, r) - // if err != nil { - // t.Fatal(err) - // } - // } - for i := 0; i < len(offsets); i++ { - //offset := offsets[i] - offset := i * 32 - r.Seek(int64(offset), io.SeekStart) - log.Warn("write", "o", offset) - c, err := fh.WriteBuffer(offset, r) + for _, dl := range dataLengths { + chunks := dl / chunkSize + log.Debug("testing", "c", chunks, "s", dl%chunkSize) + fh := NewFileHasher(newAsyncHasher, 128, 32) + data, err := dataFunc(dl) if err != nil { t.Fatal(err) - } else if c < fh.BlockSize() { - t.Fatalf("short read %d", c) } - } + for i := 0; i < len(data); i += 32 { + max := i + 32 + if len(data) < max { + max = len(data) + } + _, err := fh.WriteBuffer(i, data[i:max]) + if err != nil { + t.Fatal(err) + } + } + + time.Sleep(time.Second * 1) + fh.SetLength(int64(dl)) + h := fh.Sum(nil) + + putGetter := newTestHasherStore(&fakeChunkStore{}, BMTHash) - // rb := bmt.NewRefHasher(sha3.NewKeccak256, 128) - // meta := make([]byte, 
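The test now builds one TreePool up front and shares it across all async writers. That matters: each writer reserves a tree from the pool for the duration of a chunk hash, so with many chunk hashes in flight an undersized pool stalls the whole file hasher (the next commit message notes exactly this hang). A minimal usage sketch with the same capacity the test picks:

package main

import (
	"github.com/ethereum/go-ethereum/crypto/sha3"
	"github.com/ethereum/go-ethereum/swarm/bmt"
)

// One shared pool, sized well above the expected number of concurrent
// chunk hashes, mirroring the test's bmt.PoolSize*32.
var pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize*32)

func newWriter() bmt.SectionWriter {
	return bmt.New(pool).NewAsyncWriter(false)
}

func main() {
	w := newWriter()
	w.Write(0, make([]byte, 32))        // one 32-byte section
	_ = w.Sum(nil, 32, make([]byte, 8)) // sum over 32 bytes with an 8-byte span
}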
8) - // binary.BigEndian.PutUint64(meta, uint64(dataLength)) - // res := make([]byte, 64) - // copy(res, rb.Hash(data[:fh.ChunkSize()])) - // copy(res[32:], rb.Hash(data[fh.ChunkSize():])) - // t.Logf("data length %d chunksize %d res %x", dataLength, fh.ChunkSize(), res) - // root := rb.Hash(res) - // shasher := sha3.NewKeccak256() - // shasher.Write(meta) - // shasher.Write(root) - // x := shasher.Sum(nil) - - time.Sleep(time.Second * 1) - //t.Logf("hash ref dosum: %x", x) - fh.SetLength(int64(dataLength)) - h := fh.Sum(nil) - t.Logf("hash: %x", h) + p, _, err := PyramidSplit(context.TODO(), io.LimitReader(bytes.NewReader(data), int64(len(data))), putGetter, putGetter) + if err != nil { + t.Fatalf(err.Error()) + } + + eq := bytes.Equal(p, h) + if !eq { + mismatch++ + } + t.Logf("[%3d + %2d]\t%v\t%v\t%x", chunks, dl%chunkSize, eq, p, h) + } + if mismatch > 0 { + t.Fatalf("%d/%d mismatches", mismatch, len(dataLengths)) + } } From deaac9b12a81ff751a4d38b6d0ec190a308acf30 Mon Sep 17 00:00:00 2001 From: lash Date: Fri, 7 Sep 2018 11:40:11 +0200 Subject: [PATCH 11/50] swarm/storage: Passes sum test Fails on chunksize*128^2 Also hangs on smaller tree pool in bmt --- swarm/storage/filehasher.go | 89 ++++---------------------------- swarm/storage/filehasher_test.go | 45 +--------------- 2 files changed, 12 insertions(+), 122 deletions(-) diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index 7e4f7572c9..625edaa3d4 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -76,8 +76,7 @@ func NewFileHasher(hasherFunc func() bmt.SectionWriter, branches int, secSize in // level captures one level of chunks in the swarm hash tree // singletons are attached to the lowest level type level struct { - levelIndex int // which level of the swarm hash tree - //batches []*batch // active batches on the level + levelIndex int // which level of the swarm hash tree batches sync.Map *FileHasher // pointer to the underlying hasher } @@ -99,8 +98,7 @@ type node struct { secCnt int32 // number of sections written size int nodeBuffer []byte - //writeComplete chan struct{} - *batch // pointer to containing batch + *batch // pointer to containing batch } // for logging purposes @@ -193,15 +191,7 @@ func (fh *FileHasher) newBatch() (bt *batch) { return bt } -// level depth is index of level ascending from data level towards tree root -func (fh *FileHasher) OffsetToLevelDepth(c int64) int { - chunkCount := c / int64(fh.ChunkSize()) - level := int(math.Log(float64(chunkCount)) / fh.lnBranches) - return level -} - // writes data to offset count position -//func (fh *FileHasher) WriteBuffer(globalCount int, r io.Reader) (int, error) { func (fh *FileHasher) WriteBuffer(globalCount int, buf []byte) (int, error) { // writes are only valid on section thresholds @@ -253,7 +243,11 @@ func (n *node) write(sectionIndex int, section []byte) { bytePos := sectionIndex * n.BlockSize() copy(n.nodeBuffer[bytePos:bytePos+n.BlockSize()], section) if currentCount == int32(n.branches) { - go n.done(n.ChunkSize(), n.ChunkSize()) + if n.levelIndex == 0 { + go n.done(n.ChunkSize(), n.ChunkSize()) + } else { + go n.done(n.ChunkSize(), n.ChunkSize()*(n.branches*n.levelIndex)) + } } } @@ -263,8 +257,7 @@ func (n *node) done(nodeLength int, spanLength int) { parentNodeIndex := n.index % n.branches parentNode := parentBatch.nodes[parentNodeIndex] serializedLength := make([]byte, 8) - binary.LittleEndian.PutUint64(serializedLength, uint64(spanLength)) //n.span(uint64(totalLength))) - //log.Debug("node done", 
"n", fmt.Sprintf("%p", n), "serl", serializedLength, "parent", fmt.Sprintf("%p", parentNode)) + binary.LittleEndian.PutUint64(serializedLength, uint64(spanLength)) log.Debug("node done", "n", fmt.Sprintf("%p", n), "serl", serializedLength, "parent", fmt.Sprintf("%p", parentNode), "l", nodeLength) h := n.hasher.Sum(nil, nodeLength, serializedLength) parentNode.write(n.pos, h) @@ -284,16 +277,7 @@ func (n *node) sum(length int64, potentialSpan int64) { // bmtLength is the actual length of bytes in the chunk to be summed // if the node is an intermediate node (level != 0 && len(levels) > 1), bmtLength will be a multiple 32 bytes var dataLength uint64 - //var bmtLength int - //if n.levelIndex == 0 { - // dataLength = uint64(length) - // bmtLength = int(dataLength) - //} else { dataLength = uint64(length) % uint64(potentialSpan) - //denom := float64(span / int64(n.branches)) - //div := float64(dataLength) - //bmtLength = int(uint64(div/denom) * uint64(n.secsize)) - //} // meta is the length of actual data in the nodespan serialized little-endian meta := make([]byte, 8) @@ -306,7 +290,6 @@ func (n *node) sum(length int64, potentialSpan int64) { // we already checked on top if length is 0. If it is 0 here, it's on span threshold and a full chunk write // otherwise we do not have a full chunk write, and need to make the underlying hash sum if dataLength == 0 { - //dataLength = uint64(potentialSpan) // get the parent node if it exists parentNode := n.getParent(length) parentNode.sum(length, potentialSpan) @@ -329,13 +312,11 @@ func (n *node) sum(length int64, potentialSpan int64) { return } - // if we're already at batch index one, the total data is a single data section - //if n.index == 0 { - if n.pos == 0 { + if n.index == 0 && n.pos == 0 { // if it's on data level, we have to make the hash // otherwise it's already hashed if n.levelIndex == 0 { - n.result <- n.hasher.Sum(nil, bmtLength, meta) //nodeBuffer[:n.BlockSize()] + n.result <- n.hasher.Sum(nil, bmtLength, meta) } else { n.result <- n.nodeBuffer[:n.BlockSize()] } @@ -368,48 +349,6 @@ func (n *node) sum(length int64, potentialSpan int64) { binary.LittleEndian.PutUint64(meta, uint64(length)) log.Debug("top", "n", topRoot.nodeBuffer) n.result <- topRoot.hasher.Sum(nil, int(topRoot.secCnt)*n.BlockSize(), meta) - return - - // move up levels, when the node position is non-zero, then write to the parent (do what done does) - // batch index with be i - // nodeindex will be i mod branches - - //}else if parentNode != nil { - // find out what the equivalent length is on the current level - //for i := 0; i < n.levelIndex; i++ { - //dataLength = uint64(math.Ceil(float64(dataLength / uint64(n.branches)))) - //} - log.Debug("intermediate sum", "l", dataLength) - // sum the node (writes to the parent) - //n.done(int(dataLength), ) - //} - - // log.Debug("sum", "dl", dataLength, "l", length, "s", potentialSpan, "n", fmt.Sprintf("%p", n), "pos", n.pos, "index", n.index, "secnt", n.secCnt, "parentnode", fmt.Sprintf("%p", parentNode)) - // - // // if this is the first node in the first batch we skip to the next level - // //if n.index == 0 && n.pos == 0 { - // // however, if we are on the top level, check if we only have one section written. 
If so, the hash is already done - // if parentNode == nil { - // if n.secCnt == 1 { - // // if it's on data level we need to - // log.Debug("directly return buffer", "n", n.nodeBuffer) - // n.result <- n.nodeBuffer[:n.BlockSize()] - // return - // } - // //topLength := int(n.secCnt * int32(n.BlockSize())) - // log.Debug("hash top", "tl", bmtLength, "buf", n.nodeBuffer) - // n.result <- n.hasher.Sum(nil, bmtLength, meta) - // return - // } else { - // log.Debug("parent") - // parentNode.sum(length, potentialSpan) - // return - // } - // - // log.Debug("done") - - //hash := n.hasher.Sum(nil, int(bmtLength), meta) - //n.result <- hash } func (fh *FileHasher) ChunkSize() int { @@ -425,22 +364,18 @@ func (n *node) getParent(length int64) *node { } parentBatchIndex := levelBytePos / int64(n.branches*n.ChunkSize()) parentNodeIndex := (levelBytePos % int64(n.branches*n.ChunkSize()) / int64(n.ChunkSize())) - //if levelBytePos > 0 { parentLevel := n.levels[nextLevel] parentBatch := parentLevel.getBatch(int(parentBatchIndex)) log.Debug("parentbatch", "b", fmt.Sprintf("%p", parentBatch), "level", nextLevel) if parentBatch != nil { return parentBatch.nodes[parentNodeIndex] } - //} } return nil } -// Louis note to self: secsize is the same as the size of the reference // Invoked after we know the actual length of the file // Will create the last node on the data level of the hash tree matching the length -//func (fh *FileHasher) Sum(b []byte, length int, meta []byte) []byte { func (fh *FileHasher) Sum(b []byte) []byte { // handle edge case where the file is empty @@ -459,10 +394,6 @@ func (fh *FileHasher) Sum(b []byte) []byte { lastNode := fh.levels[0].getBatch(int(lastBatchIndexInFile)).nodes[nodeIndexInLastBatch] // asynchronously call sum on this node and wait for the final result go func() { - // nodeDataLength := fh.dataLength % int64(fh.ChunkSize()) - // if nodeDataLength > 0 { - // lastNode.done(int(nodeDataLength)) - // } lastNode.sum(fh.dataLength, int64(fh.BlockSize())) }() return <-fh.result diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 171551baa0..b08807838e 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -2,12 +2,10 @@ package storage import ( "bytes" - crand "crypto/rand" - //"encoding/binary" - "io" - //"math/rand" "context" + crand "crypto/rand" "fmt" + "io" "testing" "time" @@ -27,44 +25,6 @@ func newAsyncHasher() bmt.SectionWriter { return h.NewAsyncWriter(false) } -func TestLevelFromOffset(t *testing.T) { - fh := NewFileHasher(newAsyncHasher, 128, 32) - sizes := []int{64, 127, 128, 129, 128*128 - 1, 128 * 128, 128 * 128 * 128 * 20} - expects := []int{0, 0, 1, 1, 1, 2, 3} - for i, sz := range sizes { - offset := fh.ChunkSize() * sz - lvl := fh.OffsetToLevelDepth(int64(offset)) - if lvl != expects[i] { - t.Fatalf("offset %d (chunkcount %d), expected level %d, got %d", offset, sz, expects[i], lvl) - } - } -} - -// -//func TestWriteBuffer(t *testing.T) { -// data := []byte("0123456789abcdef") -// fh := NewFileHasher(newAsyncHasher, 2, 2) -// offsets := []int{12, 8, 4, 2, 6, 10, 0, 14} -// r := bytes.NewReader(data) -// for _, o := range offsets { -// r.Seek(int64(o), io.SeekStart) -// _, err := fh.WriteBuffer(o, r) -// if err != nil { -// t.Fatal(err) -// } -// } -// -// batchone := fh.levels[0].getBatch(0) -// if !bytes.Equal(batchone.batchBuffer, data[:8]) { -// t.Fatalf("expected batch one data %x, got %x", data[:8], batchone.batchBuffer) -// } -// -// batchtwo := fh.levels[0].getBatch(1) -// if 
!bytes.Equal(batchtwo.batchBuffer, data[8:]) { -// t.Fatalf("expected batch two data %x, got %x", data[8:], batchtwo.batchBuffer) -// } -//} - func newSerialData(l int) ([]byte, error) { data := make([]byte, l) for i := 0; i < len(data); i++ { @@ -90,7 +50,6 @@ func TestSum(t *testing.T) { dataFunc := newSerialData chunkSize := 128 * 32 dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize * 129, chunkSize * 130} - //dataLengths := []int{chunkSize * 2} //, chunkSize*128 + 32} for _, dl := range dataLengths { chunks := dl / chunkSize From fe6adde92cd9794933bd671e931205270a29084f Mon Sep 17 00:00:00 2001 From: lash Date: Fri, 7 Sep 2018 20:41:15 +0200 Subject: [PATCH 12/50] swarm/storage: Filehasher pass with 4096 * (128^2) --- swarm/storage/filehasher.go | 10 +++++++--- swarm/storage/filehasher_test.go | 16 +++++++++++----- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index 625edaa3d4..c9150d6fbc 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -246,7 +246,11 @@ func (n *node) write(sectionIndex int, section []byte) { if n.levelIndex == 0 { go n.done(n.ChunkSize(), n.ChunkSize()) } else { - go n.done(n.ChunkSize(), n.ChunkSize()*(n.branches*n.levelIndex)) + span := n.ChunkSize() + for i := 0; i < n.levelIndex; i++ { + span *= n.branches + } + go n.done(n.ChunkSize(), span) } } } @@ -362,11 +366,11 @@ func (n *node) getParent(length int64) *node { for i := 0; i < nextLevel; i++ { levelBytePos /= int64(n.branches) } - parentBatchIndex := levelBytePos / int64(n.branches*n.ChunkSize()) + parentBatchIndex := (levelBytePos - 1) / int64(n.branches*n.ChunkSize()) parentNodeIndex := (levelBytePos % int64(n.branches*n.ChunkSize()) / int64(n.ChunkSize())) parentLevel := n.levels[nextLevel] parentBatch := parentLevel.getBatch(int(parentBatchIndex)) - log.Debug("parentbatch", "b", fmt.Sprintf("%p", parentBatch), "level", nextLevel) + log.Debug("parentbatch", "b", fmt.Sprintf("%p", parentBatch), "level", nextLevel, "nodeindex", parentNodeIndex) if parentBatch != nil { return parentBatch.nodes[parentNodeIndex] } diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index b08807838e..fabe229156 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -49,7 +49,7 @@ func TestSum(t *testing.T) { var mismatch int dataFunc := newSerialData chunkSize := 128 * 32 - dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize * 129, chunkSize * 130} + dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} for _, dl := range dataLengths { chunks := dl / chunkSize @@ -74,20 +74,26 @@ func TestSum(t *testing.T) { fh.SetLength(int64(dl)) h := fh.Sum(nil) - putGetter := newTestHasherStore(&fakeChunkStore{}, BMTHash) + p, err := referenceHash(data) - p, _, err := PyramidSplit(context.TODO(), io.LimitReader(bytes.NewReader(data), int64(len(data))), putGetter, putGetter) if err != nil { t.Fatalf(err.Error()) } - eq := bytes.Equal(p, 
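The lengths probed by this test sit deliberately on the structural seams of the tree: around one section, around one chunk, around one full batch of 128 chunks, and at the first sizes that force an extra level. A sketch that generates a similar set from the two parameters (the exact values are ours, chosen in the same spirit as the test's list):

package main

import "fmt"

// boundaryLengths enumerates data lengths straddling each structural
// boundary: section, double section, chunk, full batch, and the first
// multi-level sizes.
func boundaryLengths(secsize, branches int) []int {
	chunk := secsize * branches
	batch := chunk * branches
	return []int{
		secsize - 1, secsize, secsize + 1,
		2*secsize - 1, 2 * secsize, 2*secsize + 1,
		chunk, chunk + secsize, 2 * chunk,
		batch, batch + secsize, batch + chunk, 2 * batch,
	}
}

func main() {
	fmt.Println(boundaryLengths(32, 128))
}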
h) if !eq { mismatch++ } - t.Logf("[%3d + %2d]\t%v\t%v\t%x", chunks, dl%chunkSize, eq, p, h) + t.Logf("[%3d + %2d]\t%v\t%x\t%x", chunks, dl%chunkSize, eq, p, h) + t.Logf("[%3d + %2d]\t%x", chunks, dl%chunkSize, h) } if mismatch > 0 { t.Fatalf("%d/%d mismatches", mismatch, len(dataLengths)) } } + +func referenceHash(data []byte) ([]byte, error) { + //return []byte{}, nil + putGetter := newTestHasherStore(&fakeChunkStore{}, BMTHash) + p, _, err := PyramidSplit(context.TODO(), io.LimitReader(bytes.NewReader(data), int64(len(data))), putGetter, putGetter) + return p, err +} From 9050bc77280b493a28225e570c0c9ab8ebf36246 Mon Sep 17 00:00:00 2001 From: lash Date: Tue, 11 Sep 2018 16:55:07 +0200 Subject: [PATCH 13/50] swarm/storage: Possible pyramid fail on chunk*129 --- swarm/storage/filehasher.go | 49 +++++++++++++++++++++++--------- swarm/storage/filehasher_test.go | 15 ++++++---- 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index c9150d6fbc..c305e4946a 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -153,6 +153,12 @@ func (b *batch) delink() { b.mtx.Lock() defer b.mtx.Unlock() b.batches.Delete(b.index) + for _, n := range b.nodes { + n.hasher.Reset() + } + for i, _ := range b.batchBuffer { + b.batchBuffer[i] = byte(0x0) + } b.pool.Put(b) } @@ -183,7 +189,6 @@ func (fh *FileHasher) newBatch() (bt *batch) { hasher: fh.hasherFunc(), nodeBuffer: bt.batchBuffer[offset : offset+chunkSize], batch: bt, - //writeComplete: make(chan struct{}), } } @@ -209,7 +214,7 @@ func (fh *FileHasher) WriteBuffer(globalCount int, buf []byte) (int, error) { nod.hasher.Write(batchNodePos/fh.BlockSize(), buf) currentCount := atomic.AddInt32(&nod.secCnt, 1) - log.Trace("fh writebuf", "c", globalCount, "s", globalCount/fh.BlockSize(), "seccnt", nod.secCnt, "branches", nod.branches, "buflen", len(buf), "node", fmt.Sprintf("%p", nod), "buf", buf[:]) + log.Trace("fh writebuf", "c", globalCount, "s", globalCount/fh.BlockSize(), "seccnt", nod.secCnt, "branches", nod.branches, "buflen", len(buf), "node", fmt.Sprintf("%p", nod), "batch", fmt.Sprintf("%p", nod.batch), "buf", buf[:]) if currentCount == int32(nod.branches) { go nod.done(nod.ChunkSize(), nod.ChunkSize()) } @@ -238,7 +243,7 @@ func (n *node) span(l uint64) uint64 { func (n *node) write(sectionIndex int, section []byte) { currentCount := atomic.AddInt32(&n.secCnt, 1) - log.Debug("write intermediate", "pos", n.pos, "section", sectionIndex, "level", n.levelIndex, "data", section, "buffer", fmt.Sprintf("%p", n.nodeBuffer), "batchbuffer", fmt.Sprintf("%p", n.batchBuffer), "barch", fmt.Sprintf("%p", n.batch), "level", fmt.Sprintf("%p", n.getLevel(n.levelIndex)), "node", fmt.Sprintf("%p", n)) + log.Debug("write intermediate", "pos", n.pos, "section", sectionIndex, "level", n.levelIndex, "data", section, "buffer", fmt.Sprintf("%p", n.nodeBuffer), "batchbuffer", fmt.Sprintf("%p", n.batchBuffer), "batch", fmt.Sprintf("%p", n.batch), "node", fmt.Sprintf("%p", n)) n.hasher.Write(sectionIndex, section) bytePos := sectionIndex * n.BlockSize() copy(n.nodeBuffer[bytePos:bytePos+n.BlockSize()], section) @@ -262,9 +267,13 @@ func (n *node) done(nodeLength int, spanLength int) { parentNode := parentBatch.nodes[parentNodeIndex] serializedLength := make([]byte, 8) binary.LittleEndian.PutUint64(serializedLength, uint64(spanLength)) - log.Debug("node done", "n", fmt.Sprintf("%p", n), "serl", serializedLength, "parent", fmt.Sprintf("%p", parentNode), "l", nodeLength) + log.Debug("node 
done", "n", fmt.Sprintf("%p", n), "serl", serializedLength, "parent", fmt.Sprintf("%p", parentNode), "l", nodeLength, "pos", n.pos) h := n.hasher.Sum(nil, nodeLength, serializedLength) parentNode.write(n.pos, h) + if n.pos == n.branches-1 { + log.Debug("delink", "n", fmt.Sprintf("%p", n), "b", fmt.Sprintf("%p", n.batch)) + //n.batch.delink() + } } // length is global length @@ -309,22 +318,35 @@ func (n *node) sum(length int64, potentialSpan int64) { } log.Debug("bmtl", "l", bmtLength, "dl", dataLength, "n", fmt.Sprintf("%p", n), "pos", n.pos, "seccnt", n.secCnt) + if n.secCnt > 1 { + log.Debug("seccnt > 1", "nbuf", n.nodeBuffer) n.done(int(bmtLength), int(dataLength)) parentNode := n.getParent(length) parentNode.sum(length, potentialSpan) return } - if n.index == 0 && n.pos == 0 { - // if it's on data level, we have to make the hash - // otherwise it's already hashed - if n.levelIndex == 0 { - n.result <- n.hasher.Sum(nil, bmtLength, meta) - } else { - n.result <- n.nodeBuffer[:n.BlockSize()] + if n.index == 0 { + if n.pos == 0 { + // if it's on data level, we have to make the hash + // otherwise it's already hashed + if n.levelIndex == 0 { + n.result <- n.hasher.Sum(nil, bmtLength, meta) + return + } else { + log.Debug("result direct no hash", "n", fmt.Sprintf("%p", n)) + n.result <- n.nodeBuffer[:n.BlockSize()] + return + } + // TODO: instead of this situation we should find the correct parent directly and write the hash to it + } else if n.levelIndex > 0 { + parentNode := n.getParent(length) + parentNode.write(n.pos, n.nodeBuffer) + parentNode.sum(length, potentialSpan) + return } - return + } var levelCount int @@ -337,7 +359,7 @@ func (n *node) sum(length int64, potentialSpan int64) { // get the top node. This will always have free capacity topRoot := n.levels[len(n.levels)-1].getBatch(0).nodes[0] danglingTop := n.levelIndex + levelCount - log.Debug("levelcount", "l", levelCount, "previdx", prevIdx) + log.Debug("levelcount", "l", levelCount, "previdx", prevIdx, "n", fmt.Sprintf("%p", n), "nindex", n.index) var nodeToWrite *node // if there is a tree unconnected to the root, append to this and write result to root if danglingTop == len(n.levels) { @@ -349,6 +371,7 @@ func (n *node) sum(length int64, potentialSpan int64) { nodeToWrite = n } + log.Debug("nodetowrite", "n", fmt.Sprintf("%p", nodeToWrite), "sec", nodeToWrite.secCnt) topRoot.write(int(topRoot.secCnt), nodeToWrite.hasher.Sum(nil, int(nodeToWrite.secCnt)*n.BlockSize(), meta)) binary.LittleEndian.PutUint64(meta, uint64(length)) log.Debug("top", "n", topRoot.nodeBuffer) diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index fabe229156..5d57870f4b 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -25,15 +25,16 @@ func newAsyncHasher() bmt.SectionWriter { return h.NewAsyncWriter(false) } -func newSerialData(l int) ([]byte, error) { +func newSerialData(l int, offset int) ([]byte, error) { data := make([]byte, l) for i := 0; i < len(data); i++ { - data[i] = byte(i % 255) + data[i] = byte((i + offset) % 255) } return data, nil } -func newRandomData(l int) ([]byte, error) { +// offset doesn't matter here +func newRandomData(l int, offset int) ([]byte, error) { data := make([]byte, l) c, err := crand.Read(data) if err != nil { @@ -47,15 +48,17 @@ func newRandomData(l int) ([]byte, error) { func TestSum(t *testing.T) { var mismatch int - dataFunc := newSerialData chunkSize := 128 * 32 - dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, 
chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} + serialOffset := 0 + //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} + dataLengths := []int{chunkSize * 129} + //dataLengths := []int{chunkSize} for _, dl := range dataLengths { chunks := dl / chunkSize log.Debug("testing", "c", chunks, "s", dl%chunkSize) fh := NewFileHasher(newAsyncHasher, 128, 32) - data, err := dataFunc(dl) + data, err := newSerialData(dl, serialOffset) if err != nil { t.Fatal(err) } From be400d7460b4ce03b8dca227633fc28d8b5d1f33 Mon Sep 17 00:00:00 2001 From: lash Date: Wed, 12 Sep 2018 12:53:07 +0200 Subject: [PATCH 14/50] swarm/storage: Fix async issue causing different parents for same batch --- swarm/storage/filehasher.go | 27 +++++++++-------- swarm/storage/filehasher_test.go | 50 ++++++++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 15 deletions(-) diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index c305e4946a..b144f7b555 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -92,13 +92,13 @@ type batch struct { // node represent a chunk and embeds an async interface to the chunk hash used type node struct { - lock sync.Mutex hasher bmt.SectionWriter // async hasher pos int // index of the node chunk within its batch secCnt int32 // number of sections written size int nodeBuffer []byte *batch // pointer to containing batch + lock sync.Mutex } // for logging purposes @@ -116,6 +116,7 @@ func (lev *level) getLevel(pl int) (par *level) { if pl < len(lev.levels) { return lev.levels[pl] } + log.Warn("creating level", "l", pl) par = &level{ levelIndex: pl, FileHasher: lev.FileHasher, @@ -216,7 +217,7 @@ func (fh *FileHasher) WriteBuffer(globalCount int, buf []byte) (int, error) { currentCount := atomic.AddInt32(&nod.secCnt, 1) log.Trace("fh writebuf", "c", globalCount, "s", globalCount/fh.BlockSize(), "seccnt", nod.secCnt, "branches", nod.branches, "buflen", len(buf), "node", fmt.Sprintf("%p", nod), "batch", fmt.Sprintf("%p", nod.batch), "buf", buf[:]) if currentCount == int32(nod.branches) { - go nod.done(nod.ChunkSize(), nod.ChunkSize()) + nod.done(nod.ChunkSize(), nod.ChunkSize(), nod.getOrCreateParent()) } return fh.BlockSize(), nil } @@ -249,22 +250,25 @@ func (n *node) write(sectionIndex int, section []byte) { copy(n.nodeBuffer[bytePos:bytePos+n.BlockSize()], section) if currentCount == int32(n.branches) { if n.levelIndex == 0 { - go n.done(n.ChunkSize(), n.ChunkSize()) + go n.done(n.ChunkSize(), n.ChunkSize(), n.getOrCreateParent()) } else { span := n.ChunkSize() for i := 0; i < n.levelIndex; i++ { span *= n.branches } - go n.done(n.ChunkSize(), span) + go n.done(n.ChunkSize(), span, n.getOrCreateParent()) } } } -func (n *node) done(nodeLength int, spanLength int) { +func (n *node) getOrCreateParent() *node { parentBatchIndex := n.index / n.branches parentBatch := n.getLevel(n.levelIndex + 1).getOrCreateBatch(parentBatchIndex) parentNodeIndex := n.index % n.branches - parentNode := parentBatch.nodes[parentNodeIndex] + return parentBatch.nodes[parentNodeIndex] +} + +func (n *node) done(nodeLength int, spanLength int, parentNode *node) { serializedLength := make([]byte, 8) 
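+	// the 8-byte little-endian value written below is the BMT span metadata:
+	// the total number of data bytes under this node's subtree; it is passed
+	// to Sum so the chunk hash commits to the length of data it covers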
binary.LittleEndian.PutUint64(serializedLength, uint64(spanLength)) log.Debug("node done", "n", fmt.Sprintf("%p", n), "serl", serializedLength, "parent", fmt.Sprintf("%p", parentNode), "l", nodeLength, "pos", n.pos) @@ -320,8 +324,8 @@ func (n *node) sum(length int64, potentialSpan int64) { log.Debug("bmtl", "l", bmtLength, "dl", dataLength, "n", fmt.Sprintf("%p", n), "pos", n.pos, "seccnt", n.secCnt) if n.secCnt > 1 { - log.Debug("seccnt > 1", "nbuf", n.nodeBuffer) - n.done(int(bmtLength), int(dataLength)) + log.Debug("seccnt > 1", "nbuf", n.nodeBuffer, "dl", dataLength, "n", fmt.Sprintf("%p", n), "l", n.levelIndex) + n.done(int(bmtLength), int(dataLength), n.getOrCreateParent()) parentNode := n.getParent(length) parentNode.sum(length, potentialSpan) return @@ -334,11 +338,10 @@ func (n *node) sum(length int64, potentialSpan int64) { if n.levelIndex == 0 { n.result <- n.hasher.Sum(nil, bmtLength, meta) return - } else { - log.Debug("result direct no hash", "n", fmt.Sprintf("%p", n)) - n.result <- n.nodeBuffer[:n.BlockSize()] - return } + log.Debug("result direct no hash", "n", fmt.Sprintf("%p", n), "l", n.levelIndex) + n.result <- n.nodeBuffer[:n.BlockSize()] + return // TODO: instead of this situation we should find the correct parent directly and write the hash to it } else if n.levelIndex > 0 { parentNode := n.getParent(length) diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 5d57870f4b..52410540b3 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" crand "crypto/rand" + "encoding/binary" "fmt" "io" "testing" @@ -50,9 +51,10 @@ func TestSum(t *testing.T) { var mismatch int chunkSize := 128 * 32 serialOffset := 0 - //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} - dataLengths := []int{chunkSize * 129} - //dataLengths := []int{chunkSize} + dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} + //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32} + //dataLengths := []int{chunkSize * 129} + //dataLengths := []int{chunkSize*128*128 + (128 * chunkSize)} for _, dl := range dataLengths { chunks := dl / chunkSize @@ -92,6 +94,7 @@ func TestSum(t *testing.T) { if mismatch > 0 { t.Fatalf("%d/%d mismatches", mismatch, len(dataLengths)) } + } func referenceHash(data []byte) ([]byte, error) { @@ -100,3 +103,44 @@ func referenceHash(data []byte) ([]byte, error) { p, _, err := PyramidSplit(context.TODO(), io.LimitReader(bytes.NewReader(data), int64(len(data))), putGetter, putGetter) return p, err } + +func TestAnomaly(t *testing.T) { + + correctData := []byte{48, 71, 216, 65, 7, 120, 152, 194, 107, 190, 107, 230, 82, 162, 236, 89, 10, 93, 155, 215, 205, 69, 210, 144, 234, 66, 81, 27, 72, 117, 60, 9, 129, 179, 29, 154, 127, 108, 55, 117, 35, 232, 118, 157, 176, 33, 9, 29, 242, 62, 221, 159, 215, 189, 107, 205, 241, 26, 34, 245, 24, 219, 96, 6} + correctHex := "b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199" 
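+	// the fixture is one full batch of 128 chunks plus a single dangling
+	// chunk (4096*128 + 4096 bytes): the batch and the dangling chunk are
+	// hashed separately, then combined under the total span and checked
+	// against the expected root in correctHex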
+ var dataLength uint64 = 4096*128 + 4096 + + data := make([]byte, dataLength) + for i := uint64(0); i < dataLength; i++ { + data[i] = byte(i % 255) + } + + leftChunk := make([]byte, 4096) + + h := bmt.New(pool) + meta := make([]byte, 8) + binary.LittleEndian.PutUint64(meta, 4096) + for i := 0; i < 128; i++ { + h.ResetWithLength(meta) + h.Write(data[i*4096 : i*4096+4096]) + copy(leftChunk[i*32:], h.Sum(nil)) + } + binary.LittleEndian.PutUint64(meta, 4096*128) + h.ResetWithLength(meta) + h.Write(leftChunk) + leftChunkHash := h.Sum(nil) + t.Logf("%x %v %v", leftChunkHash, bytes.Equal(correctData[:32], leftChunkHash), meta) + + binary.LittleEndian.PutUint64(meta, 4096) + h.ResetWithLength(meta) + h.Write(data[4096*128:]) + rightChunkHash := h.Sum(nil) + t.Logf("%x %v %v", rightChunkHash, bytes.Equal(correctData[32:], rightChunkHash), meta) + + binary.LittleEndian.PutUint64(meta, dataLength) + h.ResetWithLength(meta) + h.Write(leftChunkHash) + h.Write(rightChunkHash) + resultHex := fmt.Sprintf("%x", h.Sum(nil)) + t.Logf("%v %v %v", resultHex, resultHex == correctHex, meta) +} From 8d08b1f5d89ef24c9d7d9edb3e255f1d4e2caa46 Mon Sep 17 00:00:00 2001 From: lash Date: Wed, 12 Sep 2018 23:10:45 +0200 Subject: [PATCH 15/50] swarm/storage: WIP reference filehasher --- swarm/storage/filehasher.go | 3 ++- swarm/storage/filehasher_test.go | 33 ++++++++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index b144f7b555..0844f8d7fb 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -163,6 +163,7 @@ func (b *batch) delink() { b.pool.Put(b) } +// TODO: rename as blocksize in bmt is hardcoded 2*segmentsize (is that correct?) to avoid ambiguity func (fh *FileHasher) BlockSize() int { return fh.secsize } @@ -374,7 +375,7 @@ func (n *node) sum(length int64, potentialSpan int64) { nodeToWrite = n } - log.Debug("nodetowrite", "n", fmt.Sprintf("%p", nodeToWrite), "sec", nodeToWrite.secCnt) + log.Debug("nodetowrite", "n", fmt.Sprintf("%p", nodeToWrite), "sec", nodeToWrite.secCnt, "meta", meta) topRoot.write(int(topRoot.secCnt), nodeToWrite.hasher.Sum(nil, int(nodeToWrite.secCnt)*n.BlockSize(), meta)) binary.LittleEndian.PutUint64(meta, uint64(length)) log.Debug("top", "n", topRoot.nodeBuffer) diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 52410540b3..dd0191baa3 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -51,10 +51,8 @@ func TestSum(t *testing.T) { var mismatch int chunkSize := 128 * 32 serialOffset := 0 - dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} - //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32} - //dataLengths := []int{chunkSize * 129} - //dataLengths := []int{chunkSize*128*128 + (128 * chunkSize)} + //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} + dataLengths := []int{chunkSize * 2} for _, dl := range dataLengths { chunks := dl / 
chunkSize @@ -144,3 +142,30 @@ func TestAnomaly(t *testing.T) { resultHex := fmt.Sprintf("%x", h.Sum(nil)) t.Logf("%v %v %v", resultHex, resultHex == correctHex, meta) } + +func TestReferenceFileHasher(t *testing.T) { + h := bmt.New(pool) + var mismatch int + chunkSize := 128 * 32 + dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129} //, chunkSize * 130, chunkSize * 128 * 128} + //dataLengths := []int{31} + for _, dataLength := range dataLengths { + fh := NewReferenceFileHasher(h, 128) + data, _ := newSerialData(dataLength, 0) + refHash := fh.Hash(bytes.NewReader(data), len(data)).Bytes() + + pyramidHash, err := referenceHash(data) + if err != nil { + t.Fatalf(err.Error()) + } + + eq := bytes.Equal(pyramidHash, refHash) + if !eq { + mismatch++ + } + t.Logf("[%7d+%4d]\tref: %x\tpyr: %x", dataLength/chunkSize, dataLength%chunkSize, refHash, pyramidHash) + } + if mismatch > 0 { + t.Fatalf("failed have %d mismatch", mismatch) + } +} From a55d5e87ae7c8d6792d07933f948596c4785a9d8 Mon Sep 17 00:00:00 2001 From: lash Date: Mon, 17 Sep 2018 14:18:41 +0200 Subject: [PATCH 16/50] swarm/storage: Proof of bug in Tree/Pyramid for dangling chunks --- swarm/storage/chunker.go | 3 +- swarm/storage/chunker_test.go | 50 ++++++++++++++++++++++++++++ swarm/storage/common_test.go | 15 +++++++-- swarm/storage/filehasher_test.go | 56 +++++++++++++++----------------- 4 files changed, 91 insertions(+), 33 deletions(-) diff --git a/swarm/storage/chunker.go b/swarm/storage/chunker.go index 40292e88f9..7bc8dee004 100644 --- a/swarm/storage/chunker.go +++ b/swarm/storage/chunker.go @@ -298,7 +298,6 @@ func (tc *TreeChunker) split(ctx context.Context, depth int, treeSize int64, add // dept > 0 // intermediate chunk containing child nodes hashes branchCnt := (size + treeSize - 1) / treeSize - var chunk = make([]byte, branchCnt*tc.hashSize+8) var pos, i int64 @@ -336,6 +335,8 @@ func (tc *TreeChunker) split(ctx context.Context, depth int, treeSize int64, add case tc.jobC <- &hashJob{addr, chunk, size, parentWg}: case <-tc.quitC: } + + log.Trace("chunkdata", "d", chunk) } func (tc *TreeChunker) runWorker(ctx context.Context) { diff --git a/swarm/storage/chunker_test.go b/swarm/storage/chunker_test.go index 6172d8a092..e23cc24eeb 100644 --- a/swarm/storage/chunker_test.go +++ b/swarm/storage/chunker_test.go @@ -70,6 +70,41 @@ func testRandomBrokenData(n int, tester *chunkerTester) { tester.t.Logf(" Address = %v\n", key) } +func testSerialData(usePyramid bool, hash string, n int, tester *chunkerTester) Address { + if tester.inputs == nil { + tester.inputs = make(map[uint64][]byte) + } + input, found := tester.inputs[uint64(n)] + var data io.Reader + if !found { + data, input = generateSerialData(n, 255, 0) + tester.inputs[uint64(n)] = input + } else { + data = io.LimitReader(bytes.NewReader(input), int64(n)) + } + + putGetter := newTestHasherStore(NewMapChunkStore(), hash) + + var addr Address + var wait func(context.Context) error + var err error + ctx := context.TODO() + if usePyramid { + addr, wait, err = PyramidSplit(ctx, data, putGetter, putGetter) + } else { + addr, wait, err = TreeSplit(ctx, data, int64(n), putGetter) + } + if err != nil { + tester.t.Fatalf(err.Error()) + } + tester.t.Logf(" Key = %v\n", addr) + err = wait(ctx) + if err != nil { + tester.t.Fatalf(err.Error()) + } + return addr +} + func 
testRandomData(usePyramid bool, hash string, n int, tester *chunkerTester) Address { if tester.inputs == nil { tester.inputs = make(map[uint64][]byte) @@ -229,6 +264,21 @@ func TestDataAppend(t *testing.T) { } } +func TestSerialData(t *testing.T) { + sizes := []int{4096 * 129} + tester := &chunkerTester{t: t} + + for _, s := range sizes { + treeChunkerKey := testSerialData(false, BMTHash, s, tester) + // pyramidChunkerKey := testRandomData(true, SHA3Hash, s, tester) + // if treeChunkerKey.String() != pyramidChunkerKey.String() { + // tester.t.Fatalf("tree chunker and pyramid chunker key mismatch for size %v\n TC: %v\n PC: %v\n", s, treeChunkerKey.String(), pyramidChunkerKey.String()) + // } + t.Logf("chunker result: %s", treeChunkerKey) + } + +} + func TestRandomData(t *testing.T) { // This test can validate files up to a relatively short length, as tree chunker slows down drastically. // Validation of longer files is done by TestLocalStoreAndRetrieve in swarm package. diff --git a/swarm/storage/common_test.go b/swarm/storage/common_test.go index 33133edd74..7b27498c60 100644 --- a/swarm/storage/common_test.go +++ b/swarm/storage/common_test.go @@ -174,12 +174,21 @@ func (r *brokenLimitedReader) Read(buf []byte) (int, error) { return r.lr.Read(buf) } -func testStoreRandom(m ChunkStore, n int, chunksize int64, t *testing.T) { - chunks, err := mputRandomChunks(m, n, chunksize) +func generateSerialData(l int, mod int, offset int) (r io.Reader, slice []byte) { + slice = make([]byte, l) + for i := 0; i < len(slice); i++ { + slice[i] = byte((i + offset) % mod) + } + r = io.LimitReader(bytes.NewReader(slice), int64(l)) + return +} + +func testStoreRandom(m ChunkStore, processors int, n int, chunksize int64, t *testing.T) { + hs, err := mputRandomChunks(m, processors, n, chunksize) if err != nil { t.Fatalf("expected no error, got %v", err) } - err = mget(m, chunkAddresses(chunks), nil) + err := mget(m, hs, nil) if err != nil { t.Fatalf("testStore failed: %v", err) } diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index dd0191baa3..078791706f 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -3,7 +3,6 @@ package storage import ( "bytes" "context" - crand "crypto/rand" "encoding/binary" "fmt" "io" @@ -26,42 +25,19 @@ func newAsyncHasher() bmt.SectionWriter { return h.NewAsyncWriter(false) } -func newSerialData(l int, offset int) ([]byte, error) { - data := make([]byte, l) - for i := 0; i < len(data); i++ { - data[i] = byte((i + offset) % 255) - } - return data, nil -} - -// offset doesn't matter here -func newRandomData(l int, offset int) ([]byte, error) { - data := make([]byte, l) - c, err := crand.Read(data) - if err != nil { - return nil, err - } else if c != len(data) { - return nil, fmt.Errorf("short read (%d)", c) - } - return data, nil -} - func TestSum(t *testing.T) { var mismatch int chunkSize := 128 * 32 serialOffset := 0 //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} - dataLengths := []int{chunkSize * 2} + dataLengths := []int{chunkSize * 129} for _, dl := range dataLengths { chunks := dl / chunkSize log.Debug("testing", "c", chunks, "s", dl%chunkSize) fh := NewFileHasher(newAsyncHasher, 128, 32) - data, err := newSerialData(dl, serialOffset) - if err != nil { - t.Fatal(err) - } + 
_, data := generateSerialData(dl, 255, serialOffset) for i := 0; i < len(data); i += 32 { max := i + 32 if len(data) < max { @@ -105,7 +81,11 @@ func referenceHash(data []byte) ([]byte, error) { func TestAnomaly(t *testing.T) { correctData := []byte{48, 71, 216, 65, 7, 120, 152, 194, 107, 190, 107, 230, 82, 162, 236, 89, 10, 93, 155, 215, 205, 69, 210, 144, 234, 66, 81, 27, 72, 117, 60, 9, 129, 179, 29, 154, 127, 108, 55, 117, 35, 232, 118, 157, 176, 33, 9, 29, 242, 62, 221, 159, 215, 189, 107, 205, 241, 26, 34, 245, 24, 219, 96, 6} + doubleHashedDataTwo := []byte{0, 111, 13, 142, 184, 222, 96, 141, 2, 241, 228, 138, 179, 76, 211, 246, 178, 202, 99, 167, 150, 179, 30, 118, 55, 144, 90, 113, 3, 128, 118, 23} + correctHex := "b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199" + doubleHashedHex := "b7e298f61b1bf23e21d8f45bf545eb1d6c0c4eaaca7d2c2690fb86038404a6d6" + var dataLength uint64 = 4096*128 + 4096 data := make([]byte, dataLength) @@ -123,24 +103,42 @@ func TestAnomaly(t *testing.T) { h.Write(data[i*4096 : i*4096+4096]) copy(leftChunk[i*32:], h.Sum(nil)) } + + // hash the first full batch binary.LittleEndian.PutUint64(meta, 4096*128) h.ResetWithLength(meta) h.Write(leftChunk) leftChunkHash := h.Sum(nil) - t.Logf("%x %v %v", leftChunkHash, bytes.Equal(correctData[:32], leftChunkHash), meta) + t.Logf("leftchunk\t%x %v %v", leftChunkHash, bytes.Equal(correctData[:32], leftChunkHash), meta) + // hash dangling chunk binary.LittleEndian.PutUint64(meta, 4096) h.ResetWithLength(meta) h.Write(data[4096*128:]) rightChunkHash := h.Sum(nil) - t.Logf("%x %v %v", rightChunkHash, bytes.Equal(correctData[32:], rightChunkHash), meta) + t.Logf("rightchunk\t%x %v %v", rightChunkHash, bytes.Equal(correctData[32:], rightChunkHash), meta) + + // now double hash the right side + h.ResetWithLength(meta) + h.Write(correctData[32:]) + altRightChunkHash := h.Sum(nil) // alt-right is wrong, of course :) + t.Logf("altrightchunk\t%x %v %v", altRightChunkHash, bytes.Equal(doubleHashedDataTwo, altRightChunkHash), meta) + // this is the result we get from filehasher binary.LittleEndian.PutUint64(meta, dataLength) h.ResetWithLength(meta) h.Write(leftChunkHash) h.Write(rightChunkHash) resultHex := fmt.Sprintf("%x", h.Sum(nil)) t.Logf("%v %v %v", resultHex, resultHex == correctHex, meta) + + // this should match the result from treechunker and pyramidchunker + binary.LittleEndian.PutUint64(meta, dataLength) + h.ResetWithLength(meta) + h.Write(leftChunkHash) + h.Write(altRightChunkHash) + resultHex = fmt.Sprintf("%x", h.Sum(nil)) + t.Logf("%v %v %v", resultHex, resultHex == doubleHashedHex, meta) } func TestReferenceFileHasher(t *testing.T) { @@ -151,7 +149,7 @@ func TestReferenceFileHasher(t *testing.T) { //dataLengths := []int{31} for _, dataLength := range dataLengths { fh := NewReferenceFileHasher(h, 128) - data, _ := newSerialData(dataLength, 0) + _, data := generateSerialData(dataLength, 255, 0) refHash := fh.Hash(bytes.NewReader(data), len(data)).Bytes() pyramidHash, err := referenceHash(data) From e0441640c6cbbab48df64565e187973fde7c25f0 Mon Sep 17 00:00:00 2001 From: lash Date: Tue, 18 Sep 2018 08:12:18 +0200 Subject: [PATCH 17/50] swarm/storage: Add missing filehasher ref src file --- swarm/storage/filehasher_r.go | 124 ++++++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 swarm/storage/filehasher_r.go diff --git a/swarm/storage/filehasher_r.go b/swarm/storage/filehasher_r.go new file mode 100644 index 0000000000..56d5c49e8a --- /dev/null +++ 
b/swarm/storage/filehasher_r.go
@@ -0,0 +1,124 @@
+package storage
+
+import (
+	"encoding/binary"
+	"io"
+	"math"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum/go-ethereum/swarm/bmt"
+)
+
+type ReferenceFileHasher struct {
+	hasher         *bmt.Hasher
+	branches       int
+	segmentSize    int
+	buffer         []byte
+	cursors        []int
+	chunkSize      int
+	totalBytes     int
+	writeByteCount int
+	writeCount     int
+	swap           []byte
+}
+
+func NewReferenceFileHasher(hasher *bmt.Hasher, branches int) *ReferenceFileHasher {
+	return &ReferenceFileHasher{
+		hasher:      hasher,
+		branches:    branches,
+		segmentSize: hasher.Size(),
+		chunkSize:   branches * hasher.Size(),
+	}
+}
+
+func (f *ReferenceFileHasher) getLevelsFromLength(l int) int {
+	if l == 0 {
+		return 0
+	} else if l <= 4096 {
+		return 2
+	}
+	c := (l - 1) / (f.segmentSize)
+
+	return int(math.Log(float64(c))/math.Log(float64(f.branches)) + 2)
+}
+
+func (f *ReferenceFileHasher) Hash(r io.Reader, l int) common.Hash {
+	f.totalBytes = l
+	levelCount := f.getLevelsFromLength(l)
+	log.Debug("level count", "l", levelCount, "b", f.branches, "c", l, "s", f.segmentSize)
+	bufLen := f.segmentSize
+	for i := 1; i < levelCount; i++ {
+		bufLen *= f.branches
+	}
+	f.cursors = make([]int, levelCount)
+	f.buffer = make([]byte, bufLen)
+	f.swap = make([]byte, f.segmentSize)
+	var res bool
+	for !res {
+		input := make([]byte, f.segmentSize)
+		c, err := r.Read(input)
+		log.Trace("read", "c", c, "wbc", f.writeByteCount)
+		if err != nil {
+			if err == io.EOF {
+				log.Debug("haveeof")
+				res = true
+			} else {
+				panic(err)
+			}
+		} else if c < f.segmentSize {
+			input = input[:c]
+		}
+		f.writeByteCount += c
+		if f.writeByteCount == f.totalBytes {
+			res = true
+		}
+		res = f.write(input, 0, res)
+	}
+	return common.BytesToHash(f.buffer[f.cursors[levelCount-1] : f.cursors[levelCount-1]+f.segmentSize])
+}
+
+func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool {
+	for i, l := range f.cursors {
+		log.Debug("cursor", "#", i, "pos", l)
+	}
+	log.Trace("write", "l", level, "len", len(b), "b", b, "end", end, "wbc", f.writeByteCount)
+	copy(f.buffer[f.cursors[level]*f.segmentSize:], b)
+	if level == len(f.cursors)-1 {
+		return true
+	}
+	f.cursors[level]++
+
+	var res bool
+	if f.cursors[level]-f.cursors[level+1] == f.branches || end {
+		span := f.chunkSize
+		for i := 0; i < level; i++ {
+			span *= f.branches
+		}
+		var dataUnderSpan int
+		if end {
+			dataUnderSpan = f.totalBytes % span
+		} else {
+			dataUnderSpan = span
+		}
+		if end && dataUnderSpan == 0 {
+			dataUnderSpan = f.chunkSize
+		}
+		var hashDataSize int
+		if level == 0 {
+			hashDataSize = dataUnderSpan
+		} else {
+			hashDataSize = ((dataUnderSpan-1)/(span/f.branches) + 1) * f.segmentSize
+		}
+		meta := make([]byte, 8)
+		binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan))
+		f.hasher.ResetWithLength(meta)
+		writeHashOffset := f.cursors[level+1] * f.segmentSize
+		f.hasher.Write(f.buffer[writeHashOffset : writeHashOffset+hashDataSize])
+		copy(f.swap, f.hasher.Sum(nil))
+		log.Debug("summed", "b", f.swap, "l", f.cursors[level], "l+1", f.cursors[level+1], "spanlength", dataUnderSpan, "span", span, "meta", meta, "from", writeHashOffset, "to", writeHashOffset+hashDataSize, "data", f.buffer[writeHashOffset:writeHashOffset+hashDataSize])
		res = f.write(f.swap, level+1, end)
+		f.cursors[level] = f.cursors[level+1]
+	}
+	return res
+}
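For orientation, a minimal sketch of how the reference hasher added above is driven. This is not part of the patch series: the pool construction mirrors the test files, and exampleReferenceFileHash is a hypothetical name used only for illustration.

package storage

import (
	"bytes"
	"fmt"

	"github.com/ethereum/go-ethereum/crypto/sha3"
	"github.com/ethereum/go-ethereum/swarm/bmt"
)

// exampleReferenceFileHash hashes one full batch of 128 chunks plus a
// dangling chunk of serial data and prints the root (hypothetical driver).
func exampleReferenceFileHash() {
	pool := bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize)
	fh := NewReferenceFileHasher(bmt.New(pool), 128)
	data := make([]byte, 4096*128+4096)
	for i := range data {
		data[i] = byte(i % 255)
	}
	root := fh.Hash(bytes.NewReader(data), len(data))
	fmt.Printf("root: %x\n", root)
}

From 75ff8179e8b5c159d825731538655ef309c8c3f6 Mon Sep 17 00:00:00 2001
From: lash
Date: Thu, 20 Sep 2018 13:22:02 +0200
Subject: [PATCH 18/50] 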
swarm/storage: Correct referencehasher --- swarm/storage/filehasher.go | 1 + swarm/storage/filehasher_r.go | 43 ++++++++++++++++++++-------- swarm/storage/filehasher_test.go | 48 +++++++++++++++++++++----------- 3 files changed, 64 insertions(+), 28 deletions(-) diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go index 0844f8d7fb..b8564ad64c 100644 --- a/swarm/storage/filehasher.go +++ b/swarm/storage/filehasher.go @@ -284,6 +284,7 @@ func (n *node) done(nodeLength int, spanLength int, parentNode *node) { // length is global length func (n *node) sum(length int64, potentialSpan int64) { + log.Debug("sum", "n", fmt.Sprintf("%p", n)) if length == 0 { n.result <- n.hasher.Sum(nil, 0, nil) return diff --git a/swarm/storage/filehasher_r.go b/swarm/storage/filehasher_r.go index 56d5c49e8a..d8c27c6148 100644 --- a/swarm/storage/filehasher_r.go +++ b/swarm/storage/filehasher_r.go @@ -32,20 +32,20 @@ func NewReferenceFileHasher(hasher *bmt.Hasher, branches int) *ReferenceFileHash } } -func (f *ReferenceFileHasher) getLevelsFromLength(l int) int { +func getLevelsFromLength(l int, segmentSize int, branches int) int { if l == 0 { return 0 - } else if l <= 4096 { + } else if l <= segmentSize*branches { return 2 } - c := (l - 1) / (f.segmentSize) + c := (l - 1) / (segmentSize) - return int(math.Log(float64(c))/math.Log(float64(f.branches)) + 2) + return int(math.Log(float64(c))/math.Log(float64(branches)) + 2) } func (f *ReferenceFileHasher) Hash(r io.Reader, l int) common.Hash { f.totalBytes = l - levelCount := f.getLevelsFromLength(l) + levelCount := getLevelsFromLength(l, f.segmentSize, f.branches) log.Debug("level count", "l", levelCount, "b", f.branches, "c", l, "s", f.segmentSize) bufLen := f.segmentSize for i := 1; i < levelCount; i++ { @@ -78,38 +78,59 @@ func (f *ReferenceFileHasher) Hash(r io.Reader, l int) common.Hash { return common.BytesToHash(f.buffer[f.cursors[levelCount-1] : f.cursors[levelCount-1]+f.segmentSize]) } +// TODO: check length 0 func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { + log.Trace("write", "l", level, "len", len(b), "b", b, "end", end, "wbc", f.writeByteCount) + + // copy data from buffer to current position of corresponding level in buffer + copy(f.buffer[f.cursors[level]*f.segmentSize:], b) for i, l := range f.cursors { log.Debug("cursor", "#", i, "pos", l) } - log.Trace("write", "l", level, "len", len(b), "b", b, "end", end, "wbc", f.writeByteCount) - copy(f.buffer[f.cursors[level]*f.segmentSize:], b) + + // if we are at the tree root the result will be in the first segmentSize bytes of the buffer. 
Return if level == len(f.cursors)-1 { return true } + + if end && level > 0 && f.cursors[level] == f.cursors[level+1] { + res := f.write(b, level+1, end) + return res + } + // increment the position of this level in buffer f.cursors[level]++ + // perform recursive writes down the tree if end of output or on batch boundary var res bool if f.cursors[level]-f.cursors[level+1] == f.branches || end { + if f.cursors[level] == f.cursors[level+1] && f.cursors[level] > 0 { + log.Debug("short return in write") + return true + } + + // calculate what the potential span under this chunk will be span := f.chunkSize for i := 0; i < level; i++ { span *= f.branches } + + // if we have a dangling chunk, simply pass it up + // calculate the data in this chunk (the data to be hashed) var dataUnderSpan int if end { - dataUnderSpan = f.totalBytes % span + dataUnderSpan = (f.totalBytes-1)%span + 1 } else { dataUnderSpan = span } - if end && dataUnderSpan == 0 { - dataUnderSpan = f.chunkSize - } + + // calculate the actual data under this span var hashDataSize int if level == 0 { hashDataSize = dataUnderSpan } else { hashDataSize = ((dataUnderSpan-1)/(span/f.branches) + 1) * f.segmentSize } + meta := make([]byte, 8) binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) f.hasher.ResetWithLength(meta) diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 078791706f..00e34971a3 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -7,7 +7,7 @@ import ( "fmt" "io" "testing" - "time" + //"time" "github.com/ethereum/go-ethereum/crypto/sha3" "github.com/ethereum/go-ethereum/swarm/bmt" @@ -31,7 +31,7 @@ func TestSum(t *testing.T) { chunkSize := 128 * 32 serialOffset := 0 //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} - dataLengths := []int{chunkSize * 129} + dataLengths := []int{chunkSize * 128 * 128} for _, dl := range dataLengths { chunks := dl / chunkSize @@ -49,7 +49,7 @@ func TestSum(t *testing.T) { } } - time.Sleep(time.Second * 1) + //time.Sleep(time.Second * 1) fh.SetLength(int64(dl)) h := fh.Sum(nil) @@ -143,27 +143,41 @@ func TestAnomaly(t *testing.T) { func TestReferenceFileHasher(t *testing.T) { h := bmt.New(pool) - var mismatch int + //var mismatch int chunkSize := 128 * 32 - dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129} //, chunkSize * 130, chunkSize * 128 * 128} - //dataLengths := []int{31} + //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129} //, chunkSize * 130, chunkSize * 128 * 128} + dataLengths := []int{chunkSize * 128 * 128} for _, dataLength := range dataLengths { fh := NewReferenceFileHasher(h, 128) _, data := generateSerialData(dataLength, 255, 0) refHash := fh.Hash(bytes.NewReader(data), len(data)).Bytes() - pyramidHash, err := referenceHash(data) - if err != nil { - t.Fatalf(err.Error()) - } + // pyramidHash, err := referenceHash(data) + // if err != nil { + // t.Fatalf(err.Error()) + // } + // 
+		// eq := bytes.Equal(pyramidHash, refHash)
+		// if !eq {
+		// 	mismatch++
+		// }
+		// t.Logf("[%7d+%4d]\t%v\tref: %x\tpyr: %x", dataLength/chunkSize, dataLength%chunkSize, eq, refHash, pyramidHash)
+		t.Logf("[%7d+%4d]\tref: %x", dataLength/chunkSize, dataLength%chunkSize, refHash)
+	}
+	// if mismatch > 0 {
+	// 	t.Fatalf("failed have %d mismatch", mismatch)
+	// }
+}
+
+func TestStupidFileHasher(t *testing.T) {
+	segmentSize := 32
+	branches := 128
+	chunkSize := segmentSize * branches
+	dataLengths := []int{chunkSize*128 + 32}
+	for _, dataLength := range dataLengths {
+		levelCount := getLevelsFromLength(dataLength, segmentSize, branches)
+		for i := 0; i < levelCount; i++ {
+
+		}
+	}
+}

From a86277379c3b048925337f9a0390882caad0d401 Mon Sep 17 00:00:00 2001
From: lash
Date: Thu, 20 Sep 2018 14:34:30 +0200
Subject: [PATCH 19/50] swarm/storage: Clean up logging and add comments

---
 swarm/storage/filehasher.go      |  17 +--
 swarm/storage/filehasher_r.go    |  35 +++--
 swarm/storage/filehasher_test.go | 176 +++++++++----------------------
 3 files changed, 81 insertions(+), 147 deletions(-)

diff --git a/swarm/storage/filehasher.go b/swarm/storage/filehasher.go
index b8564ad64c..d3c84ac295 100644
--- a/swarm/storage/filehasher.go
+++ b/swarm/storage/filehasher.go
@@ -23,6 +23,7 @@ import (
 	"math"
 	"sync"
 	"sync/atomic"
+	"time"
 
 	"github.com/ethereum/go-ethereum/swarm/bmt"
 	"github.com/ethereum/go-ethereum/swarm/log"
@@ -49,7 +50,6 @@ type FileHasher struct {
 	lnBranches float64
 }
 
-//func NewFileHasher(hasherFunc func() SectionWriter, branches int, secSize int) *FileHasher {
 func NewFileHasher(hasherFunc func() bmt.SectionWriter, branches int, secSize int) *FileHasher {
 	fh := &FileHasher{
 		hasherFunc: hasherFunc,
@@ -116,7 +116,7 @@ func (lev *level) getLevel(pl int) (par *level) {
 	if pl < len(lev.levels) {
 		return lev.levels[pl]
 	}
-	log.Warn("creating level", "l", pl)
+	log.Trace("creating level", "l", pl)
 	par = &level{
 		levelIndex: pl,
 		FileHasher: lev.FileHasher,
@@ -309,10 +309,15 @@ func (n *node) sum(length int64, potentialSpan int64) {
 	// we already checked on top if length is 0. If it is 0 here, it's on span threshold and a full chunk write
 	// otherwise we do not have a full chunk write, and need to make the underlying hash sum
 	if dataLength == 0 {
-		// get the parent node if it exists
-		parentNode := n.getParent(length)
-		parentNode.sum(length, potentialSpan)
-		return
+		// replace this with a channel somewhere
+		for {
+			parentNode := n.getParent(length)
+			if parentNode != nil {
+				parentNode.sum(length, potentialSpan)
+				return
+			}
+			time.Sleep(time.Microsecond)
+		}
 	}
 
 	var bmtLength int
diff --git a/swarm/storage/filehasher_r.go b/swarm/storage/filehasher_r.go
index d8c27c6148..5a649d64e9 100644
--- a/swarm/storage/filehasher_r.go
+++ b/swarm/storage/filehasher_r.go
@@ -20,7 +20,6 @@ type ReferenceFileHasher struct {
 	totalBytes     int
 	writeByteCount int
 	writeCount     int
-	swap           []byte
 }
 
 func NewReferenceFileHasher(hasher *bmt.Hasher, branches int) *ReferenceFileHasher {
@@ -32,6 +31,7 @@ func NewReferenceFileHasher(hasher *bmt.Hasher, branches int) *ReferenceFileHash
 	}
 }
 
+// calculate how many levels the tree will have; 
includes root hash as level func getLevelsFromLength(l int, segmentSize int, branches int) int { if l == 0 { return 0 @@ -43,17 +43,19 @@ func getLevelsFromLength(l int, segmentSize int, branches int) int { return int(math.Log(float64(c))/math.Log(float64(branches)) + 2) } +// reads segmentwise from input data and writes +// TODO: Write directly to f.buffer instead of input +// TODO: See if level 0 data can be written directly to hasher without complicating code func (f *ReferenceFileHasher) Hash(r io.Reader, l int) common.Hash { f.totalBytes = l levelCount := getLevelsFromLength(l, f.segmentSize, f.branches) - log.Debug("level count", "l", levelCount, "b", f.branches, "c", l, "s", f.segmentSize) + log.Trace("level count", "l", levelCount, "b", f.branches, "c", l, "s", f.segmentSize) bufLen := f.segmentSize for i := 1; i < levelCount; i++ { bufLen *= f.branches } f.cursors = make([]int, levelCount) f.buffer = make([]byte, bufLen) - f.swap = make([]byte, f.segmentSize) var res bool for !res { input := make([]byte, f.segmentSize) @@ -73,19 +75,20 @@ func (f *ReferenceFileHasher) Hash(r io.Reader, l int) common.Hash { if f.writeByteCount == f.totalBytes { res = true } - res = f.write(input, 0, res) + f.write(input, 0, res) } return common.BytesToHash(f.buffer[f.cursors[levelCount-1] : f.cursors[levelCount-1]+f.segmentSize]) } -// TODO: check length 0 +// TODO: check if length 0 +// performs recursive hashing on complete batches or data end func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { - log.Trace("write", "l", level, "len", len(b), "b", b, "end", end, "wbc", f.writeByteCount) + log.Debug("write", "l", level, "len", len(b), "b", b, "end", end, "wbc", f.writeByteCount) // copy data from buffer to current position of corresponding level in buffer copy(f.buffer[f.cursors[level]*f.segmentSize:], b) for i, l := range f.cursors { - log.Debug("cursor", "#", i, "pos", l) + log.Trace("cursor", "#", i, "pos", l) } // if we are at the tree root the result will be in the first segmentSize bytes of the buffer. 
Return @@ -93,20 +96,18 @@ func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { return true } + // if the offset is the same one level up, then we have a dangling chunk and we merely pass it down the tree if end && level > 0 && f.cursors[level] == f.cursors[level+1] { res := f.write(b, level+1, end) return res } - // increment the position of this level in buffer + + // we've written to the buffer of this level, so we increment the cursor f.cursors[level]++ // perform recursive writes down the tree if end of output or on batch boundary var res bool if f.cursors[level]-f.cursors[level+1] == f.branches || end { - if f.cursors[level] == f.cursors[level+1] && f.cursors[level] > 0 { - log.Debug("short return in write") - return true - } // calculate what the potential span under this chunk will be span := f.chunkSize @@ -114,7 +115,6 @@ func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { span *= f.branches } - // if we have a dangling chunk, simply pass it up // calculate the data in this chunk (the data to be hashed) var dataUnderSpan int if end { @@ -131,14 +131,17 @@ func (f *ReferenceFileHasher) write(b []byte, level int, end bool) bool { hashDataSize = ((dataUnderSpan-1)/(span/f.branches) + 1) * f.segmentSize } + // hash the chunk and write it to the current cursor position on the next level meta := make([]byte, 8) binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) f.hasher.ResetWithLength(meta) writeHashOffset := f.cursors[level+1] * f.segmentSize f.hasher.Write(f.buffer[writeHashOffset : writeHashOffset+hashDataSize]) - copy(f.swap, f.hasher.Sum(nil)) - log.Debug("summed", "b", f.swap, "l", f.cursors[level], "l+1", f.cursors[level+1], "spanlength", dataUnderSpan, "span", span, "meta", meta, "from", writeHashOffset, "to", writeHashOffset+hashDataSize, "data", f.buffer[writeHashOffset:writeHashOffset+hashDataSize]) - res = f.write(f.swap, level+1, end) + hashResult := f.hasher.Sum(nil) + log.Debug("summed", "b", hashResult, "l", f.cursors[level], "l+1", f.cursors[level+1], "spanlength", dataUnderSpan, "span", span, "meta", meta, "from", writeHashOffset, "to", writeHashOffset+hashDataSize, "data", f.buffer[writeHashOffset:writeHashOffset+hashDataSize]) + res = f.write(hashResult, level+1, end) + + // recycle buffer space from the threshold of just written hash f.cursors[level] = f.cursors[level+1] } return res diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 00e34971a3..e8641a4039 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -2,12 +2,9 @@ package storage import ( "bytes" - "context" - "encoding/binary" "fmt" - "io" "testing" - //"time" + "time" "github.com/ethereum/go-ethereum/crypto/sha3" "github.com/ethereum/go-ethereum/swarm/bmt" @@ -25,17 +22,60 @@ func newAsyncHasher() bmt.SectionWriter { return h.NewAsyncWriter(false) } +func TestReferenceFileHasher(t *testing.T) { + h := bmt.New(pool) + var mismatch int + chunkSize := 128 * 32 + expected := []string{ + "ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", + "0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", + "3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", + "95510c2ff18276ed94be2160aed4e69c9116573b6f69faaeed1b426fea6a3db8", + "490072cc55b8ad381335ff882ac51303cc069cbcb8d8d3f7aa152d9c617829fe", + "541552bae05e9a63a6cb561f69edf36ffe073e441667dbf7a0e9a3864bb744ea", + "c10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef", + 
"91699c83ed93a1f87e326a29ccd8cc775323f9e7260035a5f014c975c5f3cd28", + "73759673a52c1f1707cbb61337645f4fcbd209cdc53d7e2cedaaa9f44df61285", + "db1313a727ffc184ae52a70012fbbf7235f551b9f2d2da04bf476abe42a3cb42", + "ade7af36ac0c7297dc1c11fd7b46981b629c6077bce75300f85b02a6153f161b", + "29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9", + "61416726988f77b874435bdd89a419edc3861111884fd60e8adf54e2f299efd6", + "3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09", + "e5c76afa931e33ac94bce2e754b1bb6407d07f738f67856783d93934ca8fc576", + "485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df", + "624b2abb7aefc0978f891b2a56b665513480e5dc195b4a66cd8def074a6d2e94", + "b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", + "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", + "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", + } + dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} + for i, dataLength := range dataLengths { + fh := NewReferenceFileHasher(h, 128) + _, data := generateSerialData(dataLength, 255, 0) + refHash := fh.Hash(bytes.NewReader(data), len(data)).Bytes() + eq := true + if expected[i] != fmt.Sprintf("%x", refHash) { + mismatch++ + eq = false + } + t.Logf("[%7d+%4d]\t%v\tref: %s\texpect: %x", dataLength/chunkSize, dataLength%chunkSize, eq, expected[i], refHash) + } + if mismatch > 0 { + t.Fatalf("mismatches: %d/%d", mismatch, len(dataLengths)) + } +} + func TestSum(t *testing.T) { var mismatch int chunkSize := 128 * 32 serialOffset := 0 - //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} - dataLengths := []int{chunkSize * 128 * 128} + dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} for _, dl := range dataLengths { chunks := dl / chunkSize log.Debug("testing", "c", chunks, "s", dl%chunkSize) + fhStartTime := time.Now() fh := NewFileHasher(newAsyncHasher, 128, 32) _, data := generateSerialData(dl, 255, serialOffset) for i := 0; i < len(data); i += 32 { @@ -49,135 +89,21 @@ func TestSum(t *testing.T) { } } - //time.Sleep(time.Second * 1) fh.SetLength(int64(dl)) h := fh.Sum(nil) + rhStartTime := time.Now() + rh := NewReferenceFileHasher(bmt.New(pool), 128) + p := rh.Hash(bytes.NewReader(data), len(data)).Bytes() + rhDur := time.Now().Sub(rhStartTime) - p, err := referenceHash(data) - - if err != nil { - t.Fatalf(err.Error()) - } eq := bytes.Equal(p, h) if !eq { mismatch++ } t.Logf("[%3d + %2d]\t%v\t%x\t%x", chunks, dl%chunkSize, eq, p, h) - t.Logf("[%3d + %2d]\t%x", chunks, dl%chunkSize, h) + t.Logf("ptime %v\tftime %v", rhDur, rhStartTime.Sub(fhStartTime)) } if mismatch > 0 { t.Fatalf("%d/%d mismatches", mismatch, len(dataLengths)) } - -} - -func referenceHash(data []byte) ([]byte, error) { - //return []byte{}, nil - putGetter := newTestHasherStore(&fakeChunkStore{}, BMTHash) - p, _, err := 
PyramidSplit(context.TODO(), io.LimitReader(bytes.NewReader(data), int64(len(data))), putGetter, putGetter) - return p, err -} - -func TestAnomaly(t *testing.T) { - - correctData := []byte{48, 71, 216, 65, 7, 120, 152, 194, 107, 190, 107, 230, 82, 162, 236, 89, 10, 93, 155, 215, 205, 69, 210, 144, 234, 66, 81, 27, 72, 117, 60, 9, 129, 179, 29, 154, 127, 108, 55, 117, 35, 232, 118, 157, 176, 33, 9, 29, 242, 62, 221, 159, 215, 189, 107, 205, 241, 26, 34, 245, 24, 219, 96, 6} - doubleHashedDataTwo := []byte{0, 111, 13, 142, 184, 222, 96, 141, 2, 241, 228, 138, 179, 76, 211, 246, 178, 202, 99, 167, 150, 179, 30, 118, 55, 144, 90, 113, 3, 128, 118, 23} - - correctHex := "b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199" - doubleHashedHex := "b7e298f61b1bf23e21d8f45bf545eb1d6c0c4eaaca7d2c2690fb86038404a6d6" - - var dataLength uint64 = 4096*128 + 4096 - - data := make([]byte, dataLength) - for i := uint64(0); i < dataLength; i++ { - data[i] = byte(i % 255) - } - - leftChunk := make([]byte, 4096) - - h := bmt.New(pool) - meta := make([]byte, 8) - binary.LittleEndian.PutUint64(meta, 4096) - for i := 0; i < 128; i++ { - h.ResetWithLength(meta) - h.Write(data[i*4096 : i*4096+4096]) - copy(leftChunk[i*32:], h.Sum(nil)) - } - - // hash the first full batch - binary.LittleEndian.PutUint64(meta, 4096*128) - h.ResetWithLength(meta) - h.Write(leftChunk) - leftChunkHash := h.Sum(nil) - t.Logf("leftchunk\t%x %v %v", leftChunkHash, bytes.Equal(correctData[:32], leftChunkHash), meta) - - // hash dangling chunk - binary.LittleEndian.PutUint64(meta, 4096) - h.ResetWithLength(meta) - h.Write(data[4096*128:]) - rightChunkHash := h.Sum(nil) - t.Logf("rightchunk\t%x %v %v", rightChunkHash, bytes.Equal(correctData[32:], rightChunkHash), meta) - - // now double hash the right side - h.ResetWithLength(meta) - h.Write(correctData[32:]) - altRightChunkHash := h.Sum(nil) // alt-right is wrong, of course :) - t.Logf("altrightchunk\t%x %v %v", altRightChunkHash, bytes.Equal(doubleHashedDataTwo, altRightChunkHash), meta) - - // this is the result we get from filehasher - binary.LittleEndian.PutUint64(meta, dataLength) - h.ResetWithLength(meta) - h.Write(leftChunkHash) - h.Write(rightChunkHash) - resultHex := fmt.Sprintf("%x", h.Sum(nil)) - t.Logf("%v %v %v", resultHex, resultHex == correctHex, meta) - - // this should match the result from treechunker and pyramidchunker - binary.LittleEndian.PutUint64(meta, dataLength) - h.ResetWithLength(meta) - h.Write(leftChunkHash) - h.Write(altRightChunkHash) - resultHex = fmt.Sprintf("%x", h.Sum(nil)) - t.Logf("%v %v %v", resultHex, resultHex == doubleHashedHex, meta) -} - -func TestReferenceFileHasher(t *testing.T) { - h := bmt.New(pool) - //var mismatch int - chunkSize := 128 * 32 - //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129} //, chunkSize * 130, chunkSize * 128 * 128} - dataLengths := []int{chunkSize * 128 * 128} - for _, dataLength := range dataLengths { - fh := NewReferenceFileHasher(h, 128) - _, data := generateSerialData(dataLength, 255, 0) - refHash := fh.Hash(bytes.NewReader(data), len(data)).Bytes() - - // pyramidHash, err := referenceHash(data) - // if err != nil { - // t.Fatalf(err.Error()) - // } - // - // eq := bytes.Equal(pyramidHash, refHash) - // if !eq { - // mismatch++ - // } - // t.Logf("[%7d+%4d]\t%v\tref: %x\tpyr: %x", dataLength/chunkSize, 
dataLength%chunkSize, eq, refHash, pyramidHash) - t.Logf("[%7d+%4d]\tref: %x", dataLength/chunkSize, dataLength%chunkSize, refHash) - } - // if mismatch > 0 { - // t.Fatalf("failed have %d mismatch", mismatch) - // } -} - -func TestStupidFileHasher(t *testing.T) { - segmentSize := 32 - branches := 128 - chunkSize := segmentSize * branches - dataLengths := []int{chunkSize*128 + 32} - for _, dataLength := range dataLengths { - levelCount := getLevelsFromLength(dataLength, segmentSize, branches) - for i := 0; i < levelCount; i++ { - - } - } } From d655184101664c63206abab0433a34d8fe6cf301 Mon Sep 17 00:00:00 2001 From: lash Date: Thu, 27 Sep 2018 13:14:20 +0200 Subject: [PATCH 20/50] swarm/storage: Add alt filehasher impl, ok up to chunk boundary --- swarm/storage/filehasher_alt.go | 231 +++++++++++++++++++++++++++++++ swarm/storage/filehasher_test.go | 62 ++++++++- 2 files changed, 291 insertions(+), 2 deletions(-) create mode 100644 swarm/storage/filehasher_alt.go diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go new file mode 100644 index 0000000000..ee98f22e20 --- /dev/null +++ b/swarm/storage/filehasher_alt.go @@ -0,0 +1,231 @@ +package storage + +import ( + "encoding/binary" + "sync" + + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/swarm/bmt" +) + +const ( + altFileHasherMaxLevels = 9 // 22 zetabytes should be enough for anyone +) + +type AltFileHasher struct { + branches int + segmentSize int + hashers [altFileHasherMaxLevels]bmt.SectionWriter + buffers [altFileHasherMaxLevels - 1][]byte + levelCount int + chunkSize int + finished bool + totalBytes int + targetCount [altFileHasherMaxLevels - 1]int + writeCount [altFileHasherMaxLevels]int + doneC [altFileHasherMaxLevels]chan struct{} + wg sync.WaitGroup // used when level done + lwg [altFileHasherMaxLevels]sync.WaitGroup // used when busy hashing + lock sync.Mutex +} + +func NewAltFileHasher(hasherFunc func() bmt.SectionWriter, segmentSize int, branches int) *AltFileHasher { + f := &AltFileHasher{ + branches: branches, + segmentSize: segmentSize, + chunkSize: branches * segmentSize, + } + for i := 0; i < altFileHasherMaxLevels-1; i++ { + f.buffers[i] = make([]byte, f.chunkSize) + f.hashers[i] = hasherFunc() + f.doneC[i] = make(chan struct{}, 1) + } + f.Reset() + return f +} + +func (f *AltFileHasher) Reset() { + f.totalBytes = 0 + f.levelCount = 0 + f.wg.Add(altFileHasherMaxLevels) + for i := 0; i < altFileHasherMaxLevels; i++ { + if i > 0 { + f.targetCount[i-1] = 0 + } + f.writeCount[i] = 0 + } +} + +func (f *AltFileHasher) isWriteFinished() bool { + var finished bool + f.lock.Lock() + finished = f.finished + f.lock.Unlock() + return finished +} + +func (f *AltFileHasher) Finish(b []byte) []byte { + if b != nil { + f.totalBytes += len(b) + } + f.finished = true + + // find our level height and release the unused levels + f.levelCount = getLevelsFromLength(f.totalBytes, f.segmentSize, f.branches) + + log.Debug("finish set", "levelcount", f.levelCount) + for i := altFileHasherMaxLevels; i > f.levelCount; i-- { + log.Debug("purging unused level wg", "l", i) + f.wg.Done() + } + + // calculate the amount of writes expected on each level + target := (f.totalBytes-1)/f.segmentSize + 1 + for i := 1; i < f.levelCount; i++ { + target = (target-1)/f.branches + 1 + f.targetCount[i] = target + log.Debug("setting targetcount", "l", i, "t", target) + } + + // write and return result when we get it back + f.write(b, f.writeCount[0], 0) + f.wg.Wait() + return 
f.buffers[f.levelCount-1][:f.segmentSize] +} + +func (f *AltFileHasher) Write(b []byte) { + f.totalBytes += len(b) + f.write(b, f.writeCount[0], 0) +} + +func (f *AltFileHasher) getPotentialSpan(level int) int { + span := f.chunkSize + for i := 0; i < level; i++ { + span *= f.branches + } + return span +} + +// TODO: check if length 0 +// TODO: log error if not end and len(b) < segmentsize +// performs recursive hashing on complete batches or data end +func (f *AltFileHasher) write(b []byte, offset int, level int) { + + if b == nil { + log.Debug("write", "level", level, "offset", offset, "length", "nil") + } else { + l := 32 + if len(b) < l { + l = len(b) + } + log.Debug("write", "level", level, "offset", offset, "length", len(b), "data", b[:l]) + } + + // top level then return + if level == f.levelCount-1 { + copy(f.buffers[level], b) + f.lock.Lock() + f.wg.Done() + f.lock.Unlock() + log.Debug("top done", "level", level) + return + } + + // thread safe writecount + // b will never be nil except bottom level, which will have already been hashed if on chunk boundary + f.lock.Lock() + wc := f.writeCount[level] + f.lock.Unlock() + + // only write if we have data + // data might be nil when upon write finish + if b != nil { + f.hashers[level].Write(offset%f.branches, b) + f.lock.Lock() + f.writeCount[level]++ + wc = f.writeCount[level] + f.lock.Unlock() + } else if wc%f.branches == 0 { + f.lock.Lock() + f.wg.Done() + f.lock.Unlock() + f.doneC[level] <- struct{}{} + return + } + + // execute the hasher if: + // - we are on a chunk edge + // - we are on the data level and writes are set to finished + // - we are above data level, writes are finished, and expected level write count is reached + executeHasher := false + if wc%f.branches == 0 { + log.Debug("executehasher", "reason", "edge", "level", level, "offset", offset) + executeHasher = true + } else if f.finished && level == 0 { + log.Debug("executehasher", "reason", "data done", "level", level, "offset", offset) + executeHasher = true + } else if f.finished && f.targetCount[level] > 0 && f.targetCount[level] == wc { + <-f.doneC[level-1] + log.Debug("executehasher", "reason", "target done", "level", level, "offset", offset) + executeHasher = true + } + + if executeHasher { + + f.lock.Lock() + f.lwg[level].Add(1) + f.lock.Unlock() + + // calculate what the potential span under this chunk will be + span := f.getPotentialSpan(level) + + // calculate the actual data under this span + // if data is fully written, the current chunk may be shorter than the span + var dataUnderSpan int + if f.isWriteFinished() { + dataUnderSpan = (f.totalBytes-1)%span + 1 + } else { + dataUnderSpan = span + } + + // calculate the length of the actual data in this chunk (the data to be hashed) + var hashDataSize int + if level == 0 { + hashDataSize = dataUnderSpan + } else { + hashDataSize = ((dataUnderSpan-1)/(span/f.branches) + 1) * f.segmentSize + } + + // hash the chunk and write it to the current cursor position on the next level + meta := make([]byte, 8) + binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) + log.Debug("hash", "level", level, "size", hashDataSize, "meta", meta, "wc", wc) + hashResult := f.hashers[level].Sum(nil, hashDataSize, meta) + f.hashers[level].Reset() + go func(level int, wc int, finished bool) { + f.lwg[level+1].Wait() + chunkWriteCount := wc % f.branches + parentOffset := (chunkWriteCount - 1) / f.branches + if (level == 0 && finished) || f.targetCount[level] == wc { + log.Debug("done", "level", level) + f.lock.Lock() + 
f.wg.Done() + f.lock.Unlock() + f.doneC[level] <- struct{}{} + } + f.write(hashResult, parentOffset, level+1) + f.lock.Lock() + f.lwg[level].Done() + f.lock.Unlock() + }(level, wc, f.finished) + } +} + +func (f *AltFileHasher) wgDoneFunc(level int, prune bool) func() { + log.Warn("done", "level", level, "prune", prune) + return func() { + f.lock.Lock() + f.wg.Done() + f.lock.Unlock() + } +} diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index e8641a4039..9b6e3ed869 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -48,8 +48,11 @@ func TestReferenceFileHasher(t *testing.T) { "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", } - dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} + //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} + dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32} + //dataLengths := []int{chunkSize + 32} for i, dataLength := range dataLengths { + log.Info("start", "len", dataLength) fh := NewReferenceFileHasher(h, 128) _, data := generateSerialData(dataLength, 255, 0) refHash := fh.Hash(bytes.NewReader(data), len(data)).Bytes() @@ -58,7 +61,62 @@ func TestReferenceFileHasher(t *testing.T) { mismatch++ eq = false } - t.Logf("[%7d+%4d]\t%v\tref: %s\texpect: %x", dataLength/chunkSize, dataLength%chunkSize, eq, expected[i], refHash) + t.Logf("[%7d+%4d]\t%v\tref: %x\texpect: %s", dataLength/chunkSize, dataLength%chunkSize, eq, refHash, expected[i]) + } + if mismatch > 0 { + t.Fatalf("mismatches: %d/%d", mismatch, len(dataLengths)) + } +} + +func TestAltFileHasher(t *testing.T) { + var mismatch int + chunkSize := 128 * 32 + expected := []string{ + "ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", + "0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", + "3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", + "95510c2ff18276ed94be2160aed4e69c9116573b6f69faaeed1b426fea6a3db8", + "490072cc55b8ad381335ff882ac51303cc069cbcb8d8d3f7aa152d9c617829fe", + "541552bae05e9a63a6cb561f69edf36ffe073e441667dbf7a0e9a3864bb744ea", + "c10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef", + "91699c83ed93a1f87e326a29ccd8cc775323f9e7260035a5f014c975c5f3cd28", + "73759673a52c1f1707cbb61337645f4fcbd209cdc53d7e2cedaaa9f44df61285", + "db1313a727ffc184ae52a70012fbbf7235f551b9f2d2da04bf476abe42a3cb42", + "ade7af36ac0c7297dc1c11fd7b46981b629c6077bce75300f85b02a6153f161b", + "29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9", + "61416726988f77b874435bdd89a419edc3861111884fd60e8adf54e2f299efd6", + "3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09", + "e5c76afa931e33ac94bce2e754b1bb6407d07f738f67856783d93934ca8fc576", + "485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df", + "624b2abb7aefc0978f891b2a56b665513480e5dc195b4a66cd8def074a6d2e94", + 
"b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", + "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", + "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", + } + //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} + dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32} + //dataLengths := []int{chunkSize + 32} + for i, dataLength := range dataLengths { + log.Info("start", "len", dataLength) + fh := NewAltFileHasher(newAsyncHasher, 32, 128) + _, data := generateSerialData(dataLength, 255, 0) + l := 32 + offset := 0 + for i := 0; i < dataLength; i += 32 { + remain := dataLength - offset + if remain < l { + l = remain + } + fh.Write(data[offset : offset+l]) + offset += 32 + } + refHash := fh.Finish(nil) + eq := true + if expected[i] != fmt.Sprintf("%x", refHash) { + mismatch++ + eq = false + } + t.Logf("[%7d+%4d]\t%v\tref: %x\texpect: %s", dataLength/chunkSize, dataLength%chunkSize, eq, refHash, expected[i]) } if mismatch > 0 { t.Fatalf("mismatches: %d/%d", mismatch, len(dataLengths)) From 4fa31dc37b6513769a40e5a806a59749cbccc753 Mon Sep 17 00:00:00 2001 From: lash Date: Thu, 27 Sep 2018 13:49:02 +0200 Subject: [PATCH 21/50] swarm/storage: Correct parent offset calculation --- swarm/storage/filehasher_alt.go | 6 +++-- swarm/storage/filehasher_test.go | 45 +++++++++++++++++--------------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index ee98f22e20..3db5594b56 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -203,9 +203,11 @@ func (f *AltFileHasher) write(b []byte, offset int, level int) { hashResult := f.hashers[level].Sum(nil, hashDataSize, meta) f.hashers[level].Reset() go func(level int, wc int, finished bool) { + // if the hasher on the level about is still working, wait for it f.lwg[level+1].Wait() - chunkWriteCount := wc % f.branches - parentOffset := (chunkWriteCount - 1) / f.branches + //chunkWriteCount := wc % f.branches + //parentOffset := (chunkWriteCount - 1) / f.branches + parentOffset := (wc - 1) / f.branches if (level == 0 && finished) || f.targetCount[level] == wc { log.Debug("done", "level", level) f.lock.Lock() diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 9b6e3ed869..c48e834774 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -22,8 +22,7 @@ func newAsyncHasher() bmt.SectionWriter { return h.NewAsyncWriter(false) } -func TestReferenceFileHasher(t *testing.T) { - h := bmt.New(pool) +func TestAltFileHasher(t *testing.T) { var mismatch int chunkSize := 128 * 32 expected := []string{ @@ -48,14 +47,27 @@ func TestReferenceFileHasher(t *testing.T) { "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", } - //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} - dataLengths := []int{31, 32, 
33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32} - //dataLengths := []int{chunkSize + 32} - for i, dataLength := range dataLengths { + dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} + start := 14 + //end := len(dataLengths) + //start := 7 + end := 15 + for i := start; i < end; i++ { + dataLength := dataLengths[i] log.Info("start", "len", dataLength) - fh := NewReferenceFileHasher(h, 128) + fh := NewAltFileHasher(newAsyncHasher, 32, 128) _, data := generateSerialData(dataLength, 255, 0) - refHash := fh.Hash(bytes.NewReader(data), len(data)).Bytes() + l := 32 + offset := 0 + for i := 0; i < dataLength; i += 32 { + remain := dataLength - offset + if remain < l { + l = remain + } + fh.Write(data[offset : offset+l]) + offset += 32 + } + refHash := fh.Finish(nil) eq := true if expected[i] != fmt.Sprintf("%x", refHash) { mismatch++ @@ -68,7 +80,8 @@ func TestReferenceFileHasher(t *testing.T) { } } -func TestAltFileHasher(t *testing.T) { +func TestReferenceFileHasher(t *testing.T) { + h := bmt.New(pool) var mismatch int chunkSize := 128 * 32 expected := []string{ @@ -98,19 +111,9 @@ func TestAltFileHasher(t *testing.T) { //dataLengths := []int{chunkSize + 32} for i, dataLength := range dataLengths { log.Info("start", "len", dataLength) - fh := NewAltFileHasher(newAsyncHasher, 32, 128) + fh := NewReferenceFileHasher(h, 128) _, data := generateSerialData(dataLength, 255, 0) - l := 32 - offset := 0 - for i := 0; i < dataLength; i += 32 { - remain := dataLength - offset - if remain < l { - l = remain - } - fh.Write(data[offset : offset+l]) - offset += 32 - } - refHash := fh.Finish(nil) + refHash := fh.Hash(bytes.NewReader(data), len(data)).Bytes() eq := true if expected[i] != fmt.Sprintf("%x", refHash) { mismatch++ From 83b3bb55635d5504f91289f1424316fd4baba2d1 Mon Sep 17 00:00:00 2001 From: lash Date: Thu, 27 Sep 2018 14:33:22 +0200 Subject: [PATCH 22/50] swarm/storage: Clean up filehasher test alt filehasher hangs on dangling chunk, wip fix --- swarm/storage/filehasher_alt.go | 23 ++++++++-- swarm/storage/filehasher_test.go | 77 ++++++++++++-------------------- 2 files changed, 48 insertions(+), 52 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index 3db5594b56..2063d19668 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -106,8 +106,7 @@ func (f *AltFileHasher) getPotentialSpan(level int) int { return span } -// TODO: check if length 0 -// TODO: log error if not end and len(b) < segmentsize +// TODO: ensure local copies of all thread unsafe vars // performs recursive hashing on complete batches or data end func (f *AltFileHasher) write(b []byte, offset int, level int) { @@ -172,6 +171,23 @@ func (f *AltFileHasher) write(b []byte, offset int, level int) { if executeHasher { + // check for the dangling chunk + if level > 0 && f.finished { + f.lock.Lock() + cwc := f.writeCount[level-1] + f.lock.Unlock() + if offset%f.branches == 0 && cwc%(f.branches*f.branches) < f.branches { + log.Debug("dangle", "level", level) + parentOffset := (wc - 1) / f.branches + f.write(b, parentOffset, level+1) + f.lock.Lock() + f.wg.Done() + f.lock.Unlock() + f.doneC[level] <- struct{}{} + return + } + } + f.lock.Lock() 
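// a note on the lwg bookkeeping: lwg[level] is Add(1)'d just below, before
// the chunk hash is dispatched, and Done()'d in the goroutine only after
// the result has been written one level up; the goroutine in turn waits on
// lwg[level+1] before writing upward, so a level is never written to while
// its own hasher is still busy (the intended contract as far as this WIP
// state shows)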
f.lwg[level].Add(1) f.lock.Unlock() @@ -205,8 +221,7 @@ func (f *AltFileHasher) write(b []byte, offset int, level int) { go func(level int, wc int, finished bool) { // if the hasher on the level about is still working, wait for it f.lwg[level+1].Wait() - //chunkWriteCount := wc % f.branches - //parentOffset := (chunkWriteCount - 1) / f.branches + parentOffset := (wc - 1) / f.branches if (level == 0 && finished) || f.targetCount[level] == wc { log.Debug("done", "level", level) diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index c48e834774..8a756aeea4 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -11,21 +11,19 @@ import ( "github.com/ethereum/go-ethereum/swarm/log" ) -var pool *bmt.TreePool - -func init() { - pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize*32) -} +const ( + segmentSize = 32 + branches = 128 + chunkSize = 4096 +) -func newAsyncHasher() bmt.SectionWriter { - h := bmt.New(pool) - return h.NewAsyncWriter(false) -} +var pool *bmt.TreePool -func TestAltFileHasher(t *testing.T) { - var mismatch int - chunkSize := 128 * 32 - expected := []string{ +var ( + start = 0 + end = 14 + dataLengths = []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} + expected = []string{ "ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", "0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", "3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", @@ -47,11 +45,20 @@ func TestAltFileHasher(t *testing.T) { "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", } - dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} - start := 14 - //end := len(dataLengths) - //start := 7 - end := 15 +) + +func init() { + pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize*32) +} + +func newAsyncHasher() bmt.SectionWriter { + h := bmt.New(pool) + return h.NewAsyncWriter(false) +} + +func TestAltFileHasher(t *testing.T) { + var mismatch int + for i := start; i < end; i++ { dataLength := dataLengths[i] log.Info("start", "len", dataLength) @@ -83,33 +90,8 @@ func TestAltFileHasher(t *testing.T) { func TestReferenceFileHasher(t *testing.T) { h := bmt.New(pool) var mismatch int - chunkSize := 128 * 32 - expected := []string{ - "ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", - "0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", - "3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", - "95510c2ff18276ed94be2160aed4e69c9116573b6f69faaeed1b426fea6a3db8", - "490072cc55b8ad381335ff882ac51303cc069cbcb8d8d3f7aa152d9c617829fe", - "541552bae05e9a63a6cb561f69edf36ffe073e441667dbf7a0e9a3864bb744ea", - "c10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef", - "91699c83ed93a1f87e326a29ccd8cc775323f9e7260035a5f014c975c5f3cd28", - "73759673a52c1f1707cbb61337645f4fcbd209cdc53d7e2cedaaa9f44df61285", - "db1313a727ffc184ae52a70012fbbf7235f551b9f2d2da04bf476abe42a3cb42", - 
"ade7af36ac0c7297dc1c11fd7b46981b629c6077bce75300f85b02a6153f161b", - "29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9", - "61416726988f77b874435bdd89a419edc3861111884fd60e8adf54e2f299efd6", - "3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09", - "e5c76afa931e33ac94bce2e754b1bb6407d07f738f67856783d93934ca8fc576", - "485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df", - "624b2abb7aefc0978f891b2a56b665513480e5dc195b4a66cd8def074a6d2e94", - "b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", - "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", - "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", - } - //dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} - dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32} - //dataLengths := []int{chunkSize + 32} - for i, dataLength := range dataLengths { + for i := start; i < end; i++ { + dataLength := dataLengths[i] log.Info("start", "len", dataLength) fh := NewReferenceFileHasher(h, 128) _, data := generateSerialData(dataLength, 255, 0) @@ -129,11 +111,10 @@ func TestReferenceFileHasher(t *testing.T) { func TestSum(t *testing.T) { var mismatch int - chunkSize := 128 * 32 serialOffset := 0 - dataLengths := []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} - for _, dl := range dataLengths { + for i := start; i < end; i++ { + dl := dataLengths[i] chunks := dl / chunkSize log.Debug("testing", "c", chunks, "s", dl%chunkSize) fhStartTime := time.Now() From fefa180d23c5ec1d29a0ab13e0410d3638e48378 Mon Sep 17 00:00:00 2001 From: lash Date: Thu, 27 Sep 2018 19:17:11 +0200 Subject: [PATCH 23/50] swarm/storage: Correct on dangling chunk Hangs intermittently, review concurrency in write state vars --- swarm/storage/filehasher_alt.go | 23 ++++++++++++----------- swarm/storage/filehasher_test.go | 28 ++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index 2063d19668..fc6aa730ff 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -88,14 +88,14 @@ func (f *AltFileHasher) Finish(b []byte) []byte { } // write and return result when we get it back - f.write(b, f.writeCount[0], 0) + f.write(b, f.writeCount[0], 0, f.totalBytes) f.wg.Wait() return f.buffers[f.levelCount-1][:f.segmentSize] } func (f *AltFileHasher) Write(b []byte) { f.totalBytes += len(b) - f.write(b, f.writeCount[0], 0) + f.write(b, f.writeCount[0], 0, f.totalBytes) } func (f *AltFileHasher) getPotentialSpan(level int) int { @@ -108,16 +108,16 @@ func (f *AltFileHasher) getPotentialSpan(level int) int { // TODO: ensure local copies of all thread unsafe vars // performs recursive hashing on complete batches or data end -func (f *AltFileHasher) write(b []byte, offset int, level int) { +func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { if b == nil { - log.Debug("write", "level", level, "offset", offset, 
"length", "nil") + log.Debug("write", "level", level, "offset", offset, "length", "nil", "wc", f.writeCount[level]) } else { l := 32 if len(b) < l { l = len(b) } - log.Debug("write", "level", level, "offset", offset, "length", len(b), "data", b[:l]) + log.Debug("write", "level", level, "offset", offset, "length", len(b), "wc", f.writeCount[level], "data", b[:l]) } // top level then return @@ -177,13 +177,13 @@ func (f *AltFileHasher) write(b []byte, offset int, level int) { cwc := f.writeCount[level-1] f.lock.Unlock() if offset%f.branches == 0 && cwc%(f.branches*f.branches) < f.branches { - log.Debug("dangle", "level", level) + log.Debug("dangle done", "level", level) parentOffset := (wc - 1) / f.branches - f.write(b, parentOffset, level+1) f.lock.Lock() f.wg.Done() f.lock.Unlock() f.doneC[level] <- struct{}{} + f.write(b, parentOffset, level+1, total) return } } @@ -199,7 +199,8 @@ func (f *AltFileHasher) write(b []byte, offset int, level int) { // if data is fully written, the current chunk may be shorter than the span var dataUnderSpan int if f.isWriteFinished() { - dataUnderSpan = (f.totalBytes-1)%span + 1 + //dataUnderSpan = (f.totalBytes-1)%span + 1 + dataUnderSpan = (total-1)%span + 1 } else { dataUnderSpan = span } @@ -218,7 +219,7 @@ func (f *AltFileHasher) write(b []byte, offset int, level int) { log.Debug("hash", "level", level, "size", hashDataSize, "meta", meta, "wc", wc) hashResult := f.hashers[level].Sum(nil, hashDataSize, meta) f.hashers[level].Reset() - go func(level int, wc int, finished bool) { + go func(level int, wc int, finished bool, total int) { // if the hasher on the level about is still working, wait for it f.lwg[level+1].Wait() @@ -230,11 +231,11 @@ func (f *AltFileHasher) write(b []byte, offset int, level int) { f.lock.Unlock() f.doneC[level] <- struct{}{} } - f.write(hashResult, parentOffset, level+1) + f.write(hashResult, parentOffset, level+1, total) f.lock.Lock() f.lwg[level].Done() f.lock.Unlock() - }(level, wc, f.finished) + }(level, wc, f.finished, f.totalBytes) } } diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 8a756aeea4..1a0ac42a64 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -20,10 +20,30 @@ const ( var pool *bmt.TreePool var ( - start = 0 - end = 14 - dataLengths = []int{31, 32, 33, 63, 64, 65, chunkSize, chunkSize + 31, chunkSize + 32, chunkSize + 63, chunkSize + 64, chunkSize * 2, chunkSize*2 + 32, chunkSize * 128, chunkSize*128 + 31, chunkSize*128 + 32, chunkSize*128 + 64, chunkSize * 129, chunkSize * 130, chunkSize * 128 * 128} - expected = []string{ + start = 14 + end = 15 + dataLengths = []int{31, // 0 + 32, // 1 + 33, // 2 + 63, // 3 + 64, // 4 + 65, // 5 + chunkSize, // 6 + chunkSize + 31, // 7 + chunkSize + 32, // 8 + chunkSize + 63, // 9 + chunkSize + 64, // 10 + chunkSize * 2, // 11 + chunkSize*2 + 32, // 12 + chunkSize * 128, // 13 + chunkSize*128 + 31, // 14 + chunkSize*128 + 32, // 15 + chunkSize*128 + 64, // 16 + chunkSize * 129, // 17 + chunkSize * 130, // 18 + chunkSize * 128 * 128, // 19 + } + expected = []string{ "ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", "0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", "3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", From b229c8c27dd623788832e3e900710781d666bb79 Mon Sep 17 00:00:00 2001 From: lash Date: Fri, 28 Sep 2018 16:17:16 +0200 Subject: [PATCH 24/50] swarm/storage: Add comments on altfilehasher --- swarm/storage/filehasher_alt.go | 24 
+++++++++++++----------- swarm/storage/filehasher_test.go | 5 +++-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index fc6aa730ff..4fe22cf7f5 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -15,18 +15,18 @@ const ( type AltFileHasher struct { branches int segmentSize int - hashers [altFileHasherMaxLevels]bmt.SectionWriter - buffers [altFileHasherMaxLevels - 1][]byte - levelCount int chunkSize int - finished bool - totalBytes int - targetCount [altFileHasherMaxLevels - 1]int - writeCount [altFileHasherMaxLevels]int - doneC [altFileHasherMaxLevels]chan struct{} - wg sync.WaitGroup // used when level done + hashers [altFileHasherMaxLevels]bmt.SectionWriter + buffers [altFileHasherMaxLevels][]byte // holds chunk data on each level (todo; push data to channel on complete) + levelCount int // number of levels in this job (only determined when Finish() is called + finished bool // finished writing data + totalBytes int // total data bytes written + targetCount [altFileHasherMaxLevels - 1]int // expected section writes per level + writeCount [altFileHasherMaxLevels]int // number of section writes per level + doneC [altFileHasherMaxLevels]chan struct{} // used to tell parent that child is done writing on right edge + wg sync.WaitGroup // used to tell caller hashing is done (maybe be replced by channel, and doneC only internally) lwg [altFileHasherMaxLevels]sync.WaitGroup // used when busy hashing - lock sync.Mutex + lock sync.Mutex // protect filehasher state vars } func NewAltFileHasher(hasherFunc func() bmt.SectionWriter, segmentSize int, branches int) *AltFileHasher { @@ -139,7 +139,9 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { // only write if we have data // data might be nil when upon write finish if b != nil { - f.hashers[level].Write(offset%f.branches, b) + netOffset := (offset % f.branches) + f.hashers[level].Write(netOffset, b) + copy(f.buffers[level][netOffset*f.segmentSize:], b) f.lock.Lock() f.writeCount[level]++ wc = f.writeCount[level] diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 1a0ac42a64..5ea758f028 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -20,8 +20,6 @@ const ( var pool *bmt.TreePool var ( - start = 14 - end = 15 dataLengths = []int{31, // 0 32, // 1 33, // 2 @@ -65,6 +63,9 @@ var ( "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", } + + start = 19 + end = 20 ) func init() { From d10a5e2b3e0ab69d417a65e889652a3680d90d80 Mon Sep 17 00:00:00 2001 From: lash Date: Sun, 14 Oct 2018 22:53:37 +0200 Subject: [PATCH 25/50] swarm/storage: WIP Extend filehasher level buffer to batch size --- swarm/storage/filehasher_alt.go | 49 ++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index 4fe22cf7f5..3604ebdcfa 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -13,30 +13,32 @@ const ( ) type AltFileHasher struct { - branches int - segmentSize int - chunkSize int - hashers [altFileHasherMaxLevels]bmt.SectionWriter - buffers [altFileHasherMaxLevels][]byte // holds chunk data on each level (todo; push data to channel on complete) - levelCount int // number of levels in this job (only determined when Finish() is called - 
finished bool // finished writing data - totalBytes int // total data bytes written - targetCount [altFileHasherMaxLevels - 1]int // expected section writes per level - writeCount [altFileHasherMaxLevels]int // number of section writes per level - doneC [altFileHasherMaxLevels]chan struct{} // used to tell parent that child is done writing on right edge - wg sync.WaitGroup // used to tell caller hashing is done (maybe be replced by channel, and doneC only internally) - lwg [altFileHasherMaxLevels]sync.WaitGroup // used when busy hashing - lock sync.Mutex // protect filehasher state vars + branches int + segmentSize int + chunkSize int + batchSegments int + hashers [altFileHasherMaxLevels]bmt.SectionWriter + buffers [altFileHasherMaxLevels][]byte // holds chunk data on each level (todo; push data to channel on complete) + levelCount int // number of levels in this job (only determined when Finish() is called + finished bool // finished writing data + totalBytes int // total data bytes written + targetCount [altFileHasherMaxLevels - 1]int // expected section writes per level + writeCount [altFileHasherMaxLevels]int // number of section writes per level + doneC [altFileHasherMaxLevels]chan struct{} // used to tell parent that child is done writing on right edge + wg sync.WaitGroup // used to tell caller hashing is done (maybe be replced by channel, and doneC only internally) + lwg [altFileHasherMaxLevels]sync.WaitGroup // used when busy hashing + lock sync.Mutex // protect filehasher state vars } func NewAltFileHasher(hasherFunc func() bmt.SectionWriter, segmentSize int, branches int) *AltFileHasher { f := &AltFileHasher{ - branches: branches, - segmentSize: segmentSize, - chunkSize: branches * segmentSize, + branches: branches, + segmentSize: segmentSize, + chunkSize: branches * segmentSize, + batchSegments: branches * branches, } for i := 0; i < altFileHasherMaxLevels-1; i++ { - f.buffers[i] = make([]byte, f.chunkSize) + f.buffers[i] = make([]byte, f.chunkSize*branches) // 4.6M with 9 levels f.hashers[i] = hasherFunc() f.doneC[i] = make(chan struct{}, 1) } @@ -139,8 +141,9 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { // only write if we have data // data might be nil when upon write finish if b != nil { - netOffset := (offset % f.branches) - f.hashers[level].Write(netOffset, b) + //netOffset := (offset % f.branches) + netOffset := (offset % f.batchSegments) + f.hashers[level].Write(netOffset%f.branches, b) copy(f.buffers[level][netOffset*f.segmentSize:], b) f.lock.Lock() f.writeCount[level]++ @@ -160,6 +163,7 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { // - we are above data level, writes are finished, and expected level write count is reached executeHasher := false if wc%f.branches == 0 { + //if wc%f.batchSegments == 0 { log.Debug("executehasher", "reason", "edge", "level", level, "offset", offset) executeHasher = true } else if f.finished && level == 0 { @@ -178,7 +182,8 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { f.lock.Lock() cwc := f.writeCount[level-1] f.lock.Unlock() - if offset%f.branches == 0 && cwc%(f.branches*f.branches) < f.branches { + //if offset%f.branches == 0 && cwc%(f.branches*f.branches) < f.branches { + if offset%f.batchSegments == 0 && cwc%f.batchSegments < f.branches { // verify why do we need the latter part? 
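// this branch handles the dangling chunk on the tree's right edge: a batch
// that ends up holding a single chunk reference only. A rough worked
// example, assuming segmentSize=32 and branches=128 (so chunkSize=4096):
// for dataLength = chunkSize*128 + 32, level 0 yields 129 chunk hashes; the
// first 128 fill one complete chunk on level 1, while the 129th stands
// alone. Hashing a chunk that contains nothing but a single reference would
// only add a redundant tree level, so the lone hash is promoted unchanged
// to the parent by the recursive f.write() call below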
log.Debug("dangle done", "level", level) parentOffset := (wc - 1) / f.branches f.lock.Lock() @@ -225,7 +230,7 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { // if the hasher on the level about is still working, wait for it f.lwg[level+1].Wait() - parentOffset := (wc - 1) / f.branches + parentOffset := (wc - 1) / f.batchSegments //f.branches if (level == 0 && finished) || f.targetCount[level] == wc { log.Debug("done", "level", level) f.lock.Lock() From 2d7a24d18e6baab8906f793636a6ebbfe336fe36 Mon Sep 17 00:00:00 2001 From: lash Date: Thu, 18 Oct 2018 06:53:26 +0200 Subject: [PATCH 26/50] swarm/storage: WIP Correct but hangs --- swarm/storage/filehasher_alt.go | 13 ++++++------- swarm/storage/filehasher_test.go | 4 ++-- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index 3604ebdcfa..c8d38914c7 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -112,6 +112,7 @@ func (f *AltFileHasher) getPotentialSpan(level int) int { // performs recursive hashing on complete batches or data end func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { + // only for log, delete on prod if b == nil { log.Debug("write", "level", level, "offset", offset, "length", "nil", "wc", f.writeCount[level]) } else { @@ -122,7 +123,7 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { log.Debug("write", "level", level, "offset", offset, "length", len(b), "wc", f.writeCount[level], "data", b[:l]) } - // top level then return + // if top level then return if level == f.levelCount-1 { copy(f.buffers[level], b) f.lock.Lock() @@ -133,13 +134,12 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { } // thread safe writecount - // b will never be nil except bottom level, which will have already been hashed if on chunk boundary f.lock.Lock() wc := f.writeCount[level] f.lock.Unlock() // only write if we have data - // data might be nil when upon write finish + // b will never be nil except bottom level where it can be nil upon finish (which will have already been hashed if on chunk boundary) if b != nil { //netOffset := (offset % f.branches) netOffset := (offset % f.batchSegments) @@ -163,7 +163,6 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { // - we are above data level, writes are finished, and expected level write count is reached executeHasher := false if wc%f.branches == 0 { - //if wc%f.batchSegments == 0 { log.Debug("executehasher", "reason", "edge", "level", level, "offset", offset) executeHasher = true } else if f.finished && level == 0 { @@ -227,10 +226,10 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { hashResult := f.hashers[level].Sum(nil, hashDataSize, meta) f.hashers[level].Reset() go func(level int, wc int, finished bool, total int) { - // if the hasher on the level about is still working, wait for it + // if the hasher on the level above is still working, wait for it f.lwg[level+1].Wait() - - parentOffset := (wc - 1) / f.batchSegments //f.branches + parentOffset := (wc - 1) / f.branches + log.Debug(">>>> wc", "wc", wc, "l", level, "f.BatchSegments", f.batchSegments, "parentffset", parentOffset) if (level == 0 && finished) || f.targetCount[level] == wc { log.Debug("done", "level", level) f.lock.Lock() diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 5ea758f028..3b120183b3 100644 --- 
a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -64,8 +64,8 @@ var ( "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", } - start = 19 - end = 20 + start = 1 + end = 14 ) func init() { From 4f49a0380a80132563466c453fb5fb980902109d Mon Sep 17 00:00:00 2001 From: lash Date: Sat, 20 Oct 2018 11:55:36 +0200 Subject: [PATCH 27/50] swarm/storage: Resolve hang --- swarm/storage/filehasher_alt.go | 48 +++++++++++++++++---------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index c8d38914c7..f9a6a6e943 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -67,6 +67,7 @@ func (f *AltFileHasher) isWriteFinished() bool { } func (f *AltFileHasher) Finish(b []byte) []byte { + f.lock.Lock() if b != nil { f.totalBytes += len(b) } @@ -88,16 +89,17 @@ func (f *AltFileHasher) Finish(b []byte) []byte { f.targetCount[i] = target log.Debug("setting targetcount", "l", i, "t", target) } + f.lock.Unlock() // write and return result when we get it back - f.write(b, f.writeCount[0], 0, f.totalBytes) + f.write(b, f.writeCount[0], 0) f.wg.Wait() return f.buffers[f.levelCount-1][:f.segmentSize] } func (f *AltFileHasher) Write(b []byte) { f.totalBytes += len(b) - f.write(b, f.writeCount[0], 0, f.totalBytes) + f.write(b, f.writeCount[0], 0) } func (f *AltFileHasher) getPotentialSpan(level int) int { @@ -110,38 +112,40 @@ func (f *AltFileHasher) getPotentialSpan(level int) int { // TODO: ensure local copies of all thread unsafe vars // performs recursive hashing on complete batches or data end -func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { +func (f *AltFileHasher) write(b []byte, offset int, level int) { + + // thread safe state vars + f.lock.Lock() + wc := f.writeCount[level] + currentTotal := f.totalBytes + targetCount := f.targetCount[level] + f.lock.Unlock() // only for log, delete on prod if b == nil { - log.Debug("write", "level", level, "offset", offset, "length", "nil", "wc", f.writeCount[level]) + log.Debug("write", "level", level, "offset", offset, "length", "nil", "wc", f.writeCount[level], "total", currentTotal) } else { l := 32 if len(b) < l { l = len(b) } - log.Debug("write", "level", level, "offset", offset, "length", len(b), "wc", f.writeCount[level], "data", b[:l]) + log.Debug("write", "level", level, "offset", offset, "length", len(b), "wc", f.writeCount[level], "data", b[:l], "total", currentTotal) } + f.lock.Lock() // if top level then return if level == f.levelCount-1 { copy(f.buffers[level], b) - f.lock.Lock() f.wg.Done() f.lock.Unlock() log.Debug("top done", "level", level) return } - - // thread safe writecount - f.lock.Lock() - wc := f.writeCount[level] f.lock.Unlock() // only write if we have data // b will never be nil except bottom level where it can be nil upon finish (which will have already been hashed if on chunk boundary) if b != nil { - //netOffset := (offset % f.branches) netOffset := (offset % f.batchSegments) f.hashers[level].Write(netOffset%f.branches, b) copy(f.buffers[level][netOffset*f.segmentSize:], b) @@ -168,9 +172,9 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { } else if f.finished && level == 0 { log.Debug("executehasher", "reason", "data done", "level", level, "offset", offset) executeHasher = true - } else if f.finished && f.targetCount[level] > 0 && f.targetCount[level] == wc { + } else if f.finished && targetCount > 0 && targetCount == wc { 
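// this level has received every write it will ever get (targetCount was
// fixed once Finish() ran), but the child level may still be flushing its
// right edge; the receive below blocks until the child signals done, so the
// final Sum on this level cannot run on incomplete data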
<-f.doneC[level-1] - log.Debug("executehasher", "reason", "target done", "level", level, "offset", offset) + log.Debug("executehasher", "reason", "target done", "level", level, "offset", offset, "wc", wc) executeHasher = true } @@ -181,15 +185,15 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { f.lock.Lock() cwc := f.writeCount[level-1] f.lock.Unlock() - //if offset%f.branches == 0 && cwc%(f.branches*f.branches) < f.branches { - if offset%f.batchSegments == 0 && cwc%f.batchSegments < f.branches { // verify why do we need the latter part? + // TODO: verify why do we need the latter part again? + if offset%f.batchSegments == 0 && cwc%f.batchSegments < f.branches { log.Debug("dangle done", "level", level) parentOffset := (wc - 1) / f.branches f.lock.Lock() f.wg.Done() f.lock.Unlock() f.doneC[level] <- struct{}{} - f.write(b, parentOffset, level+1, total) + f.write(b, parentOffset, level+1) return } } @@ -205,8 +209,7 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { // if data is fully written, the current chunk may be shorter than the span var dataUnderSpan int if f.isWriteFinished() { - //dataUnderSpan = (f.totalBytes-1)%span + 1 - dataUnderSpan = (total-1)%span + 1 + dataUnderSpan = (currentTotal-1)%span + 1 } else { dataUnderSpan = span } @@ -225,23 +228,22 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, total int) { log.Debug("hash", "level", level, "size", hashDataSize, "meta", meta, "wc", wc) hashResult := f.hashers[level].Sum(nil, hashDataSize, meta) f.hashers[level].Reset() - go func(level int, wc int, finished bool, total int) { + go func(level int, wc int, finished bool, total int, targetCount int) { // if the hasher on the level above is still working, wait for it f.lwg[level+1].Wait() parentOffset := (wc - 1) / f.branches - log.Debug(">>>> wc", "wc", wc, "l", level, "f.BatchSegments", f.batchSegments, "parentffset", parentOffset) - if (level == 0 && finished) || f.targetCount[level] == wc { + if (level == 0 && finished) || targetCount == wc { log.Debug("done", "level", level) f.lock.Lock() f.wg.Done() f.lock.Unlock() f.doneC[level] <- struct{}{} } - f.write(hashResult, parentOffset, level+1, total) + f.write(hashResult, parentOffset, level+1) //, total) f.lock.Lock() f.lwg[level].Done() f.lock.Unlock() - }(level, wc, f.finished, f.totalBytes) + }(level, wc, f.finished, currentTotal, targetCount) //f.totalBytes) } } From 4759c5c48e3a41e2a6ba613e277cfe3dc1088004 Mon Sep 17 00:00:00 2001 From: lash Date: Sat, 20 Oct 2018 12:46:25 +0200 Subject: [PATCH 28/50] swarm/storage: Reinstate testdata gens after filehasher rebase --- swarm/storage/common_test.go | 33 +++++++++++++++++++++------------ swarm/storage/split_test.go | 1 + 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/swarm/storage/common_test.go b/swarm/storage/common_test.go index 7b27498c60..9113d878cb 100644 --- a/swarm/storage/common_test.go +++ b/swarm/storage/common_test.go @@ -162,16 +162,13 @@ func mget(store ChunkStore, hs []Address, f func(h Address, chunk Chunk) error) return err } -func testDataReader(l int) (r io.Reader) { - return io.LimitReader(rand.Reader, int64(l)) -} - -func (r *brokenLimitedReader) Read(buf []byte) (int, error) { - if r.off+len(buf) > r.errAt { - return 0, fmt.Errorf("Broken reader") +func generateRandomData(l int) (r io.Reader, slice []byte) { + slice = make([]byte, l) + if _, err := rand.Read(slice); err != nil { + panic("rand error") } - r.off += len(buf) - return r.lr.Read(buf) + r = 
io.LimitReader(bytes.NewReader(slice), int64(l)) + return } func generateSerialData(l int, mod int, offset int) (r io.Reader, slice []byte) { @@ -183,12 +180,24 @@ func generateSerialData(l int, mod int, offset int) (r io.Reader, slice []byte) return } -func testStoreRandom(m ChunkStore, processors int, n int, chunksize int64, t *testing.T) { - hs, err := mputRandomChunks(m, processors, n, chunksize) +func testDataReader(l int) (r io.Reader) { + return io.LimitReader(rand.Reader, int64(l)) +} + +func (r *brokenLimitedReader) Read(buf []byte) (int, error) { + if r.off+len(buf) > r.errAt { + return 0, fmt.Errorf("Broken reader") + } + r.off += len(buf) + return r.lr.Read(buf) +} + +func testStoreRandom(m ChunkStore, n int, chunksize int64, t *testing.T) { + chunks, err := mputRandomChunks(m, n, chunksize) if err != nil { t.Fatalf("expected no error, got %v", err) } - err := mget(m, hs, nil) + err = mget(m, chunkAddresses(chunks), nil) if err != nil { t.Fatalf("testStore failed: %v", err) } diff --git a/swarm/storage/split_test.go b/swarm/storage/split_test.go index 147316779d..28805a6fcd 100644 --- a/swarm/storage/split_test.go +++ b/swarm/storage/split_test.go @@ -32,6 +32,7 @@ const DefaultChunkCount = 2 var MaxExcessSize = DefaultChunkCount func TestFakeHasher(t *testing.T) { + t.Skip("not yet adapted to underlying changes") sectionSize := 32 sizes := []int{0, sectionSize - 1, sectionSize, sectionSize + 1, sectionSize * 4, sectionSize*4 + 1} bufSizes := []int{32, 7, sectionSize / 2, sectionSize, sectionSize + 1, sectionSize*4 + 1} From 99142eab630c7203ab3db6d1bce3aa28f6e7ebd5 Mon Sep 17 00:00:00 2001 From: lash Date: Tue, 23 Oct 2018 11:25:42 +0200 Subject: [PATCH 29/50] WIP Benchmark file hashers --- swarm/storage/filehasher_alt.go | 24 ++++--- swarm/storage/filehasher_test.go | 106 ++++++++++++++++++++++++++++++- 2 files changed, 119 insertions(+), 11 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index f9a6a6e943..2ad34931a0 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -79,7 +79,10 @@ func (f *AltFileHasher) Finish(b []byte) []byte { log.Debug("finish set", "levelcount", f.levelCount) for i := altFileHasherMaxLevels; i > f.levelCount; i-- { log.Debug("purging unused level wg", "l", i) + f.lock.Lock() f.wg.Done() + log.Debug("lock flush level", "level", i) + f.lock.Unlock() } // calculate the amount of writes expected on each level @@ -137,8 +140,8 @@ func (f *AltFileHasher) write(b []byte, offset int, level int) { if level == f.levelCount-1 { copy(f.buffers[level], b) f.wg.Done() - f.lock.Unlock() log.Debug("top done", "level", level) + f.lock.Unlock() return } f.lock.Unlock() @@ -236,6 +239,7 @@ func (f *AltFileHasher) write(b []byte, offset int, level int) { log.Debug("done", "level", level) f.lock.Lock() f.wg.Done() + log.Debug("done", "level", level) f.lock.Unlock() f.doneC[level] <- struct{}{} } @@ -247,11 +251,13 @@ func (f *AltFileHasher) write(b []byte, offset int, level int) { } } -func (f *AltFileHasher) wgDoneFunc(level int, prune bool) func() { - log.Warn("done", "level", level, "prune", prune) - return func() { - f.lock.Lock() - f.wg.Done() - f.lock.Unlock() - } -} +// +//func (f *AltFileHasher) wgDoneFunc(level int, prune bool) func() { +// log.Warn("done", "level", level, "prune", prune) +// return func() { +// f.lock.Lock() +// f.wg.Done() +// log.Debug("done", "level", level) +// f.lock.Unlock() +// } +//} diff --git a/swarm/storage/filehasher_test.go 
b/swarm/storage/filehasher_test.go index 3b120183b3..e61eede0a8 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -2,7 +2,11 @@ package storage import ( "bytes" + "context" "fmt" + "io" + "strconv" + "strings" "testing" "time" @@ -64,8 +68,8 @@ var ( "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", } - start = 1 - end = 14 + start = 7 + end = 8 ) func init() { @@ -170,3 +174,101 @@ func TestSum(t *testing.T) { t.Fatalf("%d/%d mismatches", mismatch, len(dataLengths)) } } + +func BenchmarkAltFileHasher(b *testing.B) { + for i := 0; i < len(dataLengths)-1; i++ { + b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkAltFileHasher) + } +} + +func benchmarkAltFileHasher(b *testing.B) { + params := strings.Split(b.Name(), "/") + dataLength, err := strconv.ParseInt(params[1], 10, 64) + if err != nil { + b.Fatal(err) + } + _, data := generateSerialData(int(dataLength), 255, 0) + b.ResetTimer() + for i := 0; i < b.N; i++ { + fh := NewAltFileHasher(newAsyncHasher, 32, 128) + l := int64(32) + offset := int64(0) + for j := int64(0); j < dataLength; j += 32 { + remain := dataLength - offset + if remain < l { + l = remain + } + fh.Write(data[offset : offset+l]) + offset += 32 + } + fh.Finish(nil) + } +} + +func BenchmarkPyramidHasherCompareAltFileHasher(b *testing.B) { + + for i := 0; i < len(dataLengths)-1; i++ { + b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkPyramidHasherCompareAltFileHasher) + } +} + +func benchmarkPyramidHasherCompareAltFileHasher(b *testing.B) { + //t.ReportAllocs() + params := strings.Split(b.Name(), "/") + dataLength, err := strconv.ParseInt(params[1], 10, 64) + if err != nil { + b.Fatal(err) + } + _, data := generateSerialData(int(dataLength), 255, 0) + buf := bytes.NewReader(data) + b.ResetTimer() + + for i := 0; i < b.N; i++ { + buf.Seek(0, io.SeekStart) + putGetter := newTestHasherStore(&FakeChunkStore{}, BMTHash) + + ctx := context.Background() + _, wait, err := PyramidSplit(ctx, buf, putGetter, putGetter) + if err != nil { + b.Fatalf(err.Error()) + } + err = wait(ctx) + if err != nil { + b.Fatalf(err.Error()) + } + } +} + +func BenchmarkFileHasher(b *testing.B) { + for i := 0; i < len(dataLengths)-1; i++ { + b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkFileHasher) + } +} + +func benchmarkFileHasher(b *testing.B) { + params := strings.Split(b.Name(), "/") + dataLength, err := strconv.ParseInt(params[1], 10, 64) + if err != nil { + b.Fatal(err) + } + _, data := generateSerialData(int(dataLength), 255, 0) + + for i := 0; i < b.N; i++ { + for i := start; i < end; i++ { + fh := NewFileHasher(newAsyncHasher, 128, 32) + for i := 0; i < len(data); i += 32 { + max := i + 32 + if len(data) < max { + max = len(data) + } + _, err := fh.WriteBuffer(i, data[i:max]) + if err != nil { + b.Fatal(err) + } + } + + fh.SetLength(int64(dataLength)) + fh.Sum(nil) + } + } +} From 25da853a727cec424c07accaf83a7455b9999366 Mon Sep 17 00:00:00 2001 From: lash Date: Tue, 23 Oct 2018 21:25:39 +0200 Subject: [PATCH 30/50] swarm/storage: WIP fix missed dangling hang, but hang on *2/*128+n/*129+n --- swarm/storage/filehasher_alt.go | 62 +++++++++++++++++--------------- swarm/storage/filehasher_test.go | 10 +++--- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index 2ad34931a0..13086eadd6 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -76,33 +76,39 @@ func (f *AltFileHasher) Finish(b []byte) []byte { // find our level height 
and release the unused levels f.levelCount = getLevelsFromLength(f.totalBytes, f.segmentSize, f.branches) - log.Debug("finish set", "levelcount", f.levelCount) + log.Debug("finish set", "levelcount", f.levelCount, "b", len(b)) for i := altFileHasherMaxLevels; i > f.levelCount; i-- { log.Debug("purging unused level wg", "l", i) - f.lock.Lock() f.wg.Done() - log.Debug("lock flush level", "level", i) - f.lock.Unlock() } // calculate the amount of writes expected on each level - target := (f.totalBytes-1)/f.segmentSize + 1 + target := f.writeCount[0] + if b != nil { + target++ + } + log.Debug("setting targetcount", "l", 0, "t", target) + target = (f.totalBytes-1)/f.segmentSize + 1 for i := 1; i < f.levelCount; i++ { target = (target-1)/f.branches + 1 f.targetCount[i] = target log.Debug("setting targetcount", "l", i, "t", target) } + f.lock.Unlock() // write and return result when we get it back - f.write(b, f.writeCount[0], 0) + f.lwg[0].Wait() + //f.write(b, f.writeCount[0], 0) + f.write(b, f.writeCount[0], 0, f.totalBytes) f.wg.Wait() return f.buffers[f.levelCount-1][:f.segmentSize] } func (f *AltFileHasher) Write(b []byte) { f.totalBytes += len(b) - f.write(b, f.writeCount[0], 0) + //f.write(b, f.writeCount[0], 0) + f.write(b, f.writeCount[0], 0, f.totalBytes) } func (f *AltFileHasher) getPotentialSpan(level int) int { @@ -115,12 +121,13 @@ func (f *AltFileHasher) getPotentialSpan(level int) int { // TODO: ensure local copies of all thread unsafe vars // performs recursive hashing on complete batches or data end -func (f *AltFileHasher) write(b []byte, offset int, level int) { +//func (f *AltFileHasher) write(b []byte, offset int, level int) { +func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) { // thread safe state vars f.lock.Lock() wc := f.writeCount[level] - currentTotal := f.totalBytes + //currentTotal := f.totalBytes targetCount := f.targetCount[level] f.lock.Unlock() @@ -140,8 +147,8 @@ func (f *AltFileHasher) write(b []byte, offset int, level int) { if level == f.levelCount-1 { copy(f.buffers[level], b) f.wg.Done() - log.Debug("top done", "level", level) f.lock.Unlock() + log.Debug("top done", "level", level) return } f.lock.Unlock() @@ -187,18 +194,24 @@ func (f *AltFileHasher) write(b []byte, offset int, level int) { if level > 0 && f.finished { f.lock.Lock() cwc := f.writeCount[level-1] - f.lock.Unlock() + + log.Debug("danglecheck", "offset", offset, "f.batchSegments", f.batchSegments, "cwc", cwc) // TODO: verify why do we need the latter part again? 
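// (one possible answer to the TODO above, a reading rather than a verified
// fix: the first part of the condition says this level's write sits on a
// chunk boundary, i.e. the reference would open a brand new chunk; the
// latter part says the child level fed less than one full chunk of segments
// into it, so that new chunk would hold this single dangling reference
// only, which is exactly the case that must be promoted instead of hashed)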
- if offset%f.batchSegments == 0 && cwc%f.batchSegments < f.branches { - log.Debug("dangle done", "level", level) + childWrites := cwc % f.batchSegments + //if offset%f.batchSegments == 0 && childWrites < f.branches { + //if offset%f.branches == 0 && childWrites < f.branches && childWrites > 0 { + if offset%f.branches == 0 && childWrites < f.branches { + f.lwg[level+1].Wait() + log.Debug("dangle done", "level", level, "wc", wc) parentOffset := (wc - 1) / f.branches - f.lock.Lock() f.wg.Done() f.lock.Unlock() f.doneC[level] <- struct{}{} - f.write(b, parentOffset, level+1) + //f.write(b, parentOffset, level+1) + f.write(b, parentOffset, level+1, currentTotal) return } + f.lock.Unlock() } f.lock.Lock() @@ -234,30 +247,21 @@ func (f *AltFileHasher) write(b []byte, offset int, level int) { go func(level int, wc int, finished bool, total int, targetCount int) { // if the hasher on the level above is still working, wait for it f.lwg[level+1].Wait() + log.Debug("gofunc hash up", "level", level, "wc", wc) parentOffset := (wc - 1) / f.branches if (level == 0 && finished) || targetCount == wc { log.Debug("done", "level", level) f.lock.Lock() f.wg.Done() - log.Debug("done", "level", level) f.lock.Unlock() f.doneC[level] <- struct{}{} } - f.write(hashResult, parentOffset, level+1) //, total) + //f.write(hashResult, parentOffset, level+1) + f.write(hashResult, parentOffset, level+1, total) f.lock.Lock() f.lwg[level].Done() f.lock.Unlock() - }(level, wc, f.finished, currentTotal, targetCount) //f.totalBytes) + }(level, wc, f.finished, currentTotal, targetCount) + } } - -// -//func (f *AltFileHasher) wgDoneFunc(level int, prune bool) func() { -// log.Warn("done", "level", level, "prune", prune) -// return func() { -// f.lock.Lock() -// f.wg.Done() -// log.Debug("done", "level", level) -// f.lock.Unlock() -// } -//} diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index e61eede0a8..e62b79afd1 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -68,8 +68,8 @@ var ( "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", } - start = 7 - end = 8 + start = 19 + end = 20 ) func init() { @@ -176,7 +176,7 @@ func TestSum(t *testing.T) { } func BenchmarkAltFileHasher(b *testing.B) { - for i := 0; i < len(dataLengths)-1; i++ { + for i := start; i < end; i++ { b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkAltFileHasher) } } @@ -207,7 +207,7 @@ func benchmarkAltFileHasher(b *testing.B) { func BenchmarkPyramidHasherCompareAltFileHasher(b *testing.B) { - for i := 0; i < len(dataLengths)-1; i++ { + for i := start; i < end; i++ { b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkPyramidHasherCompareAltFileHasher) } } @@ -240,7 +240,7 @@ func benchmarkPyramidHasherCompareAltFileHasher(b *testing.B) { } func BenchmarkFileHasher(b *testing.B) { - for i := 0; i < len(dataLengths)-1; i++ { + for i := start; i < end; i++ { b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkFileHasher) } } From 010cbcc719ffdf7985ec8da886d8ae761528aa51 Mon Sep 17 00:00:00 2001 From: lash Date: Fri, 26 Oct 2018 17:28:21 +0200 Subject: [PATCH 31/50] swarm/storage: WIP pass all tests but altfilehasher sometimes hangs in bench --- swarm/storage/filehasher_alt.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index 13086eadd6..cfab63b0d6 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -98,7 +98,6 @@ func (f *AltFileHasher) Finish(b 
[]byte) []byte { f.lock.Unlock() // write and return result when we get it back - f.lwg[0].Wait() //f.write(b, f.writeCount[0], 0) f.write(b, f.writeCount[0], 0, f.totalBytes) f.wg.Wait() @@ -188,6 +187,15 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) executeHasher = true } + // if this was a nil data finish instruction and we are on boundary, we've already hashed what we need to hash + if f.finished && len(b) == 0 && level == 0 { + f.lwg[0].Wait() + log.Debug("finished and 0", "wc", wc) + if wc%f.branches == 0 { + executeHasher = false + } + } + if executeHasher { // check for the dangling chunk @@ -200,8 +208,8 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) childWrites := cwc % f.batchSegments //if offset%f.batchSegments == 0 && childWrites < f.branches { //if offset%f.branches == 0 && childWrites < f.branches && childWrites > 0 { - if offset%f.branches == 0 && childWrites < f.branches { - f.lwg[level+1].Wait() + if offset%f.branches == 0 && childWrites <= f.branches { + // f.lwg[level+1].Wait() log.Debug("dangle done", "level", level, "wc", wc) parentOffset := (wc - 1) / f.branches f.wg.Done() From 0b484f49866a3fcb9f26d77dd53a003f339b9191 Mon Sep 17 00:00:00 2001 From: lash Date: Fri, 26 Oct 2018 17:39:47 +0200 Subject: [PATCH 32/50] swarm/storage: Add ReferenceFileHasher benchmark (unnecessary, but nicetoknow) --- swarm/storage/filehasher_test.go | 47 ++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index e62b79afd1..2ce94a2d0e 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -254,21 +254,40 @@ func benchmarkFileHasher(b *testing.B) { _, data := generateSerialData(int(dataLength), 255, 0) for i := 0; i < b.N; i++ { - for i := start; i < end; i++ { - fh := NewFileHasher(newAsyncHasher, 128, 32) - for i := 0; i < len(data); i += 32 { - max := i + 32 - if len(data) < max { - max = len(data) - } - _, err := fh.WriteBuffer(i, data[i:max]) - if err != nil { - b.Fatal(err) - } + fh := NewFileHasher(newAsyncHasher, 128, 32) + for i := 0; i < len(data); i += 32 { + max := i + 32 + if len(data) < max { + max = len(data) + } + _, err := fh.WriteBuffer(i, data[i:max]) + if err != nil { + b.Fatal(err) } - - fh.SetLength(int64(dataLength)) - fh.Sum(nil) } + + fh.SetLength(int64(dataLength)) + fh.Sum(nil) + } +} + +func BenchmarkReferenceHasher(b *testing.B) { + for i := start; i < end; i++ { + b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkReferenceFileHasher) + } +} + +func benchmarkReferenceFileHasher(b *testing.B) { + params := strings.Split(b.Name(), "/") + dataLength, err := strconv.ParseInt(params[1], 10, 64) + if err != nil { + b.Fatal(err) + } + _, data := generateSerialData(int(dataLength), 255, 0) + b.ResetTimer() + for i := 0; i < b.N; i++ { + h := bmt.New(pool) + fh := NewReferenceFileHasher(h, 128) + fh.Hash(bytes.NewReader(data), len(data)).Bytes() } } From 466ed06e88bc7f35762eb858863e73a305eed4ea Mon Sep 17 00:00:00 2001 From: lash Date: Thu, 7 Mar 2019 15:41:44 +0100 Subject: [PATCH 33/50] swarm/storage: Resolve hang in AltFileHasher hashing --- swarm/storage/filehasher_alt.go | 76 ++++++++++++++++++++------------ swarm/storage/filehasher_test.go | 2 +- 2 files changed, 48 insertions(+), 30 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index cfab63b0d6..53277f7f18 100644 --- a/swarm/storage/filehasher_alt.go 
+++ b/swarm/storage/filehasher_alt.go @@ -18,12 +18,12 @@ type AltFileHasher struct { chunkSize int batchSegments int hashers [altFileHasherMaxLevels]bmt.SectionWriter - buffers [altFileHasherMaxLevels][]byte // holds chunk data on each level (todo; push data to channel on complete) + buffers [altFileHasherMaxLevels][]byte // holds chunk data on each level (todo; push data to channel on complete). Buffers can hold one batch of data levelCount int // number of levels in this job (only determined when Finish() is called finished bool // finished writing data totalBytes int // total data bytes written - targetCount [altFileHasherMaxLevels - 1]int // expected segment writes per level - writeCount [altFileHasherMaxLevels]int // number of segment writes per level + targetCount [altFileHasherMaxLevels - 1]int // expected segment writes per level + writeCount [altFileHasherMaxLevels]int // number of segment writes per level doneC [altFileHasherMaxLevels]chan struct{} // used to tell parent that child is done writing on right edge wg sync.WaitGroup // used to tell caller hashing is done (maybe be replced by channel, and doneC only internally) lwg [altFileHasherMaxLevels]sync.WaitGroup // used when busy hashing @@ -68,27 +68,28 @@ func (f *AltFileHasher) isWriteFinished() bool { func (f *AltFileHasher) Finish(b []byte) []byte { f.lock.Lock() + + // if we call finish with additional data + // include this data in the total length if b != nil { f.totalBytes += len(b) } f.finished = true - // find our level height and release the unused levels + // find our level height and decrease the waitgroup count to used levels only f.levelCount = getLevelsFromLength(f.totalBytes, f.segmentSize, f.branches) for i := altFileHasherMaxLevels; i > f.levelCount; i-- { log.Debug("purging unused level wg", "l", i) f.wg.Done() } - // calculate the amount of writes expected on each level + // calculate the amount of write() calls expected in total + // start with the amount of data writes (level 0) + // add number of writes divided by 128 for every additional level + // we don't use targetCount for level 0, since f.finished indicates that it has been reached target := (f.totalBytes-1)/f.segmentSize + 1 log.Debug("setting targetcount", "l", 0, "t", target) for i := 1; i < f.levelCount; i++ { target = (target-1)/f.branches + 1 f.targetCount[i] = target @@ -98,18 +99,22 @@ func (f *AltFileHasher) Finish(b []byte) []byte { f.lock.Unlock() // write and return result when we get it back f.write(b, f.writeCount[0], 0, f.totalBytes) f.wg.Wait() return f.buffers[f.levelCount-1][:f.segmentSize] } +// Write writes data provided from the buffer to the hasher +// \TODO currently not safe to write intermediate data of length not multiple of 32 bytes func (f *AltFileHasher) Write(b []byte) { f.totalBytes += len(b) - f.write(b, f.writeCount[0], 0, f.totalBytes) + for i := 0; i < len(b); i += 32 { + f.write(b[i:], f.writeCount[0], 0, f.totalBytes) + } } +// getPotentialSpan returns the total amount of data that can be represented under the given level +// \TODO use a table instead func (f *AltFileHasher) getPotentialSpan(level int) int { span := f.chunkSize for i := 0; i < level; i++ { span *= f.branches } return span } +// write writes the provided
data directly to the underlying hasher +// and performs recursive hashing on complete batches or data end +// b is the data to write +// offset is the level's segment we are writing to +// level is the tree level we are writing to +// currentTotal is the current total of data bytes written so far // TODO: ensure local copies of all thread unsafe vars -// performs recursive hashing on complete batches or data end //func (f *AltFileHasher) write(b []byte, offset int, level int) { func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) { - // thread safe state vars + // copy state vars so we don't have to keep lock across the call f.lock.Lock() wc := f.writeCount[level] //currentTotal := f.totalBytes @@ -132,40 +142,50 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) // only for log, delete on prod if b == nil { - log.Debug("write", "level", level, "offset", offset, "length", "nil", "wc", f.writeCount[level], "total", currentTotal) + log.Debug("write", "level", level, "offset", offset, "length", "nil", "wc", wc, "total", currentTotal) } else { l := 32 if len(b) < l { l = len(b) } - log.Debug("write", "level", level, "offset", offset, "length", len(b), "wc", f.writeCount[level], "data", b[:l], "total", currentTotal) + log.Debug("write", "level", level, "offset", offset, "length", len(b), "wc", wc, "data", b[:l], "total", currentTotal) } + // if top level then b is the root hash which means we are finished + // write it to the topmost buffer and release the waitgroup blocking and then return f.lock.Lock() - // if top level then return if level == f.levelCount-1 { copy(f.buffers[level], b) - f.wg.Done() f.lock.Unlock() + f.wg.Done() log.Debug("top done", "level", level) return } f.lock.Unlock() // only write if we have data - // b will never be nil except bottom level where it can be nil upon finish (which will have already been hashed if on chunk boundary) - if b != nil { + // b will never be nil except data level where it can be nil if no additional data is written upon the call to Finish() + // (else) if b is nil, and if the data is on a chunk boundary, the data will already have been hashed, which means we're done with that level + if len(b) > 0 { + + // get the segment within the batch we are in netOffset := (offset % f.batchSegments) + + // write to the current level's hasher f.hashers[level].Write(netOffset%f.branches, b) + + // copy the data into the buffer + // TODO do we need this on the data level? should this be pipe write to something else? copy(f.buffers[level][netOffset*f.segmentSize:], b) + + // increment the write count f.lock.Lock() f.writeCount[level]++ wc = f.writeCount[level] f.lock.Unlock() + } else if wc%f.branches == 0 { - f.lock.Lock() f.wg.Done() - f.lock.Unlock() f.doneC[level] <- struct{}{} return } @@ -187,17 +207,15 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) executeHasher = true } - // if this was a nil data finish instruction and we are on boundary, we've already hashed what we need to hash + // if this was a nil data finish instruction and we are on boundary, we may be still hashing asynchronously. 
Wait for it to finish + // if we are on boundary, no need to hash further if f.finished && len(b) == 0 && level == 0 { f.lwg[0].Wait() log.Debug("finished and 0", "wc", wc) - if wc%f.branches == 0 { - executeHasher = false - } } if executeHasher { - + f.lwg[level].Wait() // check for the dangling chunk if level > 0 && f.finished { f.lock.Lock() diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 2ce94a2d0e..c66df197f1 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -68,7 +68,7 @@ var ( "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", } - start = 19 + start = 0 end = 20 ) From 596dc4a1901e46bdf8e4d5f3d277823ef3c06d5b Mon Sep 17 00:00:00 2001 From: lash Date: Thu, 7 Mar 2019 15:52:38 +0100 Subject: [PATCH 34/50] swarm/storage: Prune redundant locks --- swarm/storage/filehasher_alt.go | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index 53277f7f18..299d6e0bd2 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -230,8 +230,8 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) // f.lwg[level+1].Wait() log.Debug("dangle done", "level", level, "wc", wc) parentOffset := (wc - 1) / f.branches - f.wg.Done() f.lock.Unlock() + f.wg.Done() f.doneC[level] <- struct{}{} //f.write(b, parentOffset, level+1) f.write(b, parentOffset, level+1, currentTotal) @@ -240,9 +240,7 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) f.lock.Unlock() } - f.lock.Lock() f.lwg[level].Add(1) - f.lock.Unlock() // calculate what the potential span under this chunk will be span := f.getPotentialSpan(level) @@ -277,17 +275,12 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) parentOffset := (wc - 1) / f.branches if (level == 0 && finished) || targetCount == wc { log.Debug("done", "level", level) - f.lock.Lock() f.wg.Done() - f.lock.Unlock() f.doneC[level] <- struct{}{} } //f.write(hashResult, parentOffset, level+1) f.write(hashResult, parentOffset, level+1, total) - f.lock.Lock() f.lwg[level].Done() - f.lock.Unlock() }(level, wc, f.finished, currentTotal, targetCount) - } } From 1eef2a9f0ff716d3118937aebd74cbe53aa95a9f Mon Sep 17 00:00:00 2001 From: lash Date: Fri, 8 Mar 2019 10:19:18 +0100 Subject: [PATCH 35/50] swarm/storage: Remove more redundant locks --- swarm/storage/filehasher_alt.go | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index 299d6e0bd2..7066578977 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -26,7 +26,7 @@ type AltFileHasher struct { writeCount [altFileHasherMaxLevels]int // number of segment writes per level doneC [altFileHasherMaxLevels]chan struct{} // used to tell parent that child is done writing on right edge wg sync.WaitGroup // used to tell caller hashing is done (maybe be replced by channel, and doneC only internally) - lwg [altFileHasherMaxLevels]sync.WaitGroup // used when busy hashing + lwg [altFileHasherMaxLevels]sync.WaitGroup // used to block while the level's hasher is busy lock sync.Mutex // protect filehasher state vars } @@ -46,6 +46,10 @@ func NewAltFileHasher(hasherFunc func() bmt.SectionWriter, segmentSize int, bran return f } +func (f *AltFileHasher) incWriteCount(c int, level int) { + +} + func (f *AltFileHasher) Reset() { 
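// The two patches above prune redundant locks by moving the WaitGroup and
// channel signalling outside the mutex'd sections. A minimal standalone
// sketch of that ordering follows; the names (sketch, finish, mu, wg,
// state) are illustrative and not part of this codebase:
package sketch

import "sync"

// finish mutates shared state under mu, but signals wg only after the
// mutex is released, so a waiter that wakes up and immediately takes mu
// never contends with (or deadlocks against) the signalling goroutine.
func finish(mu *sync.Mutex, wg *sync.WaitGroup, state *int) {
	mu.Lock()
	*state++ // shared state is touched only inside the critical section
	mu.Unlock()
	wg.Done() // wake waiters strictly after the lock is dropped
}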
f.totalBytes = 0 f.levelCount = 0 @@ -136,7 +140,6 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) // copy state vars so we don't have to keep lock across the call f.lock.Lock() wc := f.writeCount[level] - //currentTotal := f.totalBytes targetCount := f.targetCount[level] f.lock.Unlock() @@ -153,15 +156,12 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) // if top level then b is the root hash which means we are finished // write it to the topmost buffer and release the waitgroup blocking and then return - f.lock.Lock() if level == f.levelCount-1 { copy(f.buffers[level], b) - f.lock.Unlock() f.wg.Done() log.Debug("top done", "level", level) return } - f.lock.Unlock() // only write if we have data // b will never be nil except data level where it can be nil if no additional data is written upon the call to Finish() @@ -215,25 +215,23 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) } if executeHasher { + + // if we are still hashing the data for this level, wait until we are done f.lwg[level].Wait() + // check for the dangling chunk if level > 0 && f.finished { f.lock.Lock() cwc := f.writeCount[level-1] log.Debug("danglecheck", "offset", offset, "f.batchSegments", f.batchSegments, "cwc", cwc) - // TODO: verify why do we need the latter part again? childWrites := cwc % f.batchSegments - //if offset%f.batchSegments == 0 && childWrites < f.branches { - //if offset%f.branches == 0 && childWrites < f.branches && childWrites > 0 { if offset%f.branches == 0 && childWrites <= f.branches { - // f.lwg[level+1].Wait() log.Debug("dangle done", "level", level, "wc", wc) parentOffset := (wc - 1) / f.branches f.lock.Unlock() f.wg.Done() f.doneC[level] <- struct{}{} - //f.write(b, parentOffset, level+1) f.write(b, parentOffset, level+1, currentTotal) return } @@ -262,12 +260,13 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) hashDataSize = ((dataUnderSpan-1)/(span/f.branches) + 1) * f.segmentSize } - // hash the chunk and write it to the current cursor position on the next level meta := make([]byte, 8) binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) log.Debug("hash", "level", level, "size", hashDataSize, "meta", meta, "wc", wc) hashResult := f.hashers[level].Sum(nil, hashDataSize, meta) f.hashers[level].Reset() + + // hash the chunk and write it to the current cursor position on the next level go func(level int, wc int, finished bool, total int, targetCount int) { // if the hasher on the level above is still working, wait for it f.lwg[level+1].Wait() @@ -278,7 +277,6 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) f.wg.Done() f.doneC[level] <- struct{}{} } - //f.write(hashResult, parentOffset, level+1) f.write(hashResult, parentOffset, level+1, total) f.lwg[level].Done() }(level, wc, f.finished, currentTotal, targetCount) From 55b331cce1b568814a366505f958844e11726af9 Mon Sep 17 00:00:00 2001 From: lash Date: Sat, 9 Mar 2019 23:31:34 +0100 Subject: [PATCH 36/50] swarm/storage: WIP hashpool level chan buffer refactor --- swarm/storage/filehasher_alt.go | 534 ++++++++++++++++++++++--------- swarm/storage/filehasher_test.go | 18 +- 2 files changed, 390 insertions(+), 162 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index 7066578977..53ae30cf87 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -1,9 +1,11 @@ package storage import ( + "context" 
"encoding/binary" "sync" + "github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/swarm/bmt" ) @@ -13,21 +15,27 @@ const ( ) type AltFileHasher struct { + ctx context.Context // per job context branches int segmentSize int chunkSize int batchSegments int - hashers [altFileHasherMaxLevels]bmt.SectionWriter - buffers [altFileHasherMaxLevels][]byte // holds chunk data on each level (todo; push data to channel on complete). Buffers can hold one batch of data - levelCount int // number of levels in this job (only determined when Finish() is called - finished bool // finished writing data - totalBytes int // total data bytes written - targetCount [altFileHasherMaxLevels - 1]int // expected segment writes per level - writeCount [altFileHasherMaxLevels]int // number of segment writes per level - doneC [altFileHasherMaxLevels]chan struct{} // used to tell parent that child is done writing on right edge - wg sync.WaitGroup // used to tell caller hashing is done (maybe be replced by channel, and doneC only internally) - lwg [altFileHasherMaxLevels]sync.WaitGroup // used to block while the level's hasher is busy - lock sync.Mutex // protect filehasher state vars + //hashers [altFileHasherMaxLevels]bmt.SectionWriter + //buffers [altFileHasherMaxLevels][]byte // holds chunk data on each level (todo; push data to channel on complete). Buffers can hold one batch of data + levelJobs [altFileHasherMaxLevels]chan fileHashJob // receives finished writes pending hashing to pass on to output handler + levelWriteC [altFileHasherMaxLevels]chan []byte + levelCount int // number of levels in this job (only determined when Finish() is called + //finished bool // finished writing data + totalBytes int // total data bytes written + targetCount [altFileHasherMaxLevels - 1]int // expected segment writes per level + writeCount [altFileHasherMaxLevels]int // number of segment writes per level + //doneC [altFileHasherMaxLevels]chan struct{} // used to tell parent that child is done writing on right edge + resC chan []byte // used to tell hasher that all is done + //wg sync.WaitGroup // used to tell caller hashing is done (maybe be replced by channel, and doneC only internally) + //lwg [altFileHasherMaxLevels]sync.WaitGroup // used to block while the level's hasher is busy + // TODO replace with rwlock + lock sync.Mutex // protect filehasher state vars + hasherPool sync.Pool } func NewAltFileHasher(hasherFunc func() bmt.SectionWriter, segmentSize int, branches int) *AltFileHasher { @@ -36,40 +44,123 @@ func NewAltFileHasher(hasherFunc func() bmt.SectionWriter, segmentSize int, bran segmentSize: segmentSize, chunkSize: branches * segmentSize, batchSegments: branches * branches, + resC: make(chan []byte), } for i := 0; i < altFileHasherMaxLevels-1; i++ { - f.buffers[i] = make([]byte, f.chunkSize*branches) // 4.6M with 9 levels - f.hashers[i] = hasherFunc() - f.doneC[i] = make(chan struct{}, 1) + //f.buffers[i] = make([]byte, f.chunkSize*branches) // 4.6M with 9 levels + //f.hashers[i] = hasherFunc() + //f.doneC[i] = make(chan struct{}, 1) + + // f.levelJobs[i] = make(chan fileHashJob, branches*2-1) + f.levelWriteC[i] = make(chan []byte) + } + f.hasherPool.New = func() interface{} { + return hasherFunc() } f.Reset() return f } -func (f *AltFileHasher) incWriteCount(c int, level int) { +// fileHashJob is submitted to level buffer channel when a chunk boundary is crossed on write +type fileHashJob struct { + index int // index this write belongs to + c int // write 
data cursor
+	data   []byte                 // data from the write
+	hasher chan bmt.SectionWriter // receives the next free hasher to process the data with
+	sum    []byte                 // holds the hash result
+	last   bool                   // true if this is the last write on the level
+}
+
+// enforces sequential parameters for the job descriptions to the level buffer channels
+// the hasher is retrieved asynchronously so write can happen even if all hashers are busy
+func (f *AltFileHasher) addJob(level int, data []byte, last bool) {
+	j := fileHashJob{
+		index:  f.getWriteCountSafe(level),
+		data:   data,
+		last:   last,
+		hasher: make(chan bmt.SectionWriter, 1),
+	}
+	go func(hasher chan<- bmt.SectionWriter) {
+		log.Debug("getting hasher", "level", level)
+		j.hasher <- f.hasherPool.Get().(*bmt.AsyncHasher)
+		log.Debug("got hasher", "level", level)
+	}(j.hasher)
+	log.Debug("new job", "level", level, "last", last, "index", j.index)
+	f.levelJobs[level] <- j
+}
+
+func (f *AltFileHasher) cancel(e error) {
+	log.Error("cancel called TODO!")
+}
+
+// makes sure the hasher is clean before it's returned to the pool
+func (f *AltFileHasher) putHasher(h bmt.SectionWriter) {
+	h.Reset()
+	f.hasherPool.Put(h)
+}
+
+// returns true if current write offset of level is on hashing boundary
+func (f *AltFileHasher) isChunkBoundarySafe(level int) bool {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+	return f.writeCount[level]%branches == 0
+}
+
+func (f *AltFileHasher) getTotalBytesSafe() int {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+	return f.totalBytes
+}
+
+// returns a level's write count
+// holds the lock
+func (f *AltFileHasher) getWriteCountSafe(level int) int {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+	return f.writeCount[level]
+}
+
+// increments a level's write count
+// holds the lock
+func (f *AltFileHasher) incWriteCountSafe(level int) int {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+	f.writeCount[level]++
+	return f.writeCount[level]
+}
+
+func (f *AltFileHasher) isTopLevelSafe(level int) bool {
+	f.lock.Lock()
+	defer f.lock.Unlock()
+	return level == f.levelCount-1
+}
+
+// makes the filehasher ready for new duty
+// implements bmt.SectionWriter
 func (f *AltFileHasher) Reset() {
-	f.totalBytes = 0
-	f.levelCount = 0
-	f.wg.Add(altFileHasherMaxLevels)
 	for i := 0; i < altFileHasherMaxLevels; i++ {
 		if i > 0 {
 			f.targetCount[i-1] = 0
 		}
+		f.levelJobs[i] = make(chan fileHashJob, branches*2-1)
 		f.writeCount[i] = 0
 	}
+	f.totalBytes = 0
+	f.levelCount = 0
+	f.ctx = context.TODO()
+	f.processJobs()
 }

-func (f *AltFileHasher) isWriteFinished() bool {
-	var finished bool
-	f.lock.Lock()
-	finished = f.finished
-	f.lock.Unlock()
-	return finished
-}
+// check whether all writes on all levels have finished
+// holds the lock
+//func (f *AltFileHasher) isWriteFinishedSafe() bool {
+//	f.lock.Lock()
+//	defer f.lock.Unlock()
+//	return f.finished
+//}

+// Finish marks the final write of the file
+// It returns the root hash of the processed file
 func (f *AltFileHasher) Finish(b []byte) []byte {
 	f.lock.Lock()
@@ -78,14 +169,13 @@ func (f *AltFileHasher) Finish(b []byte) []byte {
 	if b != nil {
 		f.totalBytes += len(b)
 	}
-	f.finished = true

 	// find our level height and decrease the waitgroup count to used levels only
 	f.levelCount = getLevelsFromLength(f.totalBytes, f.segmentSize, f.branches)
 	log.Debug("finish set", "levelcount", f.levelCount, "b", len(b))
 	for i := altFileHasherMaxLevels; i > f.levelCount; i-- {
-		log.Debug("purging unused level wg", "l", i)
-		f.wg.Done()
+		log.Debug("purging unused level chans", "l", i)
+		close(f.levelJobs[i-1])
 	}

 	// calculate the amount 
of write() calls expected in total @@ -103,17 +193,28 @@ func (f *AltFileHasher) Finish(b []byte) []byte { f.lock.Unlock() // write and return result when we get it back - f.write(b, f.writeCount[0], 0, f.totalBytes) - f.wg.Wait() - return f.buffers[f.levelCount-1][:f.segmentSize] + //f.altwrite(0, b, true) + if len(b) > 0 { + f.altwrite(0, b, true) + } else { + f.levelWriteC[0] <- b + } + r := <-f.resC + for i := 0; i < f.levelCount; i++ { + log.Debug("purging done chans", "l", i) + close(f.levelJobs[i]) + } + return r } // Write writes data provided from the buffer to the hasher // \TODO currently not safe to write intermediate data of length not multiple of 32 bytes func (f *AltFileHasher) Write(b []byte) { + f.lock.Lock() f.totalBytes += len(b) + f.lock.Unlock() for i := 0; i < len(b); i += 32 { - f.write(b[i:], f.writeCount[0], 0, f.totalBytes) + f.altwrite(0, b, false) } } @@ -127,6 +228,65 @@ func (f *AltFileHasher) getPotentialSpan(level int) int { return span } +func (f *AltFileHasher) altwrite(level int, b []byte, last bool) { + if f.isChunkBoundarySafe(level) { + f.addJob(level, b, last) + } + log.Debug("altwrite levelwritec", "level", level, "last", last, "wc", f.getWriteCountSafe(level)) + f.levelWriteC[level] <- b +} + +// starts one loop for every level that accepts hashing job +// propagates sequential writes up the levels +func (f *AltFileHasher) processJobs() { + for i := 0; i < altFileHasherMaxLevels; i++ { + go func(i int) { + for { + select { + case j, ok := <-f.levelJobs[i]: + if !ok { + log.Trace("job channel closed", "i", i) + return + } + if f.isTopLevelSafe(i) { + dataPtr := <-f.levelWriteC[i] + log.Debug("this is top level so all done", "i", i, "root", hexutil.Encode(dataPtr)) + f.resC <- dataPtr + return + } + log.Debug("have job write", "level", i, "j", j) + h := <-j.hasher + for { + select { + case dataPtr := <-f.levelWriteC[i]: + if len(dataPtr) == 0 { + j.last = true + } + if !j.last { + log.Trace("job write chan", "level", i, "data", dataPtr) + netOffset := (f.getWriteCountSafe(i) % f.batchSegments) + h.Write(netOffset%f.branches, dataPtr) + f.incWriteCountSafe(i) + } + case <-f.ctx.Done(): + return + } + if f.isChunkBoundarySafe(i) || j.last { + log.Trace("chunk boundary|last", "last", j.last, "wc", f.getWriteCountSafe(i), "level", i) + f.doHash(h, i, &j) + break + } + } + case <-f.ctx.Done(): + log.Debug("job exiting", "level", i, "err", f.ctx.Err()) + close(f.levelJobs[i]) + return + } + } + }(i) + } +} + // write writes the provided data directly to the underlying hasher // and performs recursive hashing on complete batches or data end // b is the data to write @@ -134,111 +294,106 @@ func (f *AltFileHasher) getPotentialSpan(level int) int { // level is the tree level we are writing to // currentTotal is the current total of data bytes written so far // TODO: ensure local copies of all thread unsafe vars -//func (f *AltFileHasher) write(b []byte, offset int, level int) { -func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) { - - // copy state vars so we don't have to keep lock across the call - f.lock.Lock() - wc := f.writeCount[level] - targetCount := f.targetCount[level] - f.lock.Unlock() - - // only for log, delete on prod - if b == nil { - log.Debug("write", "level", level, "offset", offset, "length", "nil", "wc", wc, "total", currentTotal) - } else { - l := 32 - if len(b) < l { - l = len(b) - } - log.Debug("write", "level", level, "offset", offset, "length", len(b), "wc", wc, "data", b[:l], "total", currentTotal) - } - 
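// processJobs above dedicates one goroutine to each tree level: the
// goroutine drains its level's write channel and, on every chunk boundary,
// hashes the accumulated batch and forwards the digest to the level above.
// A reduced standalone model of that pipeline follows; the names
// (startLevels, levelC, hash) are illustrative only, and the real code's
// short-final-chunk and cancellation handling is omitted:
package sketch

// startLevels wires `levels` channels into a chain of batching goroutines.
// The caller feeds segments into the returned levelC[0] and reads the root
// digest from levelC[levels-1].
func startLevels(levels, branches int, hash func([][]byte) []byte) []chan []byte {
	levelC := make([]chan []byte, levels)
	for i := range levelC {
		levelC[i] = make(chan []byte)
	}
	for i := 0; i < levels-1; i++ {
		go func(i int) {
			var batch [][]byte
			for seg := range levelC[i] {
				batch = append(batch, seg)
				if len(batch) == branches { // chunk boundary: hash and push up
					levelC[i+1] <- hash(batch)
					batch = nil
				}
			}
		}(i)
	}
	return levelC
}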
- // if top level then b is the root hash which means we are finished - // write it to the topmost buffer and release the waitgroup blocking and then return - if level == f.levelCount-1 { - copy(f.buffers[level], b) - f.wg.Done() - log.Debug("top done", "level", level) - return - } - - // only write if we have data - // b will never be nil except data level where it can be nil if no additional data is written upon the call to Finish() - // (else) if b is nil, and if the data is on a chunk boundary, the data will already have been hashed, which means we're done with that level - if len(b) > 0 { - - // get the segment within the batch we are in - netOffset := (offset % f.batchSegments) - - // write to the current level's hasher - f.hashers[level].Write(netOffset%f.branches, b) - - // copy the data into the buffer - // TODO do we need this on the data level? should this be pipe write to something else? - copy(f.buffers[level][netOffset*f.segmentSize:], b) - - // increment the write count +//func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) { +// +// // copy state vars so we don't have to keep lock across the call +// wc := f.getWriteCountSafe(level) +// f.lock.Lock() +// targetCount := f.targetCount[level] +// f.lock.Unlock() +// +// // only for log, delete on prod +// if b == nil { +// log.Debug("write", "level", level, "offset", offset, "length", "nil", "wc", wc, "total", currentTotal) +// } else { +// l := 32 +// if len(b) < l { +// l = len(b) +// } +// log.Debug("write", "level", level, "offset", offset, "length", len(b), "wc", wc, "data", b[:l], "total", currentTotal) +// } +// +// // if top level then b is the root hash which means we are finished +// // write it to the topmost buffer and release the waitgroup blocking and then return +// // \TODO should never be called when we refactor to separate hasher level buffer handler +// if f.isTopLevelSafe(level) { +// copy(f.buffers[level], b) +// f.wg.Done() +// log.Debug("top done", "level", level) +// return +// } +// +// // only write if we have data +// // b will never be nil except data level where it can be nil if no additional data is written upon the call to Finish() +// // (else) if b is nil, and if the data is on a chunk boundary, the data will already have been hashed, which means we're done with that level +// if len(b) > 0 { +// +// // get the segment within the batch we are in +// netOffset := (offset % f.batchSegments) +// +// // write to the current level's hasher +// f.hashers[level].Write(netOffset%f.branches, b) +// +// // copy the data into the buffer +// copy(f.buffers[level][netOffset*f.segmentSize:], b) +// +// // increment the write count +// wc = f.incWriteCountSafe(level) +// +// } else if wc%f.branches == 0 { +// f.wg.Done() +// f.doneC[level] <- struct{}{} +// return +// } +// +// // execute the hasher if: +// // - we are on a chunk edge +// // - we are on the data level and writes are set to finished +// // - we are above data level, writes are finished, and expected level write count is reached +// executeHasher := false +// if wc%f.branches == 0 { +// log.Debug("executehasher", "reason", "edge", "level", level, "offset", offset) +// executeHasher = true +// } else if f.finished && level == 0 { +// log.Debug("executehasher", "reason", "data done", "level", level, "offset", offset) +// executeHasher = true +// } else if f.finished && targetCount > 0 && targetCount == wc { +// <-f.doneC[level-1] +// log.Debug("executehasher", "reason", "target done", "level", level, "offset", offset, 
"wc", wc) +// executeHasher = true +// } +// +// // if this was a nil data finish instruction and we are on boundary, we may be still hashing asynchronously. Wait for it to finish +// // if we are on boundary, no need to hash further +// if f.finished && len(b) == 0 && level == 0 { +// f.lwg[0].Wait() +// log.Debug("finished and 0", "wc", wc) +// } +// +// if executeHasher { +// f.doHash() +// } +//} + +// synchronous method that hashes the data contained in the job +// modifies fileHashJob in place +func (f *AltFileHasher) doHash(h bmt.SectionWriter, level int, j *fileHashJob) { + + // check for the dangling chunk + if level > 0 && j.last { + writeCountBelow := f.getWriteCountSafe(level - 1) + offset := f.getWriteCountSafe(level) f.lock.Lock() - f.writeCount[level]++ - wc = f.writeCount[level] - f.lock.Unlock() - - } else if wc%f.branches == 0 { - f.wg.Done() - f.doneC[level] <- struct{}{} - return - } - - // execute the hasher if: - // - we are on a chunk edge - // - we are on the data level and writes are set to finished - // - we are above data level, writes are finished, and expected level write count is reached - executeHasher := false - if wc%f.branches == 0 { - log.Debug("executehasher", "reason", "edge", "level", level, "offset", offset) - executeHasher = true - } else if f.finished && level == 0 { - log.Debug("executehasher", "reason", "data done", "level", level, "offset", offset) - executeHasher = true - } else if f.finished && targetCount > 0 && targetCount == wc { - <-f.doneC[level-1] - log.Debug("executehasher", "reason", "target done", "level", level, "offset", offset, "wc", wc) - executeHasher = true - } - - // if this was a nil data finish instruction and we are on boundary, we may be still hashing asynchronously. Wait for it to finish - // if we are on boundary, no need to hash further - if f.finished && len(b) == 0 && level == 0 { - f.lwg[0].Wait() - log.Debug("finished and 0", "wc", wc) - } - - if executeHasher { - - // if we are still hashing the data for this level, wait until we are done - f.lwg[level].Wait() - - // check for the dangling chunk - if level > 0 && f.finished { - f.lock.Lock() - cwc := f.writeCount[level-1] - - log.Debug("danglecheck", "offset", offset, "f.batchSegments", f.batchSegments, "cwc", cwc) - childWrites := cwc % f.batchSegments - if offset%f.branches == 0 && childWrites <= f.branches { - log.Debug("dangle done", "level", level, "wc", wc) - parentOffset := (wc - 1) / f.branches - f.lock.Unlock() - f.wg.Done() - f.doneC[level] <- struct{}{} - f.write(b, parentOffset, level+1, currentTotal) - return - } + log.Debug("danglecheck", "offset", offset, "f.batchSegments", f.batchSegments, "wc", writeCountBelow) + childWrites := writeCountBelow % f.batchSegments + if offset%f.branches == 0 && childWrites <= f.branches { + log.Debug("dangle done", "level", level, "writeCount", j.c) f.lock.Unlock() + f.altwrite(level+1, j.data, true) + return } - - f.lwg[level].Add(1) + f.lock.Unlock() + } else { // calculate what the potential span under this chunk will be span := f.getPotentialSpan(level) @@ -246,8 +401,8 @@ func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) // calculate the actual data under this span // if data is fully written, the current chunk may be shorter than the span var dataUnderSpan int - if f.isWriteFinished() { - dataUnderSpan = (currentTotal-1)%span + 1 + if j.last { + dataUnderSpan = (f.getTotalBytesSafe()-1)%span + 1 } else { dataUnderSpan = span } @@ -262,23 +417,94 @@ func (f *AltFileHasher) write(b 
[]byte, offset int, level int, currentTotal int) meta := make([]byte, 8) binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) - log.Debug("hash", "level", level, "size", hashDataSize, "meta", meta, "wc", wc) - hashResult := f.hashers[level].Sum(nil, hashDataSize, meta) - f.hashers[level].Reset() - - // hash the chunk and write it to the current cursor position on the next level - go func(level int, wc int, finished bool, total int, targetCount int) { - // if the hasher on the level above is still working, wait for it - f.lwg[level+1].Wait() - log.Debug("gofunc hash up", "level", level, "wc", wc) - parentOffset := (wc - 1) / f.branches - if (level == 0 && finished) || targetCount == wc { - log.Debug("done", "level", level) - f.wg.Done() - f.doneC[level] <- struct{}{} - } - f.write(hashResult, parentOffset, level+1, total) - f.lwg[level].Done() - }(level, wc, f.finished, currentTotal, targetCount) + log.Debug("hash", "level", level, "size", hashDataSize, "meta", meta, "wc", j.c, "hasher", h) + + j.sum = h.Sum(nil, hashDataSize, meta) + // write to next level hasher + + // TODO here we are copying data bytes, can we get away with referencing underlying buffer? + go func(digest []byte) { + log.Trace("next level write", "level", level+1, "digest", digest) + f.altwrite(level+1, digest, j.last) + }(j.sum) + + // also write to output + go func() { + log.Trace("TODO write out to chunk", "sum", hexutil.Encode(j.sum), "data", hexutil.Encode(j.data)) + }() + f.putHasher(h) } + + // close this job channel if this is the last write + // if j.last { + // log.Trace("dohash last close chan", "level", level) + // close(f.levelJobs[level]) + // } } + +//func (f *AltFileHasher) doHash_() { +// // if we are still hashing the data for this level, wait until we are done +// f.lwg[level].Wait() +// +// // check for the dangling chunk +// if level > 0 && f.finished { +// cwc := f.getWriteCountSafe(level - 1) +// +// f.lock.Lock() +// log.Debug("danglecheck", "offset", offset, "f.batchSegments", f.batchSegments, "cwc", cwc) +// childWrites := cwc % f.batchSegments +// if offset%f.branches == 0 && childWrites <= f.branches { +// log.Debug("dangle done", "level", level, "wc", wc) +// parentOffset := (wc - 1) / f.branches +// f.lock.Unlock() +// f.wg.Done() +// f.doneC[level] <- struct{}{} +// f.write(b, parentOffset, level+1, currentTotal) +// return +// } +// f.lock.Unlock() +// } +// +// f.lwg[level].Add(1) +// +// // calculate what the potential span under this chunk will be +// span := f.getPotentialSpan(level) +// +// // calculate the actual data under this span +// // if data is fully written, the current chunk may be shorter than the span +// var dataUnderSpan int +// if f.isWriteFinishedSafe() { +// dataUnderSpan = (currentTotal-1)%span + 1 +// } else { +// dataUnderSpan = span +// } +// +// // calculate the length of the actual data in this chunk (the data to be hashed) +// var hashDataSize int +// if level == 0 { +// hashDataSize = dataUnderSpan +// } else { +// hashDataSize = ((dataUnderSpan-1)/(span/f.branches) + 1) * f.segmentSize +// } +// +// meta := make([]byte, 8) +// binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) +// log.Debug("hash", "level", level, "size", hashDataSize, "meta", meta, "wc", wc) +// hashResult := f.hashers[level].Sum(nil, hashDataSize, meta) +// f.hashers[level].Reset() +// +// // hash the chunk and write it to the current cursor position on the next level +// go func(level int, wc int, finished bool, currentTotal int, targetCount int) { +// // if the hasher on the 
level above is still working, wait for it +// f.lwg[level+1].Wait() +// log.Debug("gofunc hash up", "level", level, "wc", wc) +// parentOffset := (wc - 1) / f.branches +// if (level == 0 && finished) || targetCount == wc { +// log.Debug("done", "level", level) +// f.wg.Done() +// f.doneC[level] <- struct{}{} +// } +// f.write(hashResult, parentOffset, level+1, currentTotal) +// f.lwg[level].Done() +// }(level, wc, f.finished, currentTotal, targetCount) +//} diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index c66df197f1..b5a0d48af8 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -69,14 +69,16 @@ var ( } start = 0 - end = 20 + end = 7 ) -func init() { - pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize*32) -} +// +//func init() { +// pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize*32) +//} func newAsyncHasher() bmt.SectionWriter { + pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize*32) h := bmt.New(pool) return h.NewAsyncWriter(false) } @@ -86,7 +88,7 @@ func TestAltFileHasher(t *testing.T) { for i := start; i < end; i++ { dataLength := dataLengths[i] - log.Info("start", "len", dataLength) + log.Info("start", "i", i, "len", dataLength) fh := NewAltFileHasher(newAsyncHasher, 32, 128) _, data := generateSerialData(dataLength, 255, 0) l := 32 @@ -108,7 +110,7 @@ func TestAltFileHasher(t *testing.T) { t.Logf("[%7d+%4d]\t%v\tref: %x\texpect: %s", dataLength/chunkSize, dataLength%chunkSize, eq, refHash, expected[i]) } if mismatch > 0 { - t.Fatalf("mismatches: %d/%d", mismatch, len(dataLengths)) + t.Fatalf("mismatches: %d/%d", mismatch, end-start) } } @@ -117,7 +119,7 @@ func TestReferenceFileHasher(t *testing.T) { var mismatch int for i := start; i < end; i++ { dataLength := dataLengths[i] - log.Info("start", "len", dataLength) + log.Info("start", "i", i, "len", dataLength) fh := NewReferenceFileHasher(h, 128) _, data := generateSerialData(dataLength, 255, 0) refHash := fh.Hash(bytes.NewReader(data), len(data)).Bytes() @@ -129,7 +131,7 @@ func TestReferenceFileHasher(t *testing.T) { t.Logf("[%7d+%4d]\t%v\tref: %x\texpect: %s", dataLength/chunkSize, dataLength%chunkSize, eq, refHash, expected[i]) } if mismatch > 0 { - t.Fatalf("mismatches: %d/%d", mismatch, len(dataLengths)) + t.Fatalf("mismatches: %d/%d", mismatch, end-start) } } From 07e9efd5cccef28e6efd5f613cd97ed604b07768 Mon Sep 17 00:00:00 2001 From: lash Date: Sun, 10 Mar 2019 10:03:20 +0100 Subject: [PATCH 37/50] swarm/storage: WIP levelWriteC hang on last --- swarm/storage/filehasher_alt.go | 167 +++++++++++++++++++------------ swarm/storage/filehasher_test.go | 2 +- 2 files changed, 105 insertions(+), 64 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index 53ae30cf87..25a5eff87a 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -26,9 +26,10 @@ type AltFileHasher struct { levelWriteC [altFileHasherMaxLevels]chan []byte levelCount int // number of levels in this job (only determined when Finish() is called //finished bool // finished writing data - totalBytes int // total data bytes written - targetCount [altFileHasherMaxLevels - 1]int // expected segment writes per level - writeCount [altFileHasherMaxLevels]int // number of segment writes per level + totalBytes int // total data bytes written + targetCount [altFileHasherMaxLevels - 1]int // expected segment writes per level + writeCount [altFileHasherMaxLevels]int // number of segment writes per level + 
writeEventCount [altFileHasherMaxLevels]int // number of writes received by channel //doneC [altFileHasherMaxLevels]chan struct{} // used to tell parent that child is done writing on right edge resC chan []byte // used to tell hasher that all is done //wg sync.WaitGroup // used to tell caller hashing is done (maybe be replced by channel, and doneC only internally) @@ -100,10 +101,10 @@ func (f *AltFileHasher) putHasher(h bmt.SectionWriter) { } // returns true if current write offset of level is on hashing boundary -func (f *AltFileHasher) isChunkBoundarySafe(level int) bool { - f.lock.Lock() - defer f.lock.Unlock() - return f.writeCount[level]%branches == 0 +func (f *AltFileHasher) isChunkBoundary(level int, wc int) bool { + isboundary := wc%f.branches == 0 + log.Trace("check chunk boundary", "level", level, "wc", wc, "is", isboundary) + return isboundary } func (f *AltFileHasher) getTotalBytesSafe() int { @@ -112,6 +113,23 @@ func (f *AltFileHasher) getTotalBytesSafe() int { return f.totalBytes } +// returns a level's write count +// holds the lock +func (f *AltFileHasher) getWriteEventCountSafe(level int) int { + f.lock.Lock() + defer f.lock.Unlock() + return f.writeEventCount[level] +} + +// increments a level's write count +// holds the lock +func (f *AltFileHasher) incWriteEventCountSafe(level int) int { + f.lock.Lock() + defer f.lock.Unlock() + f.writeEventCount[level]++ + return f.writeEventCount[level] +} + // returns a level's write count // holds the lock func (f *AltFileHasher) getWriteCountSafe(level int) int { @@ -144,6 +162,7 @@ func (f *AltFileHasher) Reset() { } f.levelJobs[i] = make(chan fileHashJob, branches*2-1) f.writeCount[i] = 0 + f.writeEventCount[i] = 0 } f.totalBytes = 0 f.levelCount = 0 @@ -193,17 +212,31 @@ func (f *AltFileHasher) Finish(b []byte) []byte { f.lock.Unlock() // write and return result when we get it back - //f.altwrite(0, b, true) + //f.write(0, b, true) if len(b) > 0 { - f.altwrite(0, b, true) + f.write(0, b, false) } else { - f.levelWriteC[0] <- b + + // if the writecount of write bytecount does not end on a chunk boundary (number of segments in chunk) + // we need to poke the job with a final write message + segmentWrites := (f.getTotalBytesSafe()-1)/f.segmentSize + 1 + log.Trace("write end chunk boundary align", "total", f.totalBytes, "segmentwrites", segmentWrites) + if segmentWrites%f.branches == 0 { + f.addJob(0, nil, true) + } + f.levelWriteC[0] <- nil } + + // get the result r := <-f.resC - for i := 0; i < f.levelCount; i++ { - log.Debug("purging done chans", "l", i) - close(f.levelJobs[i]) - } + + // free the rest of the level channels + // for i := 0; i < f.levelCount; i++ { + // log.Debug("purging done chans", "l", i) + // close(f.levelJobs[i]) + // } + + //return the reult return r } @@ -214,7 +247,7 @@ func (f *AltFileHasher) Write(b []byte) { f.totalBytes += len(b) f.lock.Unlock() for i := 0; i < len(b); i += 32 { - f.altwrite(0, b, false) + f.write(0, b, false) } } @@ -228,11 +261,16 @@ func (f *AltFileHasher) getPotentialSpan(level int) int { return span } -func (f *AltFileHasher) altwrite(level int, b []byte, last bool) { - if f.isChunkBoundarySafe(level) { +// write signals the level channel handler that a new write has taken place +// it creates a new write job when write count hits chunk boundaries +// TODO pass writecount offset through function to avoid segmentwrite calculation +func (f *AltFileHasher) write(level int, b []byte, last bool) { + writeCount := f.getWriteCountSafe(level) + log.Trace("write chunk boundary align", 
"writecount", writeCount, "total", f.getTotalBytesSafe()) + if f.isChunkBoundary(level, writeCount) { f.addJob(level, b, last) } - log.Debug("altwrite levelwritec", "level", level, "last", last, "wc", f.getWriteCountSafe(level)) + log.Debug("write levelwritec", "level", level, "last", last, "wc", writeCount) f.levelWriteC[level] <- b } @@ -251,28 +289,31 @@ func (f *AltFileHasher) processJobs() { if f.isTopLevelSafe(i) { dataPtr := <-f.levelWriteC[i] log.Debug("this is top level so all done", "i", i, "root", hexutil.Encode(dataPtr)) + close(f.levelJobs[i]) f.resC <- dataPtr return } log.Debug("have job write", "level", i, "j", j) h := <-j.hasher for { + var writeCount int select { case dataPtr := <-f.levelWriteC[i]: + writeCount = f.getWriteCountSafe(i) + log.Trace("job write chan", "level", i, "data", dataPtr) if len(dataPtr) == 0 { j.last = true } if !j.last { - log.Trace("job write chan", "level", i, "data", dataPtr) - netOffset := (f.getWriteCountSafe(i) % f.batchSegments) + netOffset := (writeCount % f.batchSegments) h.Write(netOffset%f.branches, dataPtr) - f.incWriteCountSafe(i) + writeCount = f.incWriteCountSafe(i) } case <-f.ctx.Done(): return } - if f.isChunkBoundarySafe(i) || j.last { - log.Trace("chunk boundary|last", "last", j.last, "wc", f.getWriteCountSafe(i), "level", i) + if f.isChunkBoundary(i, writeCount) || j.last { + log.Trace("chunk boundary|last", "last", j.last, "wc", writeCount, "level", i) f.doHash(h, i, &j) break } @@ -389,57 +430,57 @@ func (f *AltFileHasher) doHash(h bmt.SectionWriter, level int, j *fileHashJob) { if offset%f.branches == 0 && childWrites <= f.branches { log.Debug("dangle done", "level", level, "writeCount", j.c) f.lock.Unlock() - f.altwrite(level+1, j.data, true) + f.write(level+1, j.data, true) return } f.lock.Unlock() - } else { + } - // calculate what the potential span under this chunk will be - span := f.getPotentialSpan(level) + // calculate what the potential span under this chunk will be + span := f.getPotentialSpan(level) - // calculate the actual data under this span - // if data is fully written, the current chunk may be shorter than the span - var dataUnderSpan int - if j.last { - dataUnderSpan = (f.getTotalBytesSafe()-1)%span + 1 - } else { - dataUnderSpan = span - } + // calculate the actual data under this span + // if data is fully written, the current chunk may be shorter than the span + var dataUnderSpan int + if j.last { + dataUnderSpan = (f.getTotalBytesSafe()-1)%span + 1 + } else { + dataUnderSpan = span + } - // calculate the length of the actual data in this chunk (the data to be hashed) - var hashDataSize int - if level == 0 { - hashDataSize = dataUnderSpan - } else { - hashDataSize = ((dataUnderSpan-1)/(span/f.branches) + 1) * f.segmentSize - } + // calculate the length of the actual data in this chunk (the data to be hashed) + var hashDataSize int + if level == 0 { + hashDataSize = dataUnderSpan + } else { + hashDataSize = ((dataUnderSpan-1)/(span/f.branches) + 1) * f.segmentSize + } - meta := make([]byte, 8) - binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) - log.Debug("hash", "level", level, "size", hashDataSize, "meta", meta, "wc", j.c, "hasher", h) + meta := make([]byte, 8) + binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) + log.Debug("hash", "level", level, "size", hashDataSize, "meta", meta, "wc", j.c, "hasher", h, "gettotalbytes", f.getTotalBytesSafe(), "last", j.last, "span", span) - j.sum = h.Sum(nil, hashDataSize, meta) - // write to next level hasher + j.sum = h.Sum(nil, hashDataSize, 
meta) - // TODO here we are copying data bytes, can we get away with referencing underlying buffer? - go func(digest []byte) { - log.Trace("next level write", "level", level+1, "digest", digest) - f.altwrite(level+1, digest, j.last) - }(j.sum) + // also write to output + go func() { + log.Trace("TODO write out to chunk", "sum", hexutil.Encode(j.sum), "data", hexutil.Encode(j.data)) + }() + f.putHasher(h) - // also write to output - go func() { - log.Trace("TODO write out to chunk", "sum", hexutil.Encode(j.sum), "data", hexutil.Encode(j.data)) - }() - f.putHasher(h) - } + // write to next level hasher - // close this job channel if this is the last write - // if j.last { - // log.Trace("dohash last close chan", "level", level) - // close(f.levelJobs[level]) - // } + // TODO here we are copying data bytes, can we get away with referencing underlying buffer? + go func(j *fileHashJob) { + log.Trace("next level write", "level", level+1, "digest", hexutil.Encode(j.sum)) + f.write(level+1, j.sum, j.last) + + // close this job channel if this is the last write + if j.last { + log.Trace("dohash last close chan", "level", level) + close(f.levelJobs[level]) + } + }(j) } //func (f *AltFileHasher) doHash_() { diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index b5a0d48af8..8cc8075e41 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -69,7 +69,7 @@ var ( } start = 0 - end = 7 + end = 10 ) // From 728cc250608d5da5a1116884247a13886223113d Mon Sep 17 00:00:00 2001 From: lash Date: Sun, 10 Mar 2019 18:35:24 +0100 Subject: [PATCH 38/50] swarm/storage: Removed hang up to 4159 bytes --- swarm/storage/filehasher_alt.go | 128 ++++++++++++++++---------------- 1 file changed, 62 insertions(+), 66 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index 25a5eff87a..a641f1a1c1 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -26,10 +26,10 @@ type AltFileHasher struct { levelWriteC [altFileHasherMaxLevels]chan []byte levelCount int // number of levels in this job (only determined when Finish() is called //finished bool // finished writing data - totalBytes int // total data bytes written - targetCount [altFileHasherMaxLevels - 1]int // expected segment writes per level - writeCount [altFileHasherMaxLevels]int // number of segment writes per level - writeEventCount [altFileHasherMaxLevels]int // number of writes received by channel + totalBytes int // total data bytes written + targetCount [altFileHasherMaxLevels - 1]int // expected segment writes per level + writeCount [altFileHasherMaxLevels]int // number of segment writes received by job buffer per level RENAME + writeSyncCount int // number of external writes to the filehasher RENAME //doneC [altFileHasherMaxLevels]chan struct{} // used to tell parent that child is done writing on right edge resC chan []byte // used to tell hasher that all is done //wg sync.WaitGroup // used to tell caller hashing is done (maybe be replced by channel, and doneC only internally) @@ -113,23 +113,6 @@ func (f *AltFileHasher) getTotalBytesSafe() int { return f.totalBytes } -// returns a level's write count -// holds the lock -func (f *AltFileHasher) getWriteEventCountSafe(level int) int { - f.lock.Lock() - defer f.lock.Unlock() - return f.writeEventCount[level] -} - -// increments a level's write count -// holds the lock -func (f *AltFileHasher) incWriteEventCountSafe(level int) int { - f.lock.Lock() - defer f.lock.Unlock() - 
f.writeEventCount[level]++ - return f.writeEventCount[level] -} - // returns a level's write count // holds the lock func (f *AltFileHasher) getWriteCountSafe(level int) int { @@ -153,6 +136,16 @@ func (f *AltFileHasher) isTopLevelSafe(level int) bool { return level == f.levelCount-1 } +// getPotentialSpan returns the total amount of data that can represented under the given level +// \TODO use a table instead +func (f *AltFileHasher) getPotentialSpan(level int) int { + span := f.chunkSize + for i := 0; i < level; i++ { + span *= f.branches + } + return span +} + // makes the filehasher ready for new duty // implements bmt.SectionWriter func (f *AltFileHasher) Reset() { @@ -162,7 +155,7 @@ func (f *AltFileHasher) Reset() { } f.levelJobs[i] = make(chan fileHashJob, branches*2-1) f.writeCount[i] = 0 - f.writeEventCount[i] = 0 + f.writeSyncCount = 0 } f.totalBytes = 0 f.levelCount = 0 @@ -212,19 +205,20 @@ func (f *AltFileHasher) Finish(b []byte) []byte { f.lock.Unlock() // write and return result when we get it back - //f.write(0, b, true) if len(b) > 0 { - f.write(0, b, false) + f.write(0, f.writeSyncCount, b, false) + f.writeSyncCount++ } else { - - // if the writecount of write bytecount does not end on a chunk boundary (number of segments in chunk) - // we need to poke the job with a final write message - segmentWrites := (f.getTotalBytesSafe()-1)/f.segmentSize + 1 - log.Trace("write end chunk boundary align", "total", f.totalBytes, "segmentwrites", segmentWrites) - if segmentWrites%f.branches == 0 { + // + // // if the writecount of write bytecount does not end on a chunk boundary (number of segments in chunk) + // // we need to poke the job with a final write message + //segmentWrites := (f.getTotalBytesSafe()-1)/f.segmentSize + 1 + if f.writeSyncCount%f.branches == 0 { + log.Trace("write end chunk boundary align", "total", f.totalBytes, "segmentwrites", f.writeSyncCount) f.addJob(0, nil, true) } - f.levelWriteC[0] <- nil + f.write(0, f.writeSyncCount, nil, true) + //f.levelWriteC[0] <- nil } // get the result @@ -247,34 +241,29 @@ func (f *AltFileHasher) Write(b []byte) { f.totalBytes += len(b) f.lock.Unlock() for i := 0; i < len(b); i += 32 { - f.write(0, b, false) + f.write(0, f.writeSyncCount, b, false) } -} - -// getPotentialSpan returns the total amount of data that can represented under the given level -// \TODO use a table instead -func (f *AltFileHasher) getPotentialSpan(level int) int { - span := f.chunkSize - for i := 0; i < level; i++ { - span *= f.branches - } - return span + f.writeSyncCount++ } // write signals the level channel handler that a new write has taken place // it creates a new write job when write count hits chunk boundaries // TODO pass writecount offset through function to avoid segmentwrite calculation -func (f *AltFileHasher) write(level int, b []byte, last bool) { - writeCount := f.getWriteCountSafe(level) - log.Trace("write chunk boundary align", "writecount", writeCount, "total", f.getTotalBytesSafe()) - if f.isChunkBoundary(level, writeCount) { +func (f *AltFileHasher) write(level int, offset int, b []byte, last bool) { + log.Trace("write chunk boundary align", "offset", offset, "total", f.getTotalBytesSafe(), "level", level, "last", last, "datalength", len(b)) + if f.isChunkBoundary(level, offset) { f.addJob(level, b, last) } - log.Debug("write levelwritec", "level", level, "last", last, "wc", writeCount) - f.levelWriteC[level] <- b + log.Debug("write levelwritec", "level", level, "last", last, "wc", offset) + if len(b) > 0 { + f.levelWriteC[level] 
<- b + } + if last { + f.levelWriteC[level] <- nil + } } -// starts one loop for every level that accepts hashing job +// itarts one loop for every level that accepts hashing job // propagates sequential writes up the levels func (f *AltFileHasher) processJobs() { for i := 0; i < altFileHasherMaxLevels; i++ { @@ -295,28 +284,33 @@ func (f *AltFileHasher) processJobs() { } log.Debug("have job write", "level", i, "j", j) h := <-j.hasher - for { + var finished bool + for !finished { var writeCount int + var dataPtr []byte select { - case dataPtr := <-f.levelWriteC[i]: + case dataPtr = <-f.levelWriteC[i]: writeCount = f.getWriteCountSafe(i) - log.Trace("job write chan", "level", i, "data", dataPtr) if len(dataPtr) == 0 { j.last = true } + log.Trace("job write chan", "level", i, "data", dataPtr, "wc", writeCount, "last", j.last) if !j.last { netOffset := (writeCount % f.batchSegments) h.Write(netOffset%f.branches, dataPtr) + } + if len(dataPtr) > 0 { writeCount = f.incWriteCountSafe(i) } case <-f.ctx.Done(): return } - if f.isChunkBoundary(i, writeCount) || j.last { + if (writeCount != 0 && f.isChunkBoundary(i, writeCount)) || j.last { log.Trace("chunk boundary|last", "last", j.last, "wc", writeCount, "level", i) f.doHash(h, i, &j) - break + finished = true } + } case <-f.ctx.Done(): log.Debug("job exiting", "level", i, "err", f.ctx.Err()) @@ -421,16 +415,16 @@ func (f *AltFileHasher) processJobs() { func (f *AltFileHasher) doHash(h bmt.SectionWriter, level int, j *fileHashJob) { // check for the dangling chunk + offset := f.getWriteCountSafe(level) if level > 0 && j.last { writeCountBelow := f.getWriteCountSafe(level - 1) - offset := f.getWriteCountSafe(level) f.lock.Lock() log.Debug("danglecheck", "offset", offset, "f.batchSegments", f.batchSegments, "wc", writeCountBelow) childWrites := writeCountBelow % f.batchSegments if offset%f.branches == 0 && childWrites <= f.branches { log.Debug("dangle done", "level", level, "writeCount", j.c) f.lock.Unlock() - f.write(level+1, j.data, true) + f.write(level+1, offset, j.data, true) return } f.lock.Unlock() @@ -471,16 +465,18 @@ func (f *AltFileHasher) doHash(h bmt.SectionWriter, level int, j *fileHashJob) { // write to next level hasher // TODO here we are copying data bytes, can we get away with referencing underlying buffer? 
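// The span arithmetic above becomes clearer with concrete numbers. With
// segmentSize = 32 and branches = 128 (so chunkSize = 4096), a 5000 byte
// file gives on level 1: span = 4096*128 = 524288, dataUnderSpan =
// (5000-1)%524288+1 = 5000, and hashDataSize = ((5000-1)/4096+1)*32 = 64,
// that is, two child digests. A standalone re-derivation of the formulas,
// assuming the function name spanMeta, which is not this file's API:
package sketch

import "encoding/binary"

// spanMeta computes the number of bytes the level hasher must sum over and
// the 8-byte little-endian span header, mirroring the doHash calculation.
func spanMeta(level, totalBytes, segmentSize, branches int, last bool) (hashDataSize int, meta []byte) {
	span := segmentSize * branches // span at level 0 is one chunk
	for i := 0; i < level; i++ {
		span *= branches
	}
	dataUnderSpan := span
	if last { // the final chunk on a level may span less data than a full one
		dataUnderSpan = (totalBytes-1)%span + 1
	}
	if level == 0 {
		hashDataSize = dataUnderSpan
	} else {
		hashDataSize = ((dataUnderSpan-1)/(span/branches) + 1) * segmentSize
	}
	meta = make([]byte, 8)
	binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan))
	return hashDataSize, meta
}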
- go func(j *fileHashJob) { - log.Trace("next level write", "level", level+1, "digest", hexutil.Encode(j.sum)) - f.write(level+1, j.sum, j.last) - - // close this job channel if this is the last write - if j.last { - log.Trace("dohash last close chan", "level", level) - close(f.levelJobs[level]) - } - }(j) + //go func(j *fileHashJob) { + log.Trace("next level write", "level", level+1, "digest", hexutil.Encode(j.sum)) + + parentOffset := (offset - 1) / f.branches + f.write(level+1, parentOffset, j.sum, j.last) + + // close this job channel if this is the last write + if j.last { + log.Trace("dohash last close chan", "level", level) + close(f.levelJobs[level]) + } + //}(j) } //func (f *AltFileHasher) doHash_() { From 535365e70d4eb740d510c7a06c559efecb748711 Mon Sep 17 00:00:00 2001 From: lash Date: Sun, 10 Mar 2019 19:02:27 +0100 Subject: [PATCH 39/50] swarm/storage: Remove commented code --- swarm/storage/filehasher_alt.go | 214 +++---------------------------- swarm/storage/filehasher_test.go | 7 +- 2 files changed, 18 insertions(+), 203 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index a641f1a1c1..457d44e50a 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -15,25 +15,19 @@ const ( ) type AltFileHasher struct { - ctx context.Context // per job context - branches int - segmentSize int - chunkSize int - batchSegments int - //hashers [altFileHasherMaxLevels]bmt.SectionWriter - //buffers [altFileHasherMaxLevels][]byte // holds chunk data on each level (todo; push data to channel on complete). Buffers can hold one batch of data - levelJobs [altFileHasherMaxLevels]chan fileHashJob // receives finished writes pending hashing to pass on to output handler - levelWriteC [altFileHasherMaxLevels]chan []byte - levelCount int // number of levels in this job (only determined when Finish() is called - //finished bool // finished writing data + ctx context.Context // per job context + branches int + segmentSize int + chunkSize int + batchSegments int + levelJobs [altFileHasherMaxLevels]chan fileHashJob // receives finished writes pending hashing to pass on to output handler + levelWriteC [altFileHasherMaxLevels]chan []byte + levelCount int // number of levels in this job (only determined when Finish() is called totalBytes int // total data bytes written targetCount [altFileHasherMaxLevels - 1]int // expected segment writes per level writeCount [altFileHasherMaxLevels]int // number of segment writes received by job buffer per level RENAME writeSyncCount int // number of external writes to the filehasher RENAME - //doneC [altFileHasherMaxLevels]chan struct{} // used to tell parent that child is done writing on right edge - resC chan []byte // used to tell hasher that all is done - //wg sync.WaitGroup // used to tell caller hashing is done (maybe be replced by channel, and doneC only internally) - //lwg [altFileHasherMaxLevels]sync.WaitGroup // used to block while the level's hasher is busy + resC chan []byte // used to tell hasher that all is done // TODO replace with rwlock lock sync.Mutex // protect filehasher state vars hasherPool sync.Pool @@ -48,11 +42,6 @@ func NewAltFileHasher(hasherFunc func() bmt.SectionWriter, segmentSize int, bran resC: make(chan []byte), } for i := 0; i < altFileHasherMaxLevels-1; i++ { - //f.buffers[i] = make([]byte, f.chunkSize*branches) // 4.6M with 9 levels - //f.hashers[i] = hasherFunc() - //f.doneC[i] = make(chan struct{}, 1) - - // f.levelJobs[i] = make(chan fileHashJob, branches*2-1) 
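// The constructor around this hunk keeps the hasherPool introduced by the
// refactor: jobs borrow a hasher from a sync.Pool and putHasher returns it
// Reset, so allocation is amortized across jobs. A generic standalone
// sketch of that borrow/Reset/return pattern; newPool and withHasher are
// illustrative names, not this package's API:
package sketch

import (
	"hash"
	"sync"
)

// newPool builds a pool that constructs a fresh hasher on demand.
func newPool(newHash func() hash.Hash) *sync.Pool {
	return &sync.Pool{New: func() interface{} { return newHash() }}
}

// withHasher borrows a hasher, runs f with it, and hands it back scrubbed.
func withHasher(p *sync.Pool, f func(hash.Hash)) {
	h := p.Get().(hash.Hash)
	f(h)
	h.Reset() // never return a dirty hasher to the pool
	p.Put(h)
}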
f.levelWriteC[i] = make(chan []byte) } f.hasherPool.New = func() interface{} { @@ -153,7 +142,7 @@ func (f *AltFileHasher) Reset() { if i > 0 { f.targetCount[i-1] = 0 } - f.levelJobs[i] = make(chan fileHashJob, branches*2-1) + f.levelJobs[i] = make(chan fileHashJob, branches-1) f.writeCount[i] = 0 f.writeSyncCount = 0 } @@ -163,14 +152,6 @@ func (f *AltFileHasher) Reset() { f.processJobs() } -// check whether all writes on all levels have finished -// holds the lock -//func (f *AltFileHasher) isWriteFinishedSafe() bool { -// f.lock.Lock() -// defer f.lock.Unlock() -// return f.finished -//} - // Finish marks the final write of the file // It returns the root hash of the processed file func (f *AltFileHasher) Finish(b []byte) []byte { @@ -204,32 +185,24 @@ func (f *AltFileHasher) Finish(b []byte) []byte { f.lock.Unlock() - // write and return result when we get it back + // if there is data with the last finish call, write this as normal first if len(b) > 0 { + f.lock.Lock() + f.totalBytes += len(b) + f.lock.Unlock() f.write(0, f.writeSyncCount, b, false) f.writeSyncCount++ } else { - // - // // if the writecount of write bytecount does not end on a chunk boundary (number of segments in chunk) - // // we need to poke the job with a final write message - //segmentWrites := (f.getTotalBytesSafe()-1)/f.segmentSize + 1 if f.writeSyncCount%f.branches == 0 { - log.Trace("write end chunk boundary align", "total", f.totalBytes, "segmentwrites", f.writeSyncCount) + log.Trace("write end chunk boundary align", "segmentwrites", f.writeSyncCount) f.addJob(0, nil, true) } f.write(0, f.writeSyncCount, nil, true) - //f.levelWriteC[0] <- nil } // get the result r := <-f.resC - // free the rest of the level channels - // for i := 0; i < f.levelCount; i++ { - // log.Debug("purging done chans", "l", i) - // close(f.levelJobs[i]) - // } - //return the reult return r } @@ -322,94 +295,6 @@ func (f *AltFileHasher) processJobs() { } } -// write writes the provided data directly to the underlying hasher -// and performs recursive hashing on complete batches or data end -// b is the data to write -// offset is the level's segment we are writing to -// level is the tree level we are writing to -// currentTotal is the current total of data bytes written so far -// TODO: ensure local copies of all thread unsafe vars -//func (f *AltFileHasher) write(b []byte, offset int, level int, currentTotal int) { -// -// // copy state vars so we don't have to keep lock across the call -// wc := f.getWriteCountSafe(level) -// f.lock.Lock() -// targetCount := f.targetCount[level] -// f.lock.Unlock() -// -// // only for log, delete on prod -// if b == nil { -// log.Debug("write", "level", level, "offset", offset, "length", "nil", "wc", wc, "total", currentTotal) -// } else { -// l := 32 -// if len(b) < l { -// l = len(b) -// } -// log.Debug("write", "level", level, "offset", offset, "length", len(b), "wc", wc, "data", b[:l], "total", currentTotal) -// } -// -// // if top level then b is the root hash which means we are finished -// // write it to the topmost buffer and release the waitgroup blocking and then return -// // \TODO should never be called when we refactor to separate hasher level buffer handler -// if f.isTopLevelSafe(level) { -// copy(f.buffers[level], b) -// f.wg.Done() -// log.Debug("top done", "level", level) -// return -// } -// -// // only write if we have data -// // b will never be nil except data level where it can be nil if no additional data is written upon the call to Finish() -// // (else) if b is nil, and if 
the data is on a chunk boundary, the data will already have been hashed, which means we're done with that level -// if len(b) > 0 { -// -// // get the segment within the batch we are in -// netOffset := (offset % f.batchSegments) -// -// // write to the current level's hasher -// f.hashers[level].Write(netOffset%f.branches, b) -// -// // copy the data into the buffer -// copy(f.buffers[level][netOffset*f.segmentSize:], b) -// -// // increment the write count -// wc = f.incWriteCountSafe(level) -// -// } else if wc%f.branches == 0 { -// f.wg.Done() -// f.doneC[level] <- struct{}{} -// return -// } -// -// // execute the hasher if: -// // - we are on a chunk edge -// // - we are on the data level and writes are set to finished -// // - we are above data level, writes are finished, and expected level write count is reached -// executeHasher := false -// if wc%f.branches == 0 { -// log.Debug("executehasher", "reason", "edge", "level", level, "offset", offset) -// executeHasher = true -// } else if f.finished && level == 0 { -// log.Debug("executehasher", "reason", "data done", "level", level, "offset", offset) -// executeHasher = true -// } else if f.finished && targetCount > 0 && targetCount == wc { -// <-f.doneC[level-1] -// log.Debug("executehasher", "reason", "target done", "level", level, "offset", offset, "wc", wc) -// executeHasher = true -// } -// -// // if this was a nil data finish instruction and we are on boundary, we may be still hashing asynchronously. Wait for it to finish -// // if we are on boundary, no need to hash further -// if f.finished && len(b) == 0 && level == 0 { -// f.lwg[0].Wait() -// log.Debug("finished and 0", "wc", wc) -// } -// -// if executeHasher { -// f.doHash() -// } -//} - // synchronous method that hashes the data contained in the job // modifies fileHashJob in place func (f *AltFileHasher) doHash(h bmt.SectionWriter, level int, j *fileHashJob) { @@ -465,7 +350,6 @@ func (f *AltFileHasher) doHash(h bmt.SectionWriter, level int, j *fileHashJob) { // write to next level hasher // TODO here we are copying data bytes, can we get away with referencing underlying buffer? 
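// Aside: the span arithmetic in doHash above is easier to see in isolation.
// A minimal sketch under the conventions of this file (chunkSize =
// segmentSize * branches; 8-byte little-endian span prefix passed as "meta");
// the helper name is illustrative only and assumes "encoding/binary":
//
//	func exampleSpanMath(level, segmentSize, branches, totalBytes int, last bool) (hashDataSize int, meta []byte) {
//		// data bytes potentially subsumed under one chunk at this level:
//		// chunkSize * branches^level
//		span := segmentSize * branches
//		for i := 0; i < level; i++ {
//			span *= branches
//		}
//		// the last chunk of a level may cover its span only partially
//		dataUnderSpan := span
//		if last {
//			dataUnderSpan = (totalBytes-1)%span + 1
//		}
//		// bytes fed to the hasher: raw data on the data level, one
//		// segment per child span on intermediate levels
//		if level == 0 {
//			hashDataSize = dataUnderSpan
//		} else {
//			hashDataSize = ((dataUnderSpan-1)/(span/branches) + 1) * segmentSize
//		}
//		meta = make([]byte, 8)
//		binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan))
//		return hashDataSize, meta
//	}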
- //go func(j *fileHashJob) { log.Trace("next level write", "level", level+1, "digest", hexutil.Encode(j.sum)) parentOffset := (offset - 1) / f.branches @@ -476,72 +360,4 @@ func (f *AltFileHasher) doHash(h bmt.SectionWriter, level int, j *fileHashJob) { log.Trace("dohash last close chan", "level", level) close(f.levelJobs[level]) } - //}(j) } - -//func (f *AltFileHasher) doHash_() { -// // if we are still hashing the data for this level, wait until we are done -// f.lwg[level].Wait() -// -// // check for the dangling chunk -// if level > 0 && f.finished { -// cwc := f.getWriteCountSafe(level - 1) -// -// f.lock.Lock() -// log.Debug("danglecheck", "offset", offset, "f.batchSegments", f.batchSegments, "cwc", cwc) -// childWrites := cwc % f.batchSegments -// if offset%f.branches == 0 && childWrites <= f.branches { -// log.Debug("dangle done", "level", level, "wc", wc) -// parentOffset := (wc - 1) / f.branches -// f.lock.Unlock() -// f.wg.Done() -// f.doneC[level] <- struct{}{} -// f.write(b, parentOffset, level+1, currentTotal) -// return -// } -// f.lock.Unlock() -// } -// -// f.lwg[level].Add(1) -// -// // calculate what the potential span under this chunk will be -// span := f.getPotentialSpan(level) -// -// // calculate the actual data under this span -// // if data is fully written, the current chunk may be shorter than the span -// var dataUnderSpan int -// if f.isWriteFinishedSafe() { -// dataUnderSpan = (currentTotal-1)%span + 1 -// } else { -// dataUnderSpan = span -// } -// -// // calculate the length of the actual data in this chunk (the data to be hashed) -// var hashDataSize int -// if level == 0 { -// hashDataSize = dataUnderSpan -// } else { -// hashDataSize = ((dataUnderSpan-1)/(span/f.branches) + 1) * f.segmentSize -// } -// -// meta := make([]byte, 8) -// binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) -// log.Debug("hash", "level", level, "size", hashDataSize, "meta", meta, "wc", wc) -// hashResult := f.hashers[level].Sum(nil, hashDataSize, meta) -// f.hashers[level].Reset() -// -// // hash the chunk and write it to the current cursor position on the next level -// go func(level int, wc int, finished bool, currentTotal int, targetCount int) { -// // if the hasher on the level above is still working, wait for it -// f.lwg[level+1].Wait() -// log.Debug("gofunc hash up", "level", level, "wc", wc) -// parentOffset := (wc - 1) / f.branches -// if (level == 0 && finished) || targetCount == wc { -// log.Debug("done", "level", level) -// f.wg.Done() -// f.doneC[level] <- struct{}{} -// } -// f.write(hashResult, parentOffset, level+1, currentTotal) -// f.lwg[level].Done() -// }(level, wc, f.finished, currentTotal, targetCount) -//} diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 8cc8075e41..14323d1e81 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -68,17 +68,16 @@ var ( "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", } - start = 0 + start = 5 end = 10 ) -// //func init() { -// pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize*32) +// pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize) //} func newAsyncHasher() bmt.SectionWriter { - pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize*32) + pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize) h := bmt.New(pool) return h.NewAsyncWriter(false) } From 48c0c3e7fac5874b44a91a0df1665d748ac28c8b Mon Sep 17 00:00:00 2001 From: lash Date: Sun, 10 Mar 2019 21:54:17 +0100 Subject: [PATCH 40/50] 
swarm/storage: Fixed all hangs, dangle broken --- swarm/storage/filehasher_alt.go | 104 +++++++++++++++++-------------- swarm/storage/filehasher_test.go | 4 +- 2 files changed, 59 insertions(+), 49 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index 457d44e50a..8af306a9f6 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -3,6 +3,7 @@ package storage import ( "context" "encoding/binary" + "fmt" "sync" "github.com/ethereum/go-ethereum/common/hexutil" @@ -53,29 +54,29 @@ func NewAltFileHasher(hasherFunc func() bmt.SectionWriter, segmentSize int, bran // fileHashJob is submitted to level buffer channel when a chunk boundary is crossed on write type fileHashJob struct { - index int // index this write belongs to - c int // write data cursor + index int // index this write belongs to TODO implement data []byte // data from the write hasher chan bmt.SectionWriter // receives the next free hasher to process the data with sum []byte // holds the hash result last bool // true if this is the last write on the level + skip bool // set if hashing should be skipped for this job (used for edge case boundary write end to trigger level 1 in correct order) } // enforces sequential parameters for the job descriptions to the level buffer channels // the hasher is retrieved asynchronously so write can happen even if all hashers are busy -func (f *AltFileHasher) addJob(level int, data []byte, last bool) { +func (f *AltFileHasher) addJob(level int, data []byte, last bool, skip bool) { j := fileHashJob{ - index: f.getWriteCountSafe(level), data: data, last: last, hasher: make(chan bmt.SectionWriter, 1), + skip: skip, } go func(hasher chan<- bmt.SectionWriter) { log.Debug("getting hasher", "level", level) j.hasher <- f.hasherPool.Get().(*bmt.AsyncHasher) log.Debug("got hasher", "level", level) }(j.hasher) - log.Debug("new job", "leve", level, "last", last, "index", j.index) + log.Debug("add job", "level", level, "job", fmt.Sprintf("%p", &j)) f.levelJobs[level] <- j } @@ -92,7 +93,7 @@ func (f *AltFileHasher) putHasher(h bmt.SectionWriter) { // returns true if current write offset of level is on hashing boundary func (f *AltFileHasher) isChunkBoundary(level int, wc int) bool { isboundary := wc%f.branches == 0 - log.Trace("check chunk boundary", "level", level, "wc", wc, "is", isboundary) + log.Debug("check chunk boundary", "level", level, "wc", wc, "is", isboundary) return isboundary } @@ -192,12 +193,16 @@ func (f *AltFileHasher) Finish(b []byte) []byte { f.lock.Unlock() f.write(0, f.writeSyncCount, b, false) f.writeSyncCount++ - } else { - if f.writeSyncCount%f.branches == 0 { - log.Trace("write end chunk boundary align", "segmentwrites", f.writeSyncCount) - f.addJob(0, nil, true) + } + + if f.writeSyncCount%f.branches == 0 { + log.Debug("write end chunk boundary align", "segmentwrites", f.writeSyncCount) + f.addJob(0, nil, true, true) + if f.levelCount > 2 { + f.levelWriteC[0] <- nil } - f.write(0, f.writeSyncCount, nil, true) + } else { + f.levelWriteC[0] <- nil } // get the result @@ -225,7 +230,7 @@ func (f *AltFileHasher) Write(b []byte) { func (f *AltFileHasher) write(level int, offset int, b []byte, last bool) { log.Trace("write chunk boundary align", "offset", offset, "total", f.getTotalBytesSafe(), "level", level, "last", last, "datalength", len(b)) if f.isChunkBoundary(level, offset) { - f.addJob(level, b, last) + f.addJob(level, b, last, false) } log.Debug("write levelwritec", "level", level, "last", last, "wc", offset) 
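// Aside: addJob above decouples job submission from hasher availability by
// handing each job a 1-buffered channel that a goroutine fills from the
// pool; the receive blocks only when the hasher is actually needed. A
// minimal sketch of the pattern (name illustrative, assumes "sync"):
//
//	func reserveHasher(pool *sync.Pool) <-chan bmt.SectionWriter {
//		c := make(chan bmt.SectionWriter, 1) // buffered: the goroutine never leaks
//		go func() { c <- pool.Get().(bmt.SectionWriter) }()
//		return c
//	}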
if len(b) > 0 { @@ -266,20 +271,20 @@ func (f *AltFileHasher) processJobs() { writeCount = f.getWriteCountSafe(i) if len(dataPtr) == 0 { j.last = true - } - log.Trace("job write chan", "level", i, "data", dataPtr, "wc", writeCount, "last", j.last) - if !j.last { - netOffset := (writeCount % f.batchSegments) - h.Write(netOffset%f.branches, dataPtr) - } - if len(dataPtr) > 0 { + } else { + log.Trace("job write chan", "level", i, "data", dataPtr, "wc", writeCount, "last", j.last) + if !(j.last && i == 0) { + log.Debug("WRITE TO HASHER", "level", i, "wc", writeCount, "data", dataPtr) + netOffset := (writeCount % f.batchSegments) + h.Write(netOffset%f.branches, dataPtr) + } writeCount = f.incWriteCountSafe(i) } case <-f.ctx.Done(): return } if (writeCount != 0 && f.isChunkBoundary(i, writeCount)) || j.last { - log.Trace("chunk boundary|last", "last", j.last, "wc", writeCount, "level", i) + log.Debug("chunk boundary|last", "last", j.last, "wc", writeCount, "level", i) f.doHash(h, i, &j) finished = true } @@ -304,47 +309,52 @@ func (f *AltFileHasher) doHash(h bmt.SectionWriter, level int, j *fileHashJob) { if level > 0 && j.last { writeCountBelow := f.getWriteCountSafe(level - 1) f.lock.Lock() - log.Debug("danglecheck", "offset", offset, "f.batchSegments", f.batchSegments, "wc", writeCountBelow) + log.Debug("danglecheck", "offset", offset, "f.batchSegments", f.batchSegments, "wcbelow", writeCountBelow) childWrites := writeCountBelow % f.batchSegments if offset%f.branches == 0 && childWrites <= f.branches { - log.Debug("dangle done", "level", level, "writeCount", j.c) + log.Debug("dangle done", "level", level, "writeCount", offset) f.lock.Unlock() f.write(level+1, offset, j.data, true) + close(f.levelJobs[level]) return } f.lock.Unlock() } - // calculate what the potential span under this chunk will be - span := f.getPotentialSpan(level) + if !j.skip { + // calculate what the potential span under this chunk will be + span := f.getPotentialSpan(level) - // calculate the actual data under this span - // if data is fully written, the current chunk may be shorter than the span - var dataUnderSpan int - if j.last { - dataUnderSpan = (f.getTotalBytesSafe()-1)%span + 1 - } else { - dataUnderSpan = span - } + // calculate the actual data under this span + // if data is fully written, the current chunk may be shorter than the span + var dataUnderSpan int - // calculate the length of the actual data in this chunk (the data to be hashed) - var hashDataSize int - if level == 0 { - hashDataSize = dataUnderSpan - } else { - hashDataSize = ((dataUnderSpan-1)/(span/f.branches) + 1) * f.segmentSize - } + if j.last { + dataUnderSpan = (f.getTotalBytesSafe()-1)%span + 1 + } else { + dataUnderSpan = span + } - meta := make([]byte, 8) - binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) - log.Debug("hash", "level", level, "size", hashDataSize, "meta", meta, "wc", j.c, "hasher", h, "gettotalbytes", f.getTotalBytesSafe(), "last", j.last, "span", span) + // calculate the length of the actual data in this chunk (the data to be hashed) + var hashDataSize int + if level == 0 { + hashDataSize = dataUnderSpan + } else { + hashDataSize = ((dataUnderSpan-1)/(span/f.branches) + 1) * f.segmentSize + } + + meta := make([]byte, 8) + binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) + log.Debug("hash", "level", level, "size", hashDataSize, "job", fmt.Sprintf("%p", j), "meta", meta, "wc", offset, "hasher", h, "gettotalbytes", f.getTotalBytesSafe(), "last", j.last, "span", span, "data", j.data) - j.sum = h.Sum(nil, 
hashDataSize, meta)
+		j.sum = h.Sum(nil, hashDataSize, meta)
+		log.Debug("hash done", "level", level, "job", fmt.Sprintf("%p", j), "wc", offset)

-	// also write to output
-	go func() {
-		log.Trace("TODO write out to chunk", "sum", hexutil.Encode(j.sum), "data", hexutil.Encode(j.data))
-	}()
+		// also write to output
+		go func() {
+			log.Trace("TODO write out to chunk", "sum", hexutil.Encode(j.sum), "data", hexutil.Encode(j.data))
+		}()
+	}

 	f.putHasher(h)

 	// write to next level hasher
diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go
index 14323d1e81..acc6fc0813 100644
--- a/swarm/storage/filehasher_test.go
+++ b/swarm/storage/filehasher_test.go
@@ -68,8 +68,8 @@ var (
 		"522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b",
 	}

-	start = 5
-	end   = 10
+	start = 0
+	end   = 19
 )

 //func init() {

From 1125d168f78b456b2a90b6243f676a9405430ef9 Mon Sep 17 00:00:00 2001
From: lash
Date: Mon, 11 Mar 2019 09:14:18 +0100
Subject: [PATCH 41/50] swarm/storage: Improved trigger propagation

---
 swarm/storage/filehasher_alt.go | 139 ++++++++++++++++++++------------
 1 file changed, 89 insertions(+), 50 deletions(-)

diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go
index 8af306a9f6..ea20c5df58 100644
--- a/swarm/storage/filehasher_alt.go
+++ b/swarm/storage/filehasher_alt.go
@@ -16,19 +16,24 @@ const (
 )

 type AltFileHasher struct {
-	ctx            context.Context // per job context
-	branches       int
-	segmentSize    int
-	chunkSize      int
-	batchSegments  int
-	levelJobs      [altFileHasherMaxLevels]chan fileHashJob // receives finished writes pending hashing to pass on to output handler
-	levelWriteC    [altFileHasherMaxLevels]chan []byte
-	levelCount     int // number of levels in this job (only determined when Finish() is called
-	totalBytes     int // total data bytes written
-	targetCount    [altFileHasherMaxLevels - 1]int // expected segment writes per level
-	writeCount     [altFileHasherMaxLevels]int // number of segment writes received by job buffer per level RENAME
-	writeSyncCount int // number of external writes to the filehasher RENAME
-	resC           chan []byte // used to tell hasher that all is done
+	ctx        context.Context // per job context
+	cancelFunc func()          // context cancel
+
+	branches      int // number of branches in the tree
+	segmentSize   int // single write size (equals hash digest length)
+	chunkSize     int // size of chunks (segmentSize * branches)
+	batchSegments int // number of writes in one batch (a batch is branches*(chunkSize/segmentSize)) - used for dangling chunk calculation
+
+	totalBytes     int         // total data bytes written
+	writeSyncCount int         // number of external writes to the filehasher RENAME
+	levelCount     int         // number of levels in this job (only determined when Finish() is called)
+	resC           chan []byte // used to tell hasher that all is done
+
+	levelJobs   [altFileHasherMaxLevels]chan fileHashJob // receives finished writes pending hashing to pass on to output handler
+	levelWriteC [altFileHasherMaxLevels]chan []byte      // triggers writes to the hasher of the currently active level's job
+	targetCount [altFileHasherMaxLevels - 1]int          // expected segment writes per level (top will always be one write)
+	writeCount  [altFileHasherMaxLevels]int              // number of segment writes received by job buffer per level RENAME
+
+	// TODO replace with rwlock
 	lock       sync.Mutex // protect filehasher state vars
 	hasherPool sync.Pool
@@ -54,34 +59,39 @@ func NewAltFileHasher(hasherFunc func() bmt.SectionWriter, segmentSize int, bran

 // fileHashJob is submitted to level buffer channel when a chunk boundary
is crossed on write type fileHashJob struct { - index int // index this write belongs to TODO implement - data []byte // data from the write - hasher chan bmt.SectionWriter // receives the next free hasher to process the data with - sum []byte // holds the hash result - last bool // true if this is the last write on the level - skip bool // set if hashing should be skipped for this job (used for edge case boundary write end to trigger level 1 in correct order) + writecount int // number of writes the job has received + data []byte // data from the write + hasher chan bmt.SectionWriter // receives the next free hasher to process the data with + sum []byte // holds the hash result + last bool // true if this is the last write on the level } // enforces sequential parameters for the job descriptions to the level buffer channels // the hasher is retrieved asynchronously so write can happen even if all hashers are busy -func (f *AltFileHasher) addJob(level int, data []byte, last bool, skip bool) { +func (f *AltFileHasher) addJob(level int, data []byte, last bool) { j := fileHashJob{ data: data, last: last, hasher: make(chan bmt.SectionWriter, 1), - skip: skip, } + + // asynchronously retrieve the hashers + // this allows write jobs to be set up even if all hashers are busy go func(hasher chan<- bmt.SectionWriter) { log.Debug("getting hasher", "level", level) j.hasher <- f.hasherPool.Get().(*bmt.AsyncHasher) log.Debug("got hasher", "level", level) }(j.hasher) + + // add the job to the appropriate level queue log.Debug("add job", "level", level, "job", fmt.Sprintf("%p", &j)) f.levelJobs[level] <- j } +// cancel the file hashing operation func (f *AltFileHasher) cancel(e error) { - log.Error("cancel called TODO!") + f.cancelFunc() + f.Reset() } // makes sure the hasher is clean before it's returned to the pool @@ -97,6 +107,7 @@ func (f *AltFileHasher) isChunkBoundary(level int, wc int) bool { return isboundary } +// returns the total number of bytes written to data level func (f *AltFileHasher) getTotalBytesSafe() int { f.lock.Lock() defer f.lock.Unlock() @@ -120,9 +131,14 @@ func (f *AltFileHasher) incWriteCountSafe(level int) int { return f.writeCount[level] } +// check if the given level is top level +// will always return false before Finish() is called func (f *AltFileHasher) isTopLevelSafe(level int) bool { f.lock.Lock() defer f.lock.Unlock() + if f.levelCount == 0 { + return false + } return level == f.levelCount-1 } @@ -139,6 +155,16 @@ func (f *AltFileHasher) getPotentialSpan(level int) int { // makes the filehasher ready for new duty // implements bmt.SectionWriter func (f *AltFileHasher) Reset() { + + // we always have minimum two levels; data level and top level + // the top level will always close itself + // here we close all the others + if f.levelCount > 0 { + for i := 0; i < f.levelCount-2; i++ { + close(f.levelJobs[i]) + } + } + for i := 0; i < altFileHasherMaxLevels; i++ { if i > 0 { f.targetCount[i-1] = 0 @@ -149,7 +175,7 @@ func (f *AltFileHasher) Reset() { } f.totalBytes = 0 f.levelCount = 0 - f.ctx = context.TODO() + f.ctx, f.cancelFunc = context.WithCancel(context.Background()) f.processJobs() } @@ -172,12 +198,22 @@ func (f *AltFileHasher) Finish(b []byte) []byte { close(f.levelJobs[i-1]) } + // if there is data with the last finish call, write this as normal first + if len(b) > 0 { + f.totalBytes += len(b) + f.lock.Unlock() + f.write(0, f.writeSyncCount, b, false) + f.writeSyncCount++ + f.lock.Lock() + } + // calculate the amount of write() calls expected in total // 
start with the amount of data writes (level 0)
 	// add number of writes divided by 128 for every additional level
 	// we don't use targetCount for level 0, since f.finished annotates that it is reached
 	target := (f.totalBytes-1)/f.segmentSize + 1
 	log.Debug("setting targetcount", "l", 0, "t", target)
+	f.targetCount[0] = target
 	for i := 1; i < f.levelCount; i++ {
 		target = (target-1)/f.branches + 1
 		f.targetCount[i] = target
@@ -186,28 +222,27 @@ func (f *AltFileHasher) Finish(b []byte) []byte {

 	f.lock.Unlock()

-	// if there is data with the last finish call, write this as normal first
-	if len(b) > 0 {
-		f.lock.Lock()
-		f.totalBytes += len(b)
-		f.lock.Unlock()
-		f.write(0, f.writeSyncCount, b, false)
-		f.writeSyncCount++
-	}
+	log.Warn("foo", "tgt", f.targetCount[f.levelCount-2], "lvl", f.levelCount-2, "br", f.branches)
+	// if the last intermediate level ends on a chunk boundary, we already have our result
+	// and no further action is needed
+	if f.targetCount[f.levelCount-2]%f.branches > 0 {
+
+		// (it will not hash as long as the job write count is 0)
+		// if not, we need to trigger hashing on the incomplete chunk write
+		if f.writeSyncCount%f.branches == 0 {
+			log.Debug("write end chunk boundary align", "segmentwrites", f.writeSyncCount)
+			f.addJob(0, nil, true)

-	if f.writeSyncCount%f.branches == 0 {
-		log.Debug("write end chunk boundary align", "segmentwrites", f.writeSyncCount)
-		f.addJob(0, nil, true, true)
-		if f.levelCount > 2 {
-			f.levelWriteC[0] <- nil
 		}
-	} else {
 		f.levelWriteC[0] <- nil
 	}

 	// get the result
 	r := <-f.resC

+	// clean up
+	f.Reset()
+
 	//return the result
 	return r
 }
@@ -230,7 +265,7 @@ func (f *AltFileHasher) Write(b []byte) {
 // it creates a new write job when write count hits chunk boundaries
 // TODO pass writecount offset through function to avoid segmentwrite calculation
 func (f *AltFileHasher) write(level int, offset int, b []byte, last bool) {
 	log.Trace("write chunk boundary align", "offset", offset, "total", f.getTotalBytesSafe(), "level", level, "last", last, "datalength", len(b))
 	if f.isChunkBoundary(level, offset) {
-		f.addJob(level, b, last, false)
+		f.addJob(level, b, last)
 	}
 	log.Debug("write levelwritec", "level", level, "last", last, "wc", offset)
 	if len(b) > 0 {
@@ -279,20 +314,25 @@ func (f *AltFileHasher) processJobs() {
 						h.Write(netOffset%f.branches, dataPtr)
 					}
 					writeCount = f.incWriteCountSafe(i)
+					j.writecount++
 				}
 			case <-f.ctx.Done():
 				return
 			}
+
+			// enter the hashing and write propagation if we are on a chunk boundary or
+			// if we're in the explicitly last write
+			// the latter can be a write without data, which will be the trigger from Finish()
-			if (writeCount != 0 && f.isChunkBoundary(i, writeCount)) || j.last {
+			if (f.isChunkBoundary(i, writeCount)) || j.last {
 				log.Debug("chunk boundary|last", "last", j.last, "wc", writeCount, "level", i)
 				f.doHash(h, i, &j)
 				finished = true
 			}
 		}
+		f.putHasher(h)
 	case <-f.ctx.Done():
 		log.Debug("job exiting", "level", i, "err", f.ctx.Err())
-		close(f.levelJobs[i])
 		return
 	}
 }
@@ -300,7 +340,12 @@
 	}
 }

-// synchronous method that hashes the data contained in the job
+// synchronous method that hashes the data (if any) contained in the job
+// in which case it queues write of the result to the parent level
+//
+// if the job contains no data, a zero-length data write is sent to parent
+// this is used to propagate pending hashings of incomplete chunks further up the levels
+//
 // modifies fileHashJob in place
 func (f *AltFileHasher) doHash(h bmt.SectionWriter, level int, j *fileHashJob) {
@@ -321,7 +366,9 @@ func (f *AltFileHasher) doHash(h bmt.SectionWriter, level int, j *fileHashJob) {
 		f.lock.Unlock()
 	}

-	if !j.skip {
+	// skip
hashing if we have no writes in the job + if j.writecount > 0 { + // calculate what the potential span under this chunk will be span := f.getPotentialSpan(level) @@ -355,19 +402,11 @@ func (f *AltFileHasher) doHash(h bmt.SectionWriter, level int, j *fileHashJob) { log.Trace("TODO write out to chunk", "sum", hexutil.Encode(j.sum), "data", hexutil.Encode(j.data)) }() } - f.putHasher(h) // write to next level hasher - // TODO here we are copying data bytes, can we get away with referencing underlying buffer? log.Trace("next level write", "level", level+1, "digest", hexutil.Encode(j.sum)) parentOffset := (offset - 1) / f.branches f.write(level+1, parentOffset, j.sum, j.last) - - // close this job channel if this is the last write - if j.last { - log.Trace("dohash last close chan", "level", level) - close(f.levelJobs[level]) - } } From 9fc98db14e1da88462466500be894a7e09771fda Mon Sep 17 00:00:00 2001 From: lash Date: Mon, 11 Mar 2019 10:15:42 +0100 Subject: [PATCH 42/50] swarm/storage: Remove redundant log, avoid reset on cancel --- swarm/storage/filehasher_alt.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index ea20c5df58..50154facd8 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -90,8 +90,20 @@ func (f *AltFileHasher) addJob(level int, data []byte, last bool) { // cancel the file hashing operation func (f *AltFileHasher) cancel(e error) { + f.lock.Lock() + defer f.lock.Unlock() f.cancelFunc() - f.Reset() + for i := 0; i < altFileHasherMaxLevels; i++ { + select { + case _, ok := <-f.levelJobs[i]: + if ok { + close(f.levelJobs[i]) + } + case <-f.ctx.Done(): + close(f.levelJobs[i]) + } + } + f.levelCount = 0 } // makes sure the hasher is clean before it's returned to the pool @@ -169,7 +181,7 @@ func (f *AltFileHasher) Reset() { if i > 0 { f.targetCount[i-1] = 0 } - f.levelJobs[i] = make(chan fileHashJob, branches-1) + f.levelJobs[i] = make(chan fileHashJob, branches) f.writeCount[i] = 0 f.writeSyncCount = 0 } @@ -222,7 +234,6 @@ func (f *AltFileHasher) Finish(b []byte) []byte { f.lock.Unlock() - log.Warn("foo", "tgt", f.targetCount[f.levelCount-2], "lvl", f.levelCount-2, "br", f.branches) // if the last intermediate level ends on a chunk boundary, we already have our result // and no further action is needed if f.targetCount[f.levelCount-2]%f.branches > 0 { From fdb12d9b6e87d44abba807af0b0a009ce054d467 Mon Sep 17 00:00:00 2001 From: lash Date: Tue, 12 Mar 2019 17:14:56 +0100 Subject: [PATCH 43/50] swarm/storage: Add pyramid hasher compare test --- swarm/storage/filehasher_alt.go | 54 ++++++++++++++++---------------- swarm/storage/filehasher_test.go | 32 ++++++++++++++++++- 2 files changed, 58 insertions(+), 28 deletions(-) diff --git a/swarm/storage/filehasher_alt.go b/swarm/storage/filehasher_alt.go index 50154facd8..4642672208 100644 --- a/swarm/storage/filehasher_alt.go +++ b/swarm/storage/filehasher_alt.go @@ -3,11 +3,11 @@ package storage import ( "context" "encoding/binary" - "fmt" + // "fmt" "sync" - "github.com/ethereum/go-ethereum/common/hexutil" - "github.com/ethereum/go-ethereum/log" + // "github.com/ethereum/go-ethereum/common/hexutil" + // "github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/swarm/bmt" ) @@ -78,13 +78,13 @@ func (f *AltFileHasher) addJob(level int, data []byte, last bool) { // asynchronously retrieve the hashers // this allows write jobs to be set up even if all hashers are busy go func(hasher chan<- 
bmt.SectionWriter) { - log.Debug("getting hasher", "level", level) + //log.Debug("getting hasher", "level", level) j.hasher <- f.hasherPool.Get().(*bmt.AsyncHasher) - log.Debug("got hasher", "level", level) + //log.Debug("got hasher", "level", level) }(j.hasher) // add the job to the appropriate level queue - log.Debug("add job", "level", level, "job", fmt.Sprintf("%p", &j)) + //log.Debug("add job", "level", level, "job", fmt.Sprintf("%p", &j)) f.levelJobs[level] <- j } @@ -115,7 +115,7 @@ func (f *AltFileHasher) putHasher(h bmt.SectionWriter) { // returns true if current write offset of level is on hashing boundary func (f *AltFileHasher) isChunkBoundary(level int, wc int) bool { isboundary := wc%f.branches == 0 - log.Debug("check chunk boundary", "level", level, "wc", wc, "is", isboundary) + //log.Debug("check chunk boundary", "level", level, "wc", wc, "is", isboundary) return isboundary } @@ -204,9 +204,9 @@ func (f *AltFileHasher) Finish(b []byte) []byte { // find our level height and decrease the waitgroup count to used levels only f.levelCount = getLevelsFromLength(f.totalBytes, f.segmentSize, f.branches) - log.Debug("finish set", "levelcount", f.levelCount, "b", len(b)) + //log.Debug("finish set", "levelcount", f.levelCount, "b", len(b)) for i := altFileHasherMaxLevels; i > f.levelCount; i-- { - log.Debug("purging unused level chans", "l", i) + //log.Debug("purging unused level chans", "l", i) close(f.levelJobs[i-1]) } @@ -224,12 +224,12 @@ func (f *AltFileHasher) Finish(b []byte) []byte { // add number of writes divided by 128 for every additional level // we don't use targetCount for level 0, since f.finished annotates that it is reached target := (f.totalBytes-1)/f.segmentSize + 1 - log.Debug("setting targetcount", "l", 0, "t", target) + //log.Debug("setting targetcount", "l", 0, "t", target) f.targetCount[0] = target for i := 1; i < f.levelCount; i++ { target = (target-1)/f.branches + 1 f.targetCount[i] = target - log.Debug("setting targetcount", "l", i, "t", target) + //log.Debug("setting targetcount", "l", i, "t", target) } f.lock.Unlock() @@ -241,7 +241,7 @@ func (f *AltFileHasher) Finish(b []byte) []byte { // (it will not hash as long as the job write count is 0 // if not, we need to trigger hashing on the incomplete chunk write if f.writeSyncCount%f.branches == 0 { - log.Debug("write end chunk boundary align", "segmentwrites", f.writeSyncCount) + //log.Debug("write end chunk boundary align", "segmentwrites", f.writeSyncCount) f.addJob(0, nil, true) } @@ -274,11 +274,11 @@ func (f *AltFileHasher) Write(b []byte) { // it creates a new write job when write count hits chunk boundaries // TODO pass writecount offset through function to avoid segmentwrite calculation func (f *AltFileHasher) write(level int, offset int, b []byte, last bool) { - log.Trace("write chunk boundary align", "offset", offset, "total", f.getTotalBytesSafe(), "level", level, "last", last, "datalength", len(b)) + //log.Trace("write chunk boundary align", "offset", offset, "total", f.getTotalBytesSafe(), "level", level, "last", last, "datalength", len(b)) if f.isChunkBoundary(level, offset) { f.addJob(level, b, last) } - log.Debug("write levelwritec", "level", level, "last", last, "wc", offset) + //log.Debug("write levelwritec", "level", level, "last", last, "wc", offset) if len(b) > 0 { f.levelWriteC[level] <- b } @@ -296,17 +296,17 @@ func (f *AltFileHasher) processJobs() { select { case j, ok := <-f.levelJobs[i]: if !ok { - log.Trace("job channel closed", "i", i) + //log.Trace("job channel closed", "i", i) 
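// Aside: two small formulas drive the level plumbing in processJobs and
// doHash. With branches = 128:
//
//	wc%branches == 0     // true on every 128th segment write: a chunk is full
//	(offset-1)/branches  // parent-level slot that receives the chunk digest
//
// e.g. segment offsets 1..128 land in parent slot 0 and offsets 129..256 in
// parent slot 1; the -1 keeps exact multiples of 128 in the preceding slot.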
return } if f.isTopLevelSafe(i) { dataPtr := <-f.levelWriteC[i] - log.Debug("this is top level so all done", "i", i, "root", hexutil.Encode(dataPtr)) + //log.Debug("this is top level so all done", "i", i, "root", hexutil.Encode(dataPtr)) close(f.levelJobs[i]) f.resC <- dataPtr return } - log.Debug("have job write", "level", i, "j", j) + //log.Debug("have job write", "level", i, "j", j) h := <-j.hasher var finished bool for !finished { @@ -318,9 +318,9 @@ func (f *AltFileHasher) processJobs() { if len(dataPtr) == 0 { j.last = true } else { - log.Trace("job write chan", "level", i, "data", dataPtr, "wc", writeCount, "last", j.last) + //log.Trace("job write chan", "level", i, "data", dataPtr, "wc", writeCount, "last", j.last) if !(j.last && i == 0) { - log.Debug("WRITE TO HASHER", "level", i, "wc", writeCount, "data", dataPtr) + //log.Debug("WRITE TO HASHER", "level", i, "wc", writeCount, "data", dataPtr) netOffset := (writeCount % f.batchSegments) h.Write(netOffset%f.branches, dataPtr) } @@ -335,7 +335,7 @@ func (f *AltFileHasher) processJobs() { // if we're in the explicitly last write // the latter can be a write without data, which will be the trigger from Finish() if (f.isChunkBoundary(i, writeCount)) || j.last { - log.Debug("chunk boundary|last", "last", j.last, "wc", writeCount, "level", i) + //log.Debug("chunk boundary|last", "last", j.last, "wc", writeCount, "level", i) f.doHash(h, i, &j) finished = true } @@ -343,7 +343,7 @@ func (f *AltFileHasher) processJobs() { } f.putHasher(h) case <-f.ctx.Done(): - log.Debug("job exiting", "level", i, "err", f.ctx.Err()) + //log.Debug("job exiting", "level", i, "err", f.ctx.Err()) return } } @@ -365,10 +365,10 @@ func (f *AltFileHasher) doHash(h bmt.SectionWriter, level int, j *fileHashJob) { if level > 0 && j.last { writeCountBelow := f.getWriteCountSafe(level - 1) f.lock.Lock() - log.Debug("danglecheck", "offset", offset, "f.batchSegments", f.batchSegments, "wcbelow", writeCountBelow) + //log.Debug("danglecheck", "offset", offset, "f.batchSegments", f.batchSegments, "wcbelow", writeCountBelow) childWrites := writeCountBelow % f.batchSegments if offset%f.branches == 0 && childWrites <= f.branches { - log.Debug("dangle done", "level", level, "writeCount", offset) + //log.Debug("dangle done", "level", level, "writeCount", offset) f.lock.Unlock() f.write(level+1, offset, j.data, true) close(f.levelJobs[level]) @@ -403,20 +403,20 @@ func (f *AltFileHasher) doHash(h bmt.SectionWriter, level int, j *fileHashJob) { meta := make([]byte, 8) binary.LittleEndian.PutUint64(meta, uint64(dataUnderSpan)) - log.Debug("hash", "level", level, "size", hashDataSize, "job", fmt.Sprintf("%p", j), "meta", meta, "wc", offset, "hasher", h, "gettotalbytes", f.getTotalBytesSafe(), "last", j.last, "span", span, "data", j.data) + //log.Debug("hash", "level", level, "size", hashDataSize, "job", fmt.Sprintf("%p", j), "meta", meta, "wc", offset, "hasher", h, "gettotalbytes", f.getTotalBytesSafe(), "last", j.last, "span", span, "data", j.data) j.sum = h.Sum(nil, hashDataSize, meta) - log.Debug("hash done", "level", level, "job", fmt.Sprintf("%p", j), "wc", offset) + //log.Debug("hash done", "level", level, "job", fmt.Sprintf("%p", j), "wc", offset) // also write to output go func() { - log.Trace("TODO write out to chunk", "sum", hexutil.Encode(j.sum), "data", hexutil.Encode(j.data)) + //log.Trace("TODO write out to chunk", "sum", hexutil.Encode(j.sum), "data", hexutil.Encode(j.data)) }() } // write to next level hasher // TODO here we are copying data bytes, can we get away 
with referencing underlying buffer? - log.Trace("next level write", "level", level+1, "digest", hexutil.Encode(j.sum)) + //log.Trace("next level write", "level", level+1, "digest", hexutil.Encode(j.sum)) parentOffset := (offset - 1) / f.branches f.write(level+1, parentOffset, j.sum, j.last) diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index acc6fc0813..d82c1e3bd2 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -69,7 +69,7 @@ var ( } start = 0 - end = 19 + end = 20 ) //func init() { @@ -134,6 +134,36 @@ func TestReferenceFileHasher(t *testing.T) { } } +func TestPyramidHasherCompare(t *testing.T) { + + var mismatch int + for i := start; i < end; i++ { + dataLength := dataLengths[i] + log.Info("start", "i", i, "len", dataLength) + _, data := generateSerialData(int(dataLength), 255, 0) + buf := bytes.NewReader(data) + buf.Seek(0, io.SeekStart) + putGetter := newTestHasherStore(&FakeChunkStore{}, BMTHash) + + ctx := context.Background() + refHash, wait, err := PyramidSplit(ctx, buf, putGetter, putGetter) + if err != nil { + t.Fatalf(err.Error()) + } + err = wait(ctx) + if err != nil { + t.Fatalf(err.Error()) + } + eq := true + if expected[i] != refHash.String() { + mismatch++ + eq = false + } + t.Logf("[%7d+%4d]\t%v\tref: %s\texpect: %s", dataLength/chunkSize, dataLength%chunkSize, eq, refHash, expected[i]) + + } +} + func TestSum(t *testing.T) { var mismatch int From 1aa3c0fcdb72bc553d80be28a7aeb1d9d4f7b015 Mon Sep 17 00:00:00 2001 From: lash Date: Thu, 14 Mar 2019 14:39:23 +0100 Subject: [PATCH 44/50] swarm/storage: WIP set up chained writer prototypes --- swarm/storage/filehasher_test.go | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index d82c1e3bd2..67e03f6c22 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -72,16 +72,24 @@ var ( end = 20 ) -//func init() { -// pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize) -//} - func newAsyncHasher() bmt.SectionWriter { pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize) h := bmt.New(pool) return h.NewAsyncWriter(false) } +func TestNewFileHasher(t *testing.T) { + chunker := &FileChunker{} + hashFunc := func() SectionHasherTwo { + return SectionHasherTwo(NewFilePadder(chunker)) + } + fm, err := NewFileMuxer(hashFunc) + if err != nil { + t.Fatal(err) + } + fmt.Println(fm) +} + func TestAltFileHasher(t *testing.T) { var mismatch int From 3a285880b390c2da1be8dd0411fc66ee8f818098 Mon Sep 17 00:00:00 2001 From: lash Date: Thu, 14 Mar 2019 17:05:10 +0100 Subject: [PATCH 45/50] swarm/storage: WIP writethrough implemented --- swarm/storage/filehasher_test.go | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 67e03f6c22..0ac364d981 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -68,8 +68,8 @@ var ( "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", } - start = 0 - end = 20 + start = 6 + end = 7 ) func newAsyncHasher() bmt.SectionWriter { @@ -83,11 +83,28 @@ func TestNewFileHasher(t *testing.T) { hashFunc := func() SectionHasherTwo { return SectionHasherTwo(NewFilePadder(chunker)) } - fm, err := NewFileMuxer(hashFunc) + fh, err := NewFileMuxer(hashFunc) if err != nil { t.Fatal(err) } - fmt.Println(fm) + log.Info("filehasher set up", "batchsize", 
fh.BatchSize(), "padsize", fh.PadSize()) + + for i := start; i < end; i++ { + dataLength := dataLengths[i] + _, data := generateSerialData(dataLength, 255, 0) + log.Info(">>>>>>>>> NewFileHasher start", "i", i, "len", dataLength) + offset := 0 + l := fh.SectionSize() + for i := 0; i < dataLength; i += 32 { + remain := dataLength - offset + if remain < l { + l = remain + } + fh.Write(i, data[offset:offset+l]) + offset += 32 + } + time.Sleep(time.Second) + } } func TestAltFileHasher(t *testing.T) { From 53bd95d5eefe9aecf73e24311b9e72ba22cb85b7 Mon Sep 17 00:00:00 2001 From: lash Date: Thu, 14 Mar 2019 20:27:18 +0100 Subject: [PATCH 46/50] swarm/storage: WIP disappointing benchmarks --- swarm/storage/filehasher_test.go | 105 +++++++++++++++++++++++-------- 1 file changed, 80 insertions(+), 25 deletions(-) diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 0ac364d981..b753b0acec 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -46,32 +46,44 @@ var ( chunkSize * 128 * 128, // 19 } expected = []string{ - "ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", - "0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", - "3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", - "95510c2ff18276ed94be2160aed4e69c9116573b6f69faaeed1b426fea6a3db8", - "490072cc55b8ad381335ff882ac51303cc069cbcb8d8d3f7aa152d9c617829fe", - "541552bae05e9a63a6cb561f69edf36ffe073e441667dbf7a0e9a3864bb744ea", - "c10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef", - "91699c83ed93a1f87e326a29ccd8cc775323f9e7260035a5f014c975c5f3cd28", - "73759673a52c1f1707cbb61337645f4fcbd209cdc53d7e2cedaaa9f44df61285", - "db1313a727ffc184ae52a70012fbbf7235f551b9f2d2da04bf476abe42a3cb42", - "ade7af36ac0c7297dc1c11fd7b46981b629c6077bce75300f85b02a6153f161b", - "29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9", - "61416726988f77b874435bdd89a419edc3861111884fd60e8adf54e2f299efd6", - "3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09", - "e5c76afa931e33ac94bce2e754b1bb6407d07f738f67856783d93934ca8fc576", - "485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df", - "624b2abb7aefc0978f891b2a56b665513480e5dc195b4a66cd8def074a6d2e94", - "b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", - "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", - "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", + "ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", // 0 + "0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", // 1 + "3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", // 2 + "95510c2ff18276ed94be2160aed4e69c9116573b6f69faaeed1b426fea6a3db8", // 3 + "490072cc55b8ad381335ff882ac51303cc069cbcb8d8d3f7aa152d9c617829fe", // 4 + "541552bae05e9a63a6cb561f69edf36ffe073e441667dbf7a0e9a3864bb744ea", // 5 + "c10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef", // 6 + "91699c83ed93a1f87e326a29ccd8cc775323f9e7260035a5f014c975c5f3cd28", // 7 + "73759673a52c1f1707cbb61337645f4fcbd209cdc53d7e2cedaaa9f44df61285", // 8 + "db1313a727ffc184ae52a70012fbbf7235f551b9f2d2da04bf476abe42a3cb42", // 9 + "ade7af36ac0c7297dc1c11fd7b46981b629c6077bce75300f85b02a6153f161b", // 10 + "29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9", // 11 + "61416726988f77b874435bdd89a419edc3861111884fd60e8adf54e2f299efd6", // 12 + "3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09", // 13 + 
"e5c76afa931e33ac94bce2e754b1bb6407d07f738f67856783d93934ca8fc576", // 14 + "485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df", // 15 + "624b2abb7aefc0978f891b2a56b665513480e5dc195b4a66cd8def074a6d2e94", // 16 + "b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", // 17 + "59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", // 18 + "522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", // 19 } - start = 6 - end = 7 + start = 13 + end = 14 ) +type wrappedHasher struct { + bmt.SectionWriter +} + +func (w *wrappedHasher) BatchSize() uint64 { + return 128 +} + +func (w *wrappedHasher) PadSize() uint64 { + return 0 +} + func newAsyncHasher() bmt.SectionWriter { pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize) h := bmt.New(pool) @@ -83,6 +95,11 @@ func TestNewFileHasher(t *testing.T) { hashFunc := func() SectionHasherTwo { return SectionHasherTwo(NewFilePadder(chunker)) } + hashFunc = func() SectionHasherTwo { + return &wrappedHasher{ + SectionWriter: newAsyncHasher(), + } + } fh, err := NewFileMuxer(hashFunc) if err != nil { t.Fatal(err) @@ -100,10 +117,49 @@ func TestNewFileHasher(t *testing.T) { if remain < l { l = remain } - fh.Write(i, data[offset:offset+l]) + fh.Write(i/32, data[offset:offset+l]) + offset += 32 + } + time.Sleep(time.Second * 2) + t.Logf("debug parent: %d - change %d", fh.debugJobParent, fh.debugJobChange) + } +} + +func BenchmarkNewFileHasher(b *testing.B) { + for i := start; i < end; i++ { + b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkNewFileHasher) + } +} + +func benchmarkNewFileHasher(b *testing.B) { + params := strings.Split(b.Name(), "/") + dataLength, err := strconv.ParseInt(params[1], 10, 64) + if err != nil { + b.Fatal(err) + } + _, data := generateSerialData(int(dataLength), 255, 0) + b.ResetTimer() + for i := 0; i < b.N; i++ { + hashFunc := func() SectionHasherTwo { + return &wrappedHasher{ + SectionWriter: newAsyncHasher(), + } + } + fh, err := NewFileMuxer(hashFunc) + if err != nil { + b.Fatal(err) + } + l := int64(32) + offset := int64(0) + for j := int64(0); j < dataLength; j += 32 { + remain := dataLength - offset + if remain < l { + l = remain + } + fh.Write(int(offset/32), data[offset:offset+l]) offset += 32 } - time.Sleep(time.Second) + //fh.Finish(nil) } } @@ -269,7 +325,6 @@ func BenchmarkPyramidHasherCompareAltFileHasher(b *testing.B) { } func benchmarkPyramidHasherCompareAltFileHasher(b *testing.B) { - //t.ReportAllocs() params := strings.Split(b.Name(), "/") dataLength, err := strconv.ParseInt(params[1], 10, 64) if err != nil { From 25e0d41b24131a6227761f1dfb2444ec340d83e3 Mon Sep 17 00:00:00 2001 From: lash Date: Thu, 14 Mar 2019 21:19:20 +0100 Subject: [PATCH 47/50] swarm/storage: WIP better benchmark but far off and hashes wrong --- swarm/storage/filehasher_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index b753b0acec..04c4484737 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -122,6 +122,7 @@ func TestNewFileHasher(t *testing.T) { } time.Sleep(time.Second * 2) t.Logf("debug parent: %d - change %d", fh.debugJobParent, fh.debugJobChange) + t.Logf("debug bytes top: %x", fh.topJob.debugHash) } } From 2bd2b8db3ac4534e6874f57c138a5df5d25520f3 Mon Sep 17 00:00:00 2001 From: lash Date: Fri, 15 Mar 2019 09:25:18 +0100 Subject: [PATCH 48/50] swarm/storage: WIP add missing third attempt file, still disappointed --- swarm/storage/filehasher_thethird.go | 398 
+++++++++++++++++++++++++++ 1 file changed, 398 insertions(+) create mode 100644 swarm/storage/filehasher_thethird.go diff --git a/swarm/storage/filehasher_thethird.go b/swarm/storage/filehasher_thethird.go new file mode 100644 index 0000000000..7dd5d3211c --- /dev/null +++ b/swarm/storage/filehasher_thethird.go @@ -0,0 +1,398 @@ +package storage + +import ( + "encoding/binary" + "errors" + "fmt" + "sync" + "sync/atomic" + + "github.com/ethereum/go-ethereum/common/hexutil" + "github.com/ethereum/go-ethereum/crypto/sha3" + "github.com/ethereum/go-ethereum/swarm/bmt" + "github.com/ethereum/go-ethereum/swarm/log" +) + +const ( + defaultPadSize = 18 + defaultSegmentSize = 32 +) + +var ( + hashPool sync.Pool + mockPadding = [defaultPadSize * defaultSegmentSize]byte{} + FileHasherAlgorithm = DefaultHash +) + +func init() { + for i := 0; i < len(mockPadding); i++ { + mockPadding[i] = 0x01 + } + hashPool.New = func() interface{} { + + pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize) + h := bmt.New(pool) + return h.NewAsyncWriter(false) + } +} + +func getHasher() bmt.SectionWriter { + return hashPool.Get().(bmt.SectionWriter) +} + +func putHasher(h bmt.SectionWriter) { + h.Reset() + hashPool.Put(h) +} + +type SectionHasherTwo interface { + bmt.SectionWriter + // Provides: + // Reset() // standard init to be called before reuse + // Write(index int, data []byte) // write into section of index + // Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer + // SectionSize() int // size of the async section unit to use + + BatchSize() uint64 // sections to write before sum should be called + PadSize() uint64 // additional sections that will be written on sum +} + +type FileChunker struct { + branches uint64 +} + +func NewFileChunker() *FileChunker { + return &FileChunker{ + branches: 128, + } + +} + +func (f *FileChunker) Write(index int, b []byte) { + log.Trace("got write", "b", len(b)) +} + +func (f *FileChunker) Sum(b []byte, length int, span []byte) []byte { + log.Warn("got sum", "b", hexutil.Encode(b), "span", span) + return b[:f.SectionSize()] +} + +func (f *FileChunker) BatchSize() uint64 { + return branches +} + +func (f *FileChunker) PadSize() uint64 { + return 0 +} + +func (f *FileChunker) SectionSize() int { + return 32 +} + +func (f *FileChunker) Reset() { + return +} + +// Pads data on hashing +// will be erasure coding behavior +type FilePadder struct { + hasher bmt.SectionWriter + writer SectionHasherTwo + buffer []byte + limit int // write count limit (in segments) + writeC chan int +} + +func NewFilePadder(writer SectionHasherTwo) *FilePadder { + if writer == nil { + panic("writer can't be nil") + } + p := &FilePadder{ + writer: writer, + limit: 110, + } + + p.writeC = make(chan int, writer.BatchSize()) + p.Reset() + return p +} + +func (p *FilePadder) BatchSize() uint64 { + return p.writer.BatchSize() - p.PadSize() +} + +func (p *FilePadder) PadSize() uint64 { + return 18 +} + +func (p *FilePadder) Size() int { + return p.hasher.SectionSize() +} + +// ignores index +// TODO Write should return write count +func (p *FilePadder) Write(index int, b []byte) { + //log.Debug("padder write", "index", index, "l", len(b), "c", atomic.AddUint64(&p.debugSize, uint64(len(b)))) + log.Debug("padder write", "index", index, "l", len(b)) + if index > p.limit { + panic(fmt.Sprintf("write index beyond limit; %d > %d", index, p.limit)) + } + p.hasher.Write(index, b) + p.writeBuffer(index, b) + p.writeC <- len(b) +} + +func (p *FilePadder) writeBuffer(index int, b 
[]byte) { + bytesIndex := index * p.SectionSize() + copy(p.buffer[bytesIndex:], b[:p.SectionSize()]) +} + +// performs data padding on the supplied data +// returns padding +func (p *FilePadder) pad(b []byte) []byte { + return mockPadding[:] +} + +// ignores span +func (p *FilePadder) Sum(b []byte, length int, span []byte) []byte { + var writeCount int + select { + case c, ok := <-p.writeC: + if !ok { + break + } + writeCount += c + if writeCount == length { + break + } + } + + // at this point we are not concurrent anymore + // TODO optimize + padding := p.pad(nil) + for i := 0; i < len(padding); i += p.hasher.SectionSize() { + log.Debug("padder pad", "i", i, "limit", p.limit) + p.hasher.Write(p.limit, padding[i:]) + p.writeBuffer(p.limit, padding[i:]) + p.limit++ + } + s := p.hasher.Sum(b, length+len(padding), span) + //p.writer.Sum(append(s, p.buffer...), length, span) + chunk := NewChunk(Address(s), p.buffer) + log.Warn("have chunk", "chunk", chunk, "chunkdata", chunk.sdata) + putHasher(p.hasher) + return s +} + +func (p *FilePadder) Reset() { + p.hasher = getHasher() + p.buffer = make([]byte, (p.PadSize()+p.BatchSize())*uint64(p.SectionSize())) +} + +// panics if called after sum and before reset +func (p *FilePadder) SectionSize() int { + return p.hasher.SectionSize() +} + +type hasherJob struct { + parent *hasherJob + dataOffset uint64 // global write count this job represents + levelOffset uint64 // offset on this level + count uint64 // amount of writes on this job + edge int // > 0 on last write, incremented by 1 every level traversed on right edge + debugHash []byte + debugLifetime uint32 + writer SectionHasherTwo +} + +func (h *hasherJob) reset(w SectionHasherTwo, dataOffset uint64, levelOffset uint64, edge int) { + h.debugLifetime++ + h.count = 0 + h.dataOffset = dataOffset + h.levelOffset = levelOffset + h.writer = w +} + +func (h *hasherJob) inc() uint64 { + return atomic.AddUint64(&h.count, 1) +} + +// FileMuxer manages the build tree of the data +type FileMuxer struct { + branches int // cached branch count + sectionSize int // cached segment size of writer + writerBatchSize uint64 // cached chunk size of chained writer + parentBatchSize uint64 // cached number of writes before change parent + writerPadSize uint64 // cached padding size of the chained writer + topJob *hasherJob // keeps pointer to the current topmost job + lastJob *hasherJob // keeps pointer to the current data write job + lastWrite uint64 // keeps the last data write count + targetCount uint64 // set when sum is called, is total length of data + targetLevel int // set when sum is called, is tree level of root chunk + balancedTable map[uint64]uint64 // maps write counts to bytecounts for + debugJobChange uint32 + debugJobParent uint32 + + writerQueue chan struct{} + writerPool sync.Pool // chained writers providing hashing + jobMu sync.Mutex +} + +func NewFileMuxer(writerFunc func() SectionHasherTwo) (*FileMuxer, error) { + if writerFunc == nil { + return nil, errors.New("writer cannot be nil") + } + writer := writerFunc() + branches := writer.BatchSize() + writer.PadSize() + f := &FileMuxer{ + branches: int(branches), + sectionSize: writer.SectionSize(), + writerBatchSize: writer.BatchSize(), + parentBatchSize: writer.BatchSize() * branches, + writerPadSize: writer.PadSize(), + writerQueue: make(chan struct{}, 1024), + balancedTable: make(map[uint64]uint64), + } + f.writerPool.New = func() interface{} { + return writerFunc() + } + for i := 0; i < 1000; i++ { + f.writerPool.Put(f.writerPool.Get()) + } + + 
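// Aside: the loop below precomputes the section-write counts that close a
// perfectly balanced tree, mapped to their byte counts. With the values used
// in the tests above (sectionSize = 32, branches = 128) the table reads
// 1 -> 32, 128 -> 4096, 16384 -> 524288, ... up to 128^8 sections.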
lastBoundary := uint64(1) + f.balancedTable[lastBoundary] = uint64(f.sectionSize) + for i := 1; i < 9; i++ { + lastBoundary *= uint64(f.branches) + f.balancedTable[lastBoundary] = lastBoundary * uint64(f.sectionSize) + } + + f.lastJob = &hasherJob{ + writer: f.getWriter(), + } + f.topJob = f.lastJob + + //log.Info("init", "fh", f, "table", f.balancedTable, "writer", writer.BatchSize()) + return f, nil +} + +func (m *FileMuxer) getWriter() SectionHasherTwo { + //m.writerQueue <- struct{}{} + return m.writerPool.Get().(SectionHasherTwo) +} + +func (m *FileMuxer) putWriter(writer SectionHasherTwo) { + writer.Reset() + m.writerPool.Put(writer) + //<-m.writerQueue +} + +func (m *FileMuxer) BatchSize() uint64 { + return m.writerBatchSize + m.writerPadSize +} + +func (m *FileMuxer) PadSize() uint64 { + return 0 +} + +func (m *FileMuxer) SectionSize() int { + return m.sectionSize +} + +func (m *FileMuxer) Write(index int, b []byte) { + //log.Trace("data write", "offset", index, "jobcount", m.lastJob.count, "batchsize", m.writerBatchSize) + + m.write(m.lastJob, index%m.branches, b, true) + m.lastWrite++ +} + +// b byte is not thread safe +// index is internal within a job (batchsize / sectionsize) +func (m *FileMuxer) write(h *hasherJob, index int, b []byte, groundlevel bool) { + + // if we are crossing a batch write size, we spawn a new job + // and point the data writer's job pointer lastJob to it + newcount := h.inc() + if newcount > m.writerBatchSize { + } + + // write the data to the chain and sum it if: + // * the write is on a threshold, or + // * if we're done writing + //go func(h *hasherJob, newcount uint64, index int, b []byte) { + lifetime := atomic.LoadUint32(&h.debugLifetime) + log.Trace("job write", "job", fmt.Sprintf("%p", h), "w", fmt.Sprintf("%p", h.writer), "count", newcount, "index", index, "lifetime", lifetime, "data", hexutil.Encode(b)) + // write to the chained writer + h.writer.Write(index, b) + + // check threshold or done + if newcount == m.writerBatchSize || h.edge > 0 { + + // copy the vars at the time of call + dataOffset := h.dataOffset + + //go func(index int, w SectionHasherTwo, p *hasherJob) { + go func(dataOffset uint64, levelOffset uint64, w SectionHasherTwo, p *hasherJob) { + thisJobLength := (newcount * uint64(m.sectionSize)) + uint64(len(b)%m.sectionSize) + + // span is the total size under the chunk + // BUG dataoffset needs modulo levelindex + spanBytes := make([]byte, 8) + + binary.LittleEndian.PutUint64(spanBytes, uint64(dataOffset+thisJobLength)) + + log.Debug("jobwrite sum", "w", fmt.Sprintf("%p", w), "l", thisJobLength, "span", spanBytes) + // sum the data using the chained writer + + s := w.Sum( + nil, + int(thisJobLength), + spanBytes, + ) + + // reset the chained writer + m.putWriter(w) + + // we only create a parent object on a job on the first write + // this way, if it is nil and we are working the right edge, we know when to skip + if p == nil { + h.parent = &hasherJob{ + dataOffset: dataOffset, + levelOffset: (levelOffset-1)/uint64(m.branches) + 1, + writer: m.getWriter(), + } + + atomic.AddUint32(&m.debugJobParent, 1) + log.Debug("set parent", "child", fmt.Sprintf("%p", h), "parent", fmt.Sprintf("%p", h.parent)) + } + // write to the parent job + // the section index to write to is divided by the branches + m.write(h.parent, (index-1)/m.branches, s, false) + + log.Debug("hash result", "s", hexutil.Encode(s), "length", thisJobLength) + }(h.dataOffset, h.levelOffset, h.writer, h.parent) + + newLevelOffset := dataOffset + newcount - 1 + var 
sameParent bool + if newLevelOffset%m.parentBatchSize > 0 { + sameParent = true + } + newDataOffset := dataOffset + if groundlevel { + newDataOffset += newcount - 1 + } + + // TODO edge + h.reset(m.getWriter(), newDataOffset, newLevelOffset, 0) + + // groundlevel is synchronous, so we don't have to worry about race here + atomic.AddUint32(&m.debugJobChange, 1) + log.Debug("changing jobs", "dataoffset", h.dataOffset, "leveloffset", h.levelOffset, "sameparent", sameParent, "groundlevel", groundlevel) + + } +} + +func (m *FileMuxer) isBalancedBoundary(count uint64) bool { + _, ok := m.balancedTable[count] + return ok +} From 10c9e97106dc271b4ca759d0c747344f9461a506 Mon Sep 17 00:00:00 2001 From: lash Date: Fri, 15 Mar 2019 10:31:34 +0100 Subject: [PATCH 49/50] swarm/storage: WIP Add comments --- swarm/storage/filehasher_test.go | 10 +- swarm/storage/filehasher_thethird.go | 309 +++++++++++++++++++-------- 2 files changed, 228 insertions(+), 91 deletions(-) diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go index 04c4484737..a6c8cd22cb 100644 --- a/swarm/storage/filehasher_test.go +++ b/swarm/storage/filehasher_test.go @@ -100,7 +100,7 @@ func TestNewFileHasher(t *testing.T) { SectionWriter: newAsyncHasher(), } } - fh, err := NewFileMuxer(hashFunc) + fh, err := NewFileMuxer(hashFunc, writerModeGC) if err != nil { t.Fatal(err) } @@ -121,7 +121,7 @@ func TestNewFileHasher(t *testing.T) { offset += 32 } time.Sleep(time.Second * 2) - t.Logf("debug parent: %d - change %d", fh.debugJobParent, fh.debugJobChange) + t.Logf("debug create: %d - change %d", fh.debugJobCreate, fh.debugJobChange) t.Logf("debug bytes top: %x", fh.topJob.debugHash) } } @@ -143,13 +143,15 @@ func benchmarkNewFileHasher(b *testing.B) { for i := 0; i < b.N; i++ { hashFunc := func() SectionHasherTwo { return &wrappedHasher{ - SectionWriter: newAsyncHasher(), + //SectionWriter: newAsyncHasher(), + SectionWriter: newTreeHasherWrapper(), } } - fh, err := NewFileMuxer(hashFunc) + fh, err := NewFileMuxer(hashFunc, writerModePool) if err != nil { b.Fatal(err) } + _ = SectionHasherTwo(fh) l := int64(32) offset := int64(0) for j := int64(0); j < dataLength; j += 32 { diff --git a/swarm/storage/filehasher_thethird.go b/swarm/storage/filehasher_thethird.go index 7dd5d3211c..1f3f4e6f0a 100644 --- a/swarm/storage/filehasher_thethird.go +++ b/swarm/storage/filehasher_thethird.go @@ -18,6 +18,12 @@ const ( defaultSegmentSize = 32 ) +const ( + writerModePool = iota // use sync.Pool for managing hasher allocation + writerModeGC // only allocate new hashers, rely on GC to reap them + writerModeManual // handle a pre-allocated hasher pool with buffered channels +) + var ( hashPool sync.Pool mockPadding = [defaultPadSize * defaultSegmentSize]byte{} @@ -30,7 +36,7 @@ func init() { } hashPool.New = func() interface{} { - pool = bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize) + pool := bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize) h := bmt.New(pool) return h.NewAsyncWriter(false) } @@ -45,18 +51,53 @@ func putHasher(h bmt.SectionWriter) { hashPool.Put(h) } +// defines the chained writer interface type SectionHasherTwo interface { bmt.SectionWriter - // Provides: - // Reset() // standard init to be called before reuse - // Write(index int, data []byte) // write into section of index - // Sum(b []byte, length int, span []byte) []byte // returns the hash of the buffer - // SectionSize() int // size of the async section unit to use - BatchSize() uint64 // sections to write before sum should be called PadSize() 
uint64 // additional sections that will be written on sum } +// used for benchmarks against pyramid hasher which uses sync hasher +type treeHasherWrapper struct { + *bmt.Hasher +} + +func newTreeHasherWrapper() *treeHasherWrapper { + pool := bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize) + h := bmt.New(pool) + return &treeHasherWrapper{ + Hasher: h, + } +} + +// implements SectionHasherTwo +func (h *treeHasherWrapper) Write(index int, b []byte) { + h.Hasher.Write(b) +} + +// implements SectionHasherTwo +func (h *treeHasherWrapper) Sum(b []byte, length int, span []byte) []byte { + return h.Hasher.Sum(b) +} + +// implements SectionHasherTwo +func (h *treeHasherWrapper) BatchSize() uint64 { + return 128 +} + +// implements SectionHasherTwo +func (h *treeHasherWrapper) PadSize() uint64 { + return 0 +} + +// implements SectionHasherTwo +func (h *treeHasherWrapper) SectionSize() int { + return 32 +} + +// FileChunker is a chainable FileHasher writer that creates chunks on write and sum +// TODO not implemented type FileChunker struct { branches uint64 } @@ -68,33 +109,39 @@ func NewFileChunker() *FileChunker { } +// implements SectionHasherTwo func (f *FileChunker) Write(index int, b []byte) { log.Trace("got write", "b", len(b)) } +// implements SectionHasherTwo func (f *FileChunker) Sum(b []byte, length int, span []byte) []byte { log.Warn("got sum", "b", hexutil.Encode(b), "span", span) return b[:f.SectionSize()] } +// implements SectionHasherTwo func (f *FileChunker) BatchSize() uint64 { return branches } +// implements SectionHasherTwo func (f *FileChunker) PadSize() uint64 { return 0 } +// implements SectionHasherTwo func (f *FileChunker) SectionSize() int { return 32 } +// implements SectionHasherTwo func (f *FileChunker) Reset() { return } -// Pads data on hashing -// will be erasure coding behavior +// FilePadder is a chainable FileHasher writer that pads the data written to it on sum +// illustrates possible erasure coding interface type FilePadder struct { hasher bmt.SectionWriter writer SectionHasherTwo @@ -117,20 +164,24 @@ func NewFilePadder(writer SectionHasherTwo) *FilePadder { return p } +// implements SectionHasherTwo func (p *FilePadder) BatchSize() uint64 { return p.writer.BatchSize() - p.PadSize() } +// implements SectionHasherTwo func (p *FilePadder) PadSize() uint64 { return 18 } +// implements SectionHasherTwo func (p *FilePadder) Size() int { return p.hasher.SectionSize() } +// implements SectionHasherTwo // ignores index -// TODO Write should return write count +// TODO bmt.SectionWriter.Write interface should return write count func (p *FilePadder) Write(index int, b []byte) { //log.Debug("padder write", "index", index, "l", len(b), "c", atomic.AddUint64(&p.debugSize, uint64(len(b)))) log.Debug("padder write", "index", index, "l", len(b)) @@ -147,12 +198,7 @@ func (p *FilePadder) writeBuffer(index int, b []byte) { copy(p.buffer[bytesIndex:], b[:p.SectionSize()]) } -// performs data padding on the supplied data -// returns padding -func (p *FilePadder) pad(b []byte) []byte { - return mockPadding[:] -} - +// implements SectionHasherTwo // ignores span func (p *FilePadder) Sum(b []byte, length int, span []byte) []byte { var writeCount int @@ -184,16 +230,25 @@ func (p *FilePadder) Sum(b []byte, length int, span []byte) []byte { return s } +// implements SectionHasherTwo func (p *FilePadder) Reset() { p.hasher = getHasher() p.buffer = make([]byte, (p.PadSize()+p.BatchSize())*uint64(p.SectionSize())) } +// implements SectionHasherTwo // panics if called after sum and 
before reset
 func (p *FilePadder) SectionSize() int {
 	return p.hasher.SectionSize()
 }
 
+// performs data padding on the supplied data
+// returns padding
+func (p *FilePadder) pad(b []byte) []byte {
+	return mockPadding[:]
+}
+
+// utility structure for controlling asynchronous tree hashing of the file
 type hasherJob struct {
 	parent *hasherJob
 	dataOffset uint64 // global write count this job represents
@@ -205,6 +260,8 @@ type hasherJob struct {
 	writer SectionHasherTwo
 }
 
+// reuse a hasherJob with new offsets
+// not thread-safe
 func (h *hasherJob) reset(w SectionHasherTwo, dataOffset uint64, levelOffset uint64, edge int) {
 	h.debugLifetime++
 	h.count = 0
@@ -230,36 +287,62 @@ type FileMuxer struct {
 	targetCount uint64 // set when sum is called, is total length of data
 	targetLevel int // set when sum is called, is tree level of root chunk
 	balancedTable map[uint64]uint64 // maps write counts to bytecounts for balanced trees
-	debugJobChange uint32
-	debugJobParent uint32
+	debugJobChange uint32 // debug counter for job reset calls
+	debugJobCreate uint32 // debug counter for new job allocations
 
-	writerQueue chan struct{}
-	writerPool sync.Pool // chained writers providing hashing
-	jobMu sync.Mutex
+	getWriter func() SectionHasherTwo // mode-dependent function to assign hasher
+	putWriter func(SectionHasherTwo) // mode-dependent function to release hasher
+	writerFunc func() SectionHasherTwo // hasher function used by manual and GC modes
+
+	writerQueue chan struct{} // throttles allocation of hashers
+	writerPool sync.Pool // chained writers providing hashing in Pool mode
+	writerManualQueue chan SectionHasherTwo // chained writers providing hashing in Manual mode
 }
 
-func NewFileMuxer(writerFunc func() SectionHasherTwo) (*FileMuxer, error) {
+func NewFileMuxer(writerFunc func() SectionHasherTwo, mode int) (*FileMuxer, error) {
+
 	if writerFunc == nil {
 		return nil, errors.New("writer cannot be nil")
 	}
-	writer := writerFunc()
+
+	// create new instance and cache frequently used values
 	branches := writer.BatchSize() + writer.PadSize()
+	writer := writerFunc()
 	f := &FileMuxer{
 		branches: int(branches),
 		sectionSize: writer.SectionSize(),
 		writerBatchSize: writer.BatchSize(),
 		parentBatchSize: writer.BatchSize() * branches,
 		writerPadSize: writer.PadSize(),
-		writerQueue: make(chan struct{}, 1024),
-		balancedTable: make(map[uint64]uint64),
-	}
-	f.writerPool.New = func() interface{} {
-		return writerFunc()
+		//writerQueue: make(chan struct{}, 1000),
+		balancedTable: make(map[uint64]uint64),
+		writerFunc: writerFunc,
 	}
-	for i := 0; i < 1000; i++ {
-		f.writerPool.Put(f.writerPool.Get())
+
+	// see writerMode*
+	switch mode {
+	case writerModeManual:
+		f.writerManualQueue = make(chan SectionHasherTwo, 1000)
+
+		for i := 0; i < 1000; i++ {
+			f.writerManualQueue <- writerFunc()
+		}
+		f.getWriter = f.getWriterManual
+		f.putWriter = f.putWriterManual
+	case writerModeGC:
+
+		f.getWriter = f.getWriterGC
+		f.putWriter = f.putWriterGC
+
+	case writerModePool:
+		f.writerPool.New = func() interface{} {
+			return writerFunc()
+		}
+		f.getWriter = f.getWriterPool
+		f.putWriter = f.putWriterPool
+	}
 
+	// create lookup table for data write counts that result in balanced trees
 	lastBoundary := uint64(1)
 	f.balancedTable[lastBoundary] = uint64(f.sectionSize)
 	for i := 1; i < 9; i++ {
@@ -267,38 +350,31 @@ func NewFileMuxer(writerFunc func() SectionHasherTwo) (*FileMuxer, err
 	}
 
+	// create the hasherJob object for the data level.
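+	// lastJob receives all incoming data-level writes (see Write below);
+	// topJob starts out as the same job and is meant to end up pointing
+	// at the root job of the tree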
f.lastJob = &hasherJob{ writer: f.getWriter(), } f.topJob = f.lastJob - //log.Info("init", "fh", f, "table", f.balancedTable, "writer", writer.BatchSize()) return f, nil } -func (m *FileMuxer) getWriter() SectionHasherTwo { - //m.writerQueue <- struct{}{} - return m.writerPool.Get().(SectionHasherTwo) -} - -func (m *FileMuxer) putWriter(writer SectionHasherTwo) { - writer.Reset() - m.writerPool.Put(writer) - //<-m.writerQueue -} - +// implements SectionHasherTwo func (m *FileMuxer) BatchSize() uint64 { return m.writerBatchSize + m.writerPadSize } +// implements SectionHasherTwo func (m *FileMuxer) PadSize() uint64 { return 0 } +// implements SectionHasherTwo func (m *FileMuxer) SectionSize() int { return m.sectionSize } +// implements SectionHasherTwo func (m *FileMuxer) Write(index int, b []byte) { //log.Trace("data write", "offset", index, "jobcount", m.lastJob.count, "batchsize", m.writerBatchSize) @@ -306,6 +382,20 @@ func (m *FileMuxer) Write(index int, b []byte) { m.lastWrite++ } +// implements SectionHasherTwo +// TODO is noop +func (m *FileMuxer) Sum(b []byte, length int, span []byte) []byte { + log.Warn("filemux sum called, not implemented", "b", b, "l", length, "span", span) + return nil +} + +// implements SectionHasherTwo +// TODO is noop +func (m *FileMuxer) Reset() { + log.Warn("filemux reset called, not implemented") +} + +// handles recursive writing across tree levels // b byte is not thread safe // index is internal within a job (batchsize / sectionsize) func (m *FileMuxer) write(h *hasherJob, index int, b []byte, groundlevel bool) { @@ -328,56 +418,15 @@ func (m *FileMuxer) write(h *hasherJob, index int, b []byte, groundlevel bool) { // check threshold or done if newcount == m.writerBatchSize || h.edge > 0 { - // copy the vars at the time of call - dataOffset := h.dataOffset - //go func(index int, w SectionHasherTwo, p *hasherJob) { - go func(dataOffset uint64, levelOffset uint64, w SectionHasherTwo, p *hasherJob) { - thisJobLength := (newcount * uint64(m.sectionSize)) + uint64(len(b)%m.sectionSize) - - // span is the total size under the chunk - // BUG dataoffset needs modulo levelindex - spanBytes := make([]byte, 8) - - binary.LittleEndian.PutUint64(spanBytes, uint64(dataOffset+thisJobLength)) - - log.Debug("jobwrite sum", "w", fmt.Sprintf("%p", w), "l", thisJobLength, "span", spanBytes) - // sum the data using the chained writer - - s := w.Sum( - nil, - int(thisJobLength), - spanBytes, - ) - - // reset the chained writer - m.putWriter(w) - - // we only create a parent object on a job on the first write - // this way, if it is nil and we are working the right edge, we know when to skip - if p == nil { - h.parent = &hasherJob{ - dataOffset: dataOffset, - levelOffset: (levelOffset-1)/uint64(m.branches) + 1, - writer: m.getWriter(), - } - - atomic.AddUint32(&m.debugJobParent, 1) - log.Debug("set parent", "child", fmt.Sprintf("%p", h), "parent", fmt.Sprintf("%p", h.parent)) - } - // write to the parent job - // the section index to write to is divided by the branches - m.write(h.parent, (index-1)/m.branches, s, false) - - log.Debug("hash result", "s", hexutil.Encode(s), "length", thisJobLength) - }(h.dataOffset, h.levelOffset, h.writer, h.parent) - - newLevelOffset := dataOffset + newcount - 1 + go m.sum(b, index, newcount, h.dataOffset, h.levelOffset, h, h.writer, h.parent) + + newLevelOffset := h.dataOffset + newcount - 1 var sameParent bool if newLevelOffset%m.parentBatchSize > 0 { sameParent = true } - newDataOffset := dataOffset + newDataOffset := h.dataOffset if 
groundlevel {
 			newDataOffset += newcount - 1
 		}
 
@@ -392,6 +441,92 @@ func (m *FileMuxer) write(h *hasherJob, index int, b []byte, groundlevel bool) {
 	}
 }
 
+// handles the recursive feedback writes of a chained sum call
+// since the hasherJob of the calling context is asynchronously reset,
+// the relevant values to use for calculation must be copied
+// if parent doesn't exist (new level) a new one is created
+// releases the hasher used by the hasherJob at time of calling this method
+func (m *FileMuxer) sum(b []byte, index int, count uint64, dataOffset uint64, levelOffset uint64, job *hasherJob, w SectionHasherTwo, p *hasherJob) {
+
+	thisJobLength := (count * uint64(m.sectionSize)) + uint64(len(b)%m.sectionSize)
+
+	// span is the total size under the chunk
+	// BUG dataoffset needs modulo levelindex
+	spanBytes := make([]byte, 8)
+
+	binary.LittleEndian.PutUint64(spanBytes, uint64(dataOffset+thisJobLength))
+
+	log.Debug("jobwrite sum", "w", fmt.Sprintf("%p", w), "l", thisJobLength, "span", spanBytes)
+	// sum the data using the chained writer
+
+	s := w.Sum(
+		nil,
+		int(thisJobLength),
+		spanBytes,
+	)
+
+	// reset the chained writer
+	m.putWriter(w)
+
+	// we only create a parent object on a job on the first write
+	// this way, if it is nil and we are working the right edge, we know when to skip
+	if p == nil {
+		job.parent = m.newJob(dataOffset, levelOffset)
+		atomic.AddUint32(&m.debugJobCreate, 1)
+		log.Debug("set parent", "child", fmt.Sprintf("%p", job), "parent", fmt.Sprintf("%p", job.parent))
+	}
+	// write to the parent job
+	// the section index to write to is divided by the branches
+	m.write(job.parent, (index-1)/m.branches, s, false)
+
+	log.Debug("hash result", "s", hexutil.Encode(s), "length", thisJobLength)
+
+}
+
+// creates a new hasherJob
+func (m *FileMuxer) newJob(dataOffset uint64, levelOffset uint64) *hasherJob {
+	return &hasherJob{
+		dataOffset: dataOffset,
+		levelOffset: (levelOffset-1)/uint64(m.branches) + 1,
+		writer: m.getWriter(),
+	}
+}
+
+// see writerMode consts
+func (m *FileMuxer) getWriterGC() SectionHasherTwo {
+	return m.writerFunc()
+}
+
+// see writerMode consts
+func (m *FileMuxer) putWriterGC(w SectionHasherTwo) {
+	// noop
+}
+
+// see writerMode consts
+func (m *FileMuxer) getWriterPool() SectionHasherTwo {
+	//m.writerQueue <- struct{}{}
+	return m.writerPool.Get().(SectionHasherTwo)
+}
+
+// see writerMode consts
+func (m *FileMuxer) putWriterPool(writer SectionHasherTwo) {
+	writer.Reset()
+	m.writerPool.Put(writer)
+	//<-m.writerQueue
+}
+
+// see writerMode consts
+func (m *FileMuxer) getWriterManual() SectionHasherTwo {
+	return <-m.writerManualQueue
+}
+
+// see writerMode consts
+func (m *FileMuxer) putWriterManual(writer SectionHasherTwo) {
+	writer.Reset()
+	m.writerManualQueue <- writer
+}
+
+// calculates if the given data write length results in a balanced tree
 func (m *FileMuxer) isBalancedBoundary(count uint64) bool {
 	_, ok := m.balancedTable[count]
 	return ok

From 2c0e5d4b3d274fc0c23a1effbe3aad5f29eea9cd Mon Sep 17 00:00:00 2001
From: lash
Date: Fri, 15 Mar 2019 15:04:30 +0100
Subject: [PATCH 50/50] swarm/storage: Factor sum into separate function, add
 write debugs

---
 swarm/storage/filehasher_test.go     |  31 ++++--
 swarm/storage/filehasher_thethird.go | 159 +++++++++++++++------------
 2 files changed, 108 insertions(+), 82 deletions(-)

diff --git a/swarm/storage/filehasher_test.go b/swarm/storage/filehasher_test.go
index a6c8cd22cb..567af21ae2 100644
--- a/swarm/storage/filehasher_test.go
+++ b/swarm/storage/filehasher_test.go
@@ 
-90,7 +90,7 @@ func newAsyncHasher() bmt.SectionWriter {
 	return h.NewAsyncWriter(false)
 }
 
-func TestNewFileHasher(t *testing.T) {
+func TestChainedFileHasher(t *testing.T) {
 	chunker := &FileChunker{}
 	hashFunc := func() SectionHasherTwo {
 		return SectionHasherTwo(NewFilePadder(chunker))
@@ -100,7 +100,11 @@ func TestNewFileHasher(t *testing.T) {
 			SectionWriter: newAsyncHasher(),
 		}
 	}
-	fh, err := NewFileMuxer(hashFunc, writerModeGC)
+	// hashFunc = func() SectionHasherTwo {
+	// 	return newTreeHasherWrapper()
+	// }
+
+	fh, err := NewFileSplitter(hashFunc, writerModeGC)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -121,18 +125,23 @@ func TestNewFileHasher(t *testing.T) {
 		offset += 32
 	}
 	time.Sleep(time.Second * 2)
+	refHash := fh.Sum(nil, 0, nil)
+	_ = refHash // nothing yet
 	t.Logf("debug create: %d - change %d", fh.debugJobCreate, fh.debugJobChange)
 	t.Logf("debug bytes top: %x", fh.topJob.debugHash)
+	for j, w := range fh.debugWrites {
+		t.Logf("%s: %v", j, w)
+	}
 }
 
-func BenchmarkNewFileHasher(b *testing.B) {
+func BenchmarkChainedFileHasher(b *testing.B) {
 	for i := start; i < end; i++ {
-		b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkNewFileHasher)
+		b.Run(fmt.Sprintf("%d", dataLengths[i]), benchmarkChainedFileHasher)
 	}
 }
 
-func benchmarkNewFileHasher(b *testing.B) {
+func benchmarkChainedFileHasher(b *testing.B) {
 	params := strings.Split(b.Name(), "/")
 	dataLength, err := strconv.ParseInt(params[1], 10, 64)
 	if err != nil {
@@ -142,12 +151,12 @@ func benchmarkNewFileHasher(b *testing.B) {
 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {
 		hashFunc := func() SectionHasherTwo {
-			return &wrappedHasher{
-				//SectionWriter: newAsyncHasher(),
-				SectionWriter: newTreeHasherWrapper(),
-			}
+			return newTreeHasherWrapper()
+			// return &wrappedHasher{
+			// 	SectionWriter: newAsyncHasher(),
+			// }
 		}
-		fh, err := NewFileMuxer(hashFunc, writerModePool)
+		fh, err := NewFileSplitter(hashFunc, writerModePool)
 		if err != nil {
 			b.Fatal(err)
 		}
@@ -162,7 +171,7 @@ func benchmarkNewFileHasher(b *testing.B) {
 			fh.Write(int(offset/32), data[offset:offset+l])
 			offset += 32
 		}
-		//fh.Finish(nil)
+		//refHash := fh.Sum(nil, 0, nil)
 	}
 }
 
diff --git a/swarm/storage/filehasher_thethird.go b/swarm/storage/filehasher_thethird.go
index 1f3f4e6f0a..700aafa26a 100644
--- a/swarm/storage/filehasher_thethird.go
+++ b/swarm/storage/filehasher_thethird.go
@@ -61,13 +61,16 @@ type SectionHasherTwo interface {
 // used for benchmarks against pyramid hasher which uses sync hasher
 type treeHasherWrapper struct {
 	*bmt.Hasher
+	zeroLength []byte
+	mu sync.Mutex
 }
 
 func newTreeHasherWrapper() *treeHasherWrapper {
 	pool := bmt.NewTreePool(sha3.NewKeccak256, 128, bmt.PoolSize)
 	h := bmt.New(pool)
 	return &treeHasherWrapper{
-		Hasher: h,
+		Hasher: h,
+		zeroLength: make([]byte, 8),
 	}
 }
 
@@ -96,6 +99,12 @@
 	return 32
 }
 
+func (h *treeHasherWrapper) Reset() {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	h.Hasher.ResetWithLength(h.zeroLength)
+}
+
 // FileChunker is a chainable FileHasher writer that creates chunks on write and sum
 // TODO not implemented
 type FileChunker struct {
@@ -254,28 +263,20 @@ type hasherJob struct {
 	dataOffset uint64 // global write count this job represents
 	levelOffset uint64 // offset on this level
 	count uint64 // amount of writes on this job
-	edge int // > 0 on last write, incremented by 1 every level traversed on right edge
+	edge int // > 0 on last write, incremented by 1 every level traversed on the right edge; used to determine when to skip levels for a dangling chunk
 	debugHash []byte
 	debugLifetime uint32
 	writer 
SectionHasherTwo
 }
 
-// reuse a hasherJob with new offsets
-// not thread-safe
-func (h *hasherJob) reset(w SectionHasherTwo, dataOffset uint64, levelOffset uint64, edge int) {
-	h.debugLifetime++
-	h.count = 0
-	h.dataOffset = dataOffset
-	h.levelOffset = levelOffset
-	h.writer = w
+func (h *hasherJob) inc() (uint64, uint64) {
+	oldCount := atomic.LoadUint64(&h.count)
+	newCount := atomic.AddUint64(&h.count, 1)
+	return oldCount, newCount
 }
 
-func (h *hasherJob) inc() uint64 {
-	return atomic.AddUint64(&h.count, 1)
-}
-
-// FileMuxer manages the build tree of the data
-type FileMuxer struct {
+// FileSplitter manages the build tree of the data
+type FileSplitter struct {
 	branches int // cached branch count
 	sectionSize int // cached segment size of writer
 	writerBatchSize uint64 // cached chunk size of chained writer
@@ -289,34 +290,37 @@
 	balancedTable map[uint64]uint64 // maps write counts to bytecounts for balanced trees
 	debugJobChange uint32 // debug counter for job reset calls
 	debugJobCreate uint32 // debug counter for new job allocations
+	debugWrites map[string][]int
 
 	getWriter func() SectionHasherTwo // mode-dependent function to assign hasher
 	putWriter func(SectionHasherTwo) // mode-dependent function to release hasher
 	writerFunc func() SectionHasherTwo // hasher function used by manual and GC modes
 
+	writerMu sync.Mutex
 	writerQueue chan struct{} // throttles allocation of hashers
 	writerPool sync.Pool // chained writers providing hashing in Pool mode
 	writerManualQueue chan SectionHasherTwo // chained writers providing hashing in Manual mode
 }
 
-func NewFileMuxer(writerFunc func() SectionHasherTwo, mode int) (*FileMuxer, error) {
+func NewFileSplitter(writerFunc func() SectionHasherTwo, mode int) (*FileSplitter, error) {
 
 	if writerFunc == nil {
 		return nil, errors.New("writer cannot be nil")
 	}
 
 	// create new instance and cache frequently used values
-	branches := writer.BatchSize() + writer.PadSize()
 	writer := writerFunc()
+	branches := writer.BatchSize() + writer.PadSize()
-	f := &FileMuxer{
+	f := &FileSplitter{
 		branches: int(branches),
 		sectionSize: writer.SectionSize(),
 		writerBatchSize: writer.BatchSize(),
 		parentBatchSize: writer.BatchSize() * branches,
 		writerPadSize: writer.PadSize(),
-		//writerQueue: make(chan struct{}, 1000),
-		balancedTable: make(map[uint64]uint64),
-		writerFunc: writerFunc,
+		writerQueue: make(chan struct{}, 1000),
+		balancedTable: make(map[uint64]uint64),
+		writerFunc: writerFunc,
+		debugWrites: make(map[string][]int),
 	}
 
 	// see writerMode*
@@ -360,22 +364,22 @@
 }
 
 // implements SectionHasherTwo
-func (m *FileMuxer) BatchSize() uint64 {
+func (m *FileSplitter) BatchSize() uint64 {
 	return m.writerBatchSize + m.writerPadSize
 }
 
 // implements SectionHasherTwo
-func (m *FileMuxer) PadSize() uint64 {
+func (m *FileSplitter) PadSize() uint64 {
 	return 0
 }
 
 // implements SectionHasherTwo
-func (m *FileMuxer) SectionSize() int {
+func (m *FileSplitter) SectionSize() int {
 	return m.sectionSize
 }
 
 // implements SectionHasherTwo
-func (m *FileMuxer) Write(index int, b []byte) {
+func (m *FileSplitter) Write(index int, b []byte) {
 	//log.Trace("data write", "offset", index, "jobcount", m.lastJob.count, "batchsize", m.writerBatchSize)
 
 	m.write(m.lastJob, index%m.branches, b, true)
@@ -384,60 +388,60 @@ func (m *FileSplitter) Write(index int, b []byte) {
 
 // implements SectionHasherTwo
 // TODO is noop
-func (m *FileMuxer) Sum(b []byte, length int, span []byte) []byte {
-	log.Warn("filemux sum called, 
not implemented", "b", b, "l", length, "span", span) +func (m *FileSplitter) Sum(b []byte, length int, span []byte) []byte { + log.Warn("filesplitter sum called, not implemented", "b", b, "l", length, "span", span) return nil } // implements SectionHasherTwo // TODO is noop -func (m *FileMuxer) Reset() { - log.Warn("filemux reset called, not implemented") +func (m *FileSplitter) Reset() { + close(m.writerQueue) + log.Warn("filesplitter reset called, not implemented") } // handles recursive writing across tree levels // b byte is not thread safe // index is internal within a job (batchsize / sectionsize) -func (m *FileMuxer) write(h *hasherJob, index int, b []byte, groundlevel bool) { +func (m *FileSplitter) write(h *hasherJob, index int, b []byte, groundlevel bool) { // if we are crossing a batch write size, we spawn a new job // and point the data writer's job pointer lastJob to it - newcount := h.inc() - if newcount > m.writerBatchSize { - } + // TODO pass it through write() instead + oldcount, newcount := h.inc() + + // write the data to the chain + m.writerMu.Lock() + w := h.writer + m.debugWrites[fmt.Sprintf("%p", w)] = append(m.debugWrites[fmt.Sprintf("%p", w)], index) + m.writerMu.Unlock() + lifetime := atomic.LoadUint32(&h.debugLifetime) + log.Trace("job write", "job", fmt.Sprintf("%p", h), "w", fmt.Sprintf("%p", w), "oldcount", oldcount, "newcount", newcount, "index", index, "lifetime", lifetime, "data", hexutil.Encode(b)) + w.Write(index, b) - // write the data to the chain and sum it if: + // sum data if: // * the write is on a threshold, or // * if we're done writing - //go func(h *hasherJob, newcount uint64, index int, b []byte) { - lifetime := atomic.LoadUint32(&h.debugLifetime) - log.Trace("job write", "job", fmt.Sprintf("%p", h), "w", fmt.Sprintf("%p", h.writer), "count", newcount, "index", index, "lifetime", lifetime, "data", hexutil.Encode(b)) - // write to the chained writer - h.writer.Write(index, b) - - // check threshold or done if newcount == m.writerBatchSize || h.edge > 0 { - //go func(index int, w SectionHasherTwo, p *hasherJob) { - go m.sum(b, index, newcount, h.dataOffset, h.levelOffset, h, h.writer, h.parent) + // we use oldcount here to do one less operation when calculating thisJobLength + go m.sum(b, index, oldcount, h.dataOffset, h.levelOffset, h, w, h.parent) - newLevelOffset := h.dataOffset + newcount - 1 - var sameParent bool - if newLevelOffset%m.parentBatchSize > 0 { - sameParent = true - } + // after sum we reuse the hasherJob object + // but we need to update the levelOffset which we use in sum + // to calculate the span data embedded in the resulting data + newLevelOffset := h.dataOffset + newcount + + // if we are on the data level, the dataOffset should be incremented aswell newDataOffset := h.dataOffset if groundlevel { - newDataOffset += newcount - 1 + newDataOffset += newcount } - // TODO edge - h.reset(m.getWriter(), newDataOffset, newLevelOffset, 0) - - // groundlevel is synchronous, so we don't have to worry about race here + // TODO edge need to be set here when we implement the right edge finish write + m.reset(h, m.getWriter(), newDataOffset, newLevelOffset, 0) atomic.AddUint32(&m.debugJobChange, 1) - log.Debug("changing jobs", "dataoffset", h.dataOffset, "leveloffset", h.levelOffset, "sameparent", sameParent, "groundlevel", groundlevel) - + log.Debug("changing jobs", "dataoffset", h.dataOffset, "leveloffset", h.levelOffset, "groundlevel", groundlevel) } } @@ -446,19 +450,17 @@ func (m *FileMuxer) write(h *hasherJob, index int, b []byte, 
groundlevel bool) { // the relevant values to use for calculation must be copied // if parent doesn't exist (new level) a new one is created // releases the hasher used by the hasherJob at time of calling this method -func (m *FileMuxer) sum(b []byte, index int, count uint64, dataOffset uint64, levelOffset uint64, job *hasherJob, w SectionHasherTwo, p *hasherJob) { +func (m *FileSplitter) sum(b []byte, index int, oldcount uint64, dataOffset uint64, levelOffset uint64, job *hasherJob, w SectionHasherTwo, p *hasherJob) { - thisJobLength := (count * uint64(m.sectionSize)) + uint64(len(b)%m.sectionSize) + thisJobLength := (oldcount * uint64(m.sectionSize)) + uint64(len(b)) // span is the total size under the chunk // BUG dataoffset needs modulo levelindex spanBytes := make([]byte, 8) - binary.LittleEndian.PutUint64(spanBytes, uint64(dataOffset+thisJobLength)) - log.Debug("jobwrite sum", "w", fmt.Sprintf("%p", w), "l", thisJobLength, "span", spanBytes) // sum the data using the chained writer - + log.Debug("jobwrite sum", "w", fmt.Sprintf("%p", w), "l", thisJobLength, "lastwritelocalindex", oldcount, "span", spanBytes) s := w.Sum( nil, int(thisJobLength), @@ -471,20 +473,21 @@ func (m *FileMuxer) sum(b []byte, index int, count uint64, dataOffset uint64, le // we only create a parent object on a job on the first write // this way, if it is nil and we are working the right edge, we know when to skip if p == nil { - job.parent = m.newJob(dataOffset, levelOffset) + p = m.newJob(dataOffset, levelOffset) + job.parent = p atomic.AddUint32(&m.debugJobCreate, 1) log.Debug("set parent", "child", fmt.Sprintf("%p", job), "parent", fmt.Sprintf("%p", job.parent)) } // write to the parent job // the section index to write to is divided by the branches - m.write(job.parent, (index-1)/m.branches, s, false) + m.write(p, (index-1)/m.branches, s, false) log.Debug("hash result", "s", hexutil.Encode(s), "length", thisJobLength) } // creates a new hasherJob -func (m *FileMuxer) newJob(dataOffset uint64, levelOffset uint64) *hasherJob { +func (m *FileSplitter) newJob(dataOffset uint64, levelOffset uint64) *hasherJob { return &hasherJob{ dataOffset: dataOffset, levelOffset: (levelOffset-1)/uint64(m.branches) + 1, @@ -493,41 +496,55 @@ func (m *FileMuxer) newJob(dataOffset uint64, levelOffset uint64) *hasherJob { } // see writerMode consts -func (m *FileMuxer) getWriterGC() SectionHasherTwo { +func (m *FileSplitter) getWriterGC() SectionHasherTwo { return m.writerFunc() } // see writerMode consts -func (m *FileMuxer) putWriterGC(w SectionHasherTwo) { +func (m *FileSplitter) putWriterGC(w SectionHasherTwo) { // noop } // see writerMode consts -func (m *FileMuxer) getWriterPool() SectionHasherTwo { +func (m *FileSplitter) getWriterPool() SectionHasherTwo { //m.writerQueue <- struct{}{} return m.writerPool.Get().(SectionHasherTwo) } // see writerMode consts -func (m *FileMuxer) putWriterPool(writer SectionHasherTwo) { +func (m *FileSplitter) putWriterPool(writer SectionHasherTwo) { writer.Reset() m.writerPool.Put(writer) //<-m.writerQueue } // see writerMode consts -func (m *FileMuxer) getWriterManual() SectionHasherTwo { +func (m *FileSplitter) getWriterManual() SectionHasherTwo { return <-m.writerManualQueue } // see writerMode consts -func (m *FileMuxer) putWriterManual(writer SectionHasherTwo) { +func (m *FileSplitter) putWriterManual(writer SectionHasherTwo) { writer.Reset() m.writerManualQueue <- writer } +// resets a hasherJob for re-use. 
+// It will recursively reset parents as long as the respective levelOffsets
+// are on batch boundaries
+func (m *FileSplitter) reset(h *hasherJob, w SectionHasherTwo, dataOffset uint64, levelOffset uint64, edge int) {
+	h.debugLifetime++
+	h.count = 0
+	h.dataOffset = dataOffset
+	h.levelOffset = levelOffset
+	h.writer = w
+	if levelOffset%m.parentBatchSize == 0 && h.parent != nil {
+		m.reset(h.parent, m.getWriter(), dataOffset, levelOffset/m.writerBatchSize, edge+1)
+	}
+}
+
 // calculates if the given data write length results in a balanced tree
-func (m *FileMuxer) isBalancedBoundary(count uint64) bool {
+func (m *FileSplitter) isBalancedBoundary(count uint64) bool {
 	_, ok := m.balancedTable[count]
 	return ok
 }
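
Usage note (illustrative only): a minimal sketch of how the chained writer is driven,
mirroring TestChainedFileHasher from filehasher_test.go above. wrappedHasher and
newAsyncHasher are the helpers defined in that test file, and Sum is still a stub at
this point in the series, so no root reference is returned yet:

	hashFunc := func() SectionHasherTwo {
		return &wrappedHasher{
			SectionWriter: newAsyncHasher(),
		}
	}

	// writerModePool recycles chained writers through a sync.Pool
	fh, err := NewFileSplitter(hashFunc, writerModePool)
	if err != nil {
		panic(err)
	}

	// write one 32-byte section at a time; the index argument is the global
	// section count, which write() folds into a per-job index via index%branches
	data := make([]byte, 8192)
	secsize := fh.SectionSize()
	for offset := 0; offset < len(data); offset += secsize {
		fh.Write(offset/secsize, data[offset:offset+secsize])
	}

	// a no-op for now: see the TODO on FileSplitter.Sum
	_ = fh.Sum(nil, 0, nil)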