
Commit

Use a non-nil ValueReader in sequenceChunker.Done (#2273)
Fixes #2220.
kalman authored Aug 5, 2016
1 parent 27c687a commit e50f773
Showing 17 changed files with 196 additions and 51 deletions.
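In brief: sequenceChunker.Done no longer takes a ValueReader parameter. The reader is instead passed to the chunker's constructors and retained in a new vr field, so Done always has the reader it may need to load a child chunk while collapsing the tree to its canonical root. A condensed before/after of the signatures, paraphrased from the diffs below:

    // Before: the reader arrived only at Done time, and several call sites passed nil.
    func newSequenceChunker(cur *sequenceCursor, vw ValueWriter, makeChunk, parentMakeChunk makeChunkFn, hashValueBytes hashValueBytesFn) *sequenceChunker
    func (sc *sequenceChunker) Done(vr ValueReader) sequence

    // After: the reader is a constructor argument, kept as sc.vr.
    func newSequenceChunker(cur *sequenceCursor, vr ValueReader, vw ValueWriter, makeChunk, parentMakeChunk makeChunkFn, hashValueBytes hashValueBytesFn) *sequenceChunker
    func (sc *sequenceChunker) Done() sequence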
8 changes: 4 additions & 4 deletions go/types/blob.go
@@ -43,7 +43,7 @@ func (b Blob) Splice(idx uint64, deleteCount uint64, data []byte) Blob {
d.Chk.True(idx+deleteCount <= b.Len())

cur := newCursorAtIndex(b.seq, idx)
- ch := newSequenceChunker(cur, nil, makeBlobLeafChunkFn(b.seq.valueReader()), newIndexedMetaSequenceChunkFn(BlobKind, b.seq.valueReader()), hashValueByte)
+ ch := newSequenceChunker(cur, b.seq.valueReader(), nil, makeBlobLeafChunkFn(b.seq.valueReader()), newIndexedMetaSequenceChunkFn(BlobKind, b.seq.valueReader()), hashValueByte)
for deleteCount > 0 {
ch.Skip()
deleteCount--
@@ -52,7 +52,7 @@ func (b Blob) Splice(idx uint64, deleteCount uint64, data []byte) Blob {
for _, v := range data {
ch.Append(v)
}
- return newBlob(ch.Done(nil).(indexedSequence))
+ return newBlob(ch.Done().(indexedSequence))
}

// Collection interface
@@ -177,7 +177,7 @@ func NewBlob(r io.Reader) Blob {
}

func NewStreamingBlob(r io.Reader, vrw ValueReadWriter) Blob {
- sc := newEmptySequenceChunker(vrw, makeBlobLeafChunkFn(nil), newIndexedMetaSequenceChunkFn(BlobKind, nil), func(item sequenceItem, rv *rollingValueHasher) {
+ sc := newEmptySequenceChunker(vrw, vrw, makeBlobLeafChunkFn(nil), newIndexedMetaSequenceChunkFn(BlobKind, nil), func(item sequenceItem, rv *rollingValueHasher) {
rv.HashByte(item.(byte))
})

@@ -247,5 +247,5 @@ func NewStreamingBlob(r io.Reader, vrw ValueReadWriter) Blob {
sc.parent.Append(b.(metaTuple))
}

- return newBlob(sc.Done(vrw).(indexedSequence))
+ return newBlob(sc.Done().(indexedSequence))
}
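Because a ValueReadWriter satisfies both ValueReader and ValueWriter, the two call patterns in this file are: streaming construction passes the same vrw for both slots, while splicing an existing blob reads through the blob's own sequence reader and writes nothing itself. Condensed from the hunks above (the inline hash function is elided as hashFn):

    // Streaming construction: vrw fills both the reader and the writer slot.
    sc := newEmptySequenceChunker(vrw, vrw, makeBlobLeafChunkFn(nil), newIndexedMetaSequenceChunkFn(BlobKind, nil), hashFn)

    // Splicing an existing blob: non-nil reader, nil writer.
    ch := newSequenceChunker(cur, b.seq.valueReader(), nil, makeBlobLeafChunkFn(b.seq.valueReader()), newIndexedMetaSequenceChunkFn(BlobKind, b.seq.valueReader()), hashValueByte)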
12 changes: 6 additions & 6 deletions go/types/list.go
@@ -20,22 +20,22 @@ func newList(seq indexedSequence) List {

// NewList creates a new List where the type is computed from the elements in the list, populated with values, chunking if and when needed.
func NewList(values ...Value) List {
- seq := newEmptySequenceChunker(nil, makeListLeafChunkFn(nil), newIndexedMetaSequenceChunkFn(ListKind, nil), hashValueBytes)
+ seq := newEmptySequenceChunker(nil, nil, makeListLeafChunkFn(nil), newIndexedMetaSequenceChunkFn(ListKind, nil), hashValueBytes)
for _, v := range values {
seq.Append(v)
}
- return newList(seq.Done(nil).(indexedSequence))
+ return newList(seq.Done().(indexedSequence))
}

// NewStreamingList creates a new List with type t, populated with values, chunking if and when needed. As chunks are created, they're written to vrw -- including the root chunk of the list. Once the caller has closed values, she can read the completed List from the returned channel.
func NewStreamingList(vrw ValueReadWriter, values <-chan Value) <-chan List {
out := make(chan List)
go func() {
- seq := newEmptySequenceChunker(vrw, makeListLeafChunkFn(vrw), newIndexedMetaSequenceChunkFn(ListKind, vrw), hashValueBytes)
+ seq := newEmptySequenceChunker(vrw, vrw, makeListLeafChunkFn(vrw), newIndexedMetaSequenceChunkFn(ListKind, vrw), hashValueBytes)
for v := range values {
seq.Append(v)
}
- out <- newList(seq.Done(vrw).(indexedSequence))
+ out <- newList(seq.Done().(indexedSequence))
close(out)
}()
return out
@@ -134,7 +134,7 @@ func (l List) Splice(idx uint64, deleteCount uint64, vs ...Value) List {
d.Chk.True(idx+deleteCount <= l.Len())

cur := newCursorAtIndex(l.seq, idx)
- ch := newSequenceChunker(cur, nil, makeListLeafChunkFn(l.seq.valueReader()), newIndexedMetaSequenceChunkFn(ListKind, l.seq.valueReader()), hashValueBytes)
+ ch := newSequenceChunker(cur, l.seq.valueReader(), nil, makeListLeafChunkFn(l.seq.valueReader()), newIndexedMetaSequenceChunkFn(ListKind, l.seq.valueReader()), hashValueBytes)
for deleteCount > 0 {
ch.Skip()
deleteCount--
@@ -143,7 +143,7 @@ func (l List) Splice(idx uint64, deleteCount uint64, vs ...Value) List {
for _, v := range vs {
ch.Append(v)
}
- return newList(ch.Done(nil).(indexedSequence))
+ return newList(ch.Done().(indexedSequence))
}

func (l List) Insert(idx uint64, vs ...Value) List {
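NewList, by contrast, still passes nil for both the reader and the writer, which is presumably safe because a list built from scratch keeps its entire tree in memory, so Done never has to fetch a chunk. For orientation, a small hypothetical usage sketch of NewStreamingList under the new signatures (vrw stands for any ValueReadWriter, such as a ValueStore; the values are arbitrary):

    vals := make(chan Value)
    listCh := NewStreamingList(vrw, vals)
    go func() {
        for i := 0; i < 1000; i++ {
            vals <- Number(i)
        }
        close(vals)
    }()
    l := <-listCh // the completed List; its chunks, including the root, were written to vrw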
21 changes: 21 additions & 0 deletions go/types/list_test.go
@@ -1039,3 +1039,24 @@ func TestListTypeAfterMutations(t *testing.T) {
test(15, listLeafSequence{})
test(1500, indexedMetaSequence{})
}

+ func TestListRemoveLastWhenNotLoaded(t *testing.T) {
+ assert := assert.New(t)
+
+ smallTestChunks()
+ defer normalProductionChunks()
+
+ vs := NewTestValueStore()
+ reload := func(l List) List {
+ return vs.ReadValue(vs.WriteValue(l).TargetHash()).(List)
+ }
+
+ tl := newTestList(1024)
+ nl := tl.toList()
+
+ for len(tl) > 0 {
+ tl = tl[:len(tl)-1]
+ nl = reload(nl.RemoveAt(uint64(len(tl))))
+ assert.True(tl.toList().Equals(nl))
+ }
+ }
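The reload helper is what exercises the fixed path: after WriteValue/ReadValue the list is backed by refs whose chunks have not been loaded, so removing the trailing element forces Done to descend into a child sequence that must first be read. A minimal standalone sketch of the same round trip (sizes are arbitrary; smallTestChunks keeps even a small list multi-level):

    smallTestChunks()
    defer normalProductionChunks()

    vs := NewTestValueStore()
    vals := make([]Value, 64)
    for i := range vals {
        vals[i] = Number(i)
    }
    l := NewList(vals...)
    h := vs.WriteValue(l).TargetHash() // persist the list, remember its hash
    l = vs.ReadValue(h).(List)         // re-read: children are now unloaded refs
    l = l.RemoveAt(l.Len() - 1)        // with a nil reader, Done used to fail here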
8 changes: 4 additions & 4 deletions go/types/map.go
@@ -28,13 +28,13 @@ func mapHashValueBytes(item sequenceItem, rv *rollingValueHasher) {

func NewMap(kv ...Value) Map {
entries := buildMapData(kv)
- seq := newEmptySequenceChunker(nil, makeMapLeafChunkFn(nil), newOrderedMetaSequenceChunkFn(MapKind, nil), mapHashValueBytes)
+ seq := newEmptySequenceChunker(nil, nil, makeMapLeafChunkFn(nil), newOrderedMetaSequenceChunkFn(MapKind, nil), mapHashValueBytes)

for _, entry := range entries {
seq.Append(entry)
}

- return newMap(seq.Done(nil).(orderedSequence))
+ return newMap(seq.Done().(orderedSequence))
}

func NewStreamingMap(vrw ValueReadWriter, kvs <-chan Value) <-chan Map {
@@ -174,7 +174,7 @@ func (m Map) Remove(k Value) Map {
}

func (m Map) splice(cur *sequenceCursor, deleteCount uint64, vs ...mapEntry) Map {
- ch := newSequenceChunker(cur, nil, makeMapLeafChunkFn(m.seq.valueReader()), newOrderedMetaSequenceChunkFn(MapKind, m.seq.valueReader()), mapHashValueBytes)
+ ch := newSequenceChunker(cur, m.seq.valueReader(), nil, makeMapLeafChunkFn(m.seq.valueReader()), newOrderedMetaSequenceChunkFn(MapKind, m.seq.valueReader()), mapHashValueBytes)
for deleteCount > 0 {
ch.Skip()
deleteCount--
@@ -183,7 +183,7 @@ func (m Map) splice(cur *sequenceCursor, deleteCount uint64, vs ...mapEntry) Map
for _, v := range vs {
ch.Append(v)
}
- return newMap(ch.Done(nil).(orderedSequence))
+ return newMap(ch.Done().(orderedSequence))
}

func (m Map) getCursorAtValue(v Value) (cur *sequenceCursor, found bool) {
24 changes: 24 additions & 0 deletions go/types/map_test.go
@@ -1131,3 +1131,27 @@ func TestCompoundMapWithValuesOfEveryType(t *testing.T) {
assert.Equal(len(kvs)/2, int(m.Len()))
}
}

+ func TestMapRemoveLastWhenNotLoaded(t *testing.T) {
+ assert := assert.New(t)
+
+ smallTestChunks()
+ defer normalProductionChunks()
+
+ vs := NewTestValueStore()
+ reload := func(m Map) Map {
+ return vs.ReadValue(vs.WriteValue(m).TargetHash()).(Map)
+ }
+
+ tm := getTestNativeOrderMap(4)
+ nm := tm.toMap()
+
+ for len(tm.entries) > 0 {
+ entr := tm.entries
+ last := entr[len(entr)-1]
+ entr = entr[:len(entr)-1]
+ tm.entries = entr
+ nm = reload(nm.Remove(last.key))
+ assert.True(tm.toMap().Equals(nm))
+ }
+ }
4 changes: 2 additions & 2 deletions go/types/mutator.go
@@ -28,13 +28,13 @@ func (mx *mapMutator) Finish() Map {
mx.oc = nil
}()

- seq := newEmptySequenceChunker(mx.vrw, makeMapLeafChunkFn(mx.vrw), newOrderedMetaSequenceChunkFn(MapKind, mx.vrw), mapHashValueBytes)
+ seq := newEmptySequenceChunker(mx.vrw, mx.vrw, makeMapLeafChunkFn(mx.vrw), newOrderedMetaSequenceChunkFn(MapKind, mx.vrw), mapHashValueBytes)

// I tried splitting this up so that the iteration ran in a separate goroutine from the Append'ing, but it actually made things a bit slower when I ran a test.
iter := mx.oc.NewIterator()
defer iter.Release()
for iter.Next() {
seq.Append(iter.Op())
}
- return newMap(seq.Done(mx.vrw).(orderedSequence))
+ return newMap(seq.Done().(orderedSequence))
}
17 changes: 9 additions & 8 deletions go/types/sequence_chunker.go
@@ -10,6 +10,7 @@ type hashValueBytesFn func(item sequenceItem, rv *rollingValueHasher)

type sequenceChunker struct {
cur *sequenceCursor
+ vr ValueReader
vw ValueWriter
parent *sequenceChunker
current []sequenceItem
@@ -23,18 +24,19 @@ type sequenceChunker struct {
// makeChunkFn takes a sequence of items to chunk, and returns the result of chunking those items, a tuple of a reference to that chunk which can itself be chunked + its underlying value.
type makeChunkFn func(values []sequenceItem) (Collection, orderedKey, uint64)

- func newEmptySequenceChunker(vw ValueWriter, makeChunk, parentMakeChunk makeChunkFn, hashValueBytes hashValueBytesFn) *sequenceChunker {
- return newSequenceChunker(nil, vw, makeChunk, parentMakeChunk, hashValueBytes)
+ func newEmptySequenceChunker(vr ValueReader, vw ValueWriter, makeChunk, parentMakeChunk makeChunkFn, hashValueBytes hashValueBytesFn) *sequenceChunker {
+ return newSequenceChunker(nil, vr, vw, makeChunk, parentMakeChunk, hashValueBytes)
}

- func newSequenceChunker(cur *sequenceCursor, vw ValueWriter, makeChunk, parentMakeChunk makeChunkFn, hashValueBytes hashValueBytesFn) *sequenceChunker {
+ func newSequenceChunker(cur *sequenceCursor, vr ValueReader, vw ValueWriter, makeChunk, parentMakeChunk makeChunkFn, hashValueBytes hashValueBytesFn) *sequenceChunker {
// |cur| will be nil if this is a new sequence, implying this is a new tree, or the tree has grown in height relative to its original chunked form.
d.Chk.True(makeChunk != nil)
d.Chk.True(parentMakeChunk != nil)
d.Chk.True(hashValueBytes != nil)

sc := &sequenceChunker{
cur,
+ vr,
vw,
nil,
[]sequenceItem{},
@@ -156,7 +158,7 @@ func (sc *sequenceChunker) createParent() {
// Clone the parent cursor because otherwise calling cur.advance() will affect our parent - and vice versa - in surprising ways. Instead, Skip moves forward our parent's cursor if we advance across a boundary.
parent = sc.cur.parent.clone()
}
- sc.parent = newSequenceChunker(parent, sc.vw, sc.parentMakeChunk, sc.parentMakeChunk, metaHashValueBytes)
+ sc.parent = newSequenceChunker(parent, sc.vr, sc.vw, sc.parentMakeChunk, sc.parentMakeChunk, metaHashValueBytes)
sc.parent.isLeaf = false
}

@@ -201,8 +203,7 @@ func (sc *sequenceChunker) anyPending() bool {
}

// Returns the root sequence of the resulting tree. The logic here is subtle, but hopefully correct and understandable. See comments inline.
- func (sc *sequenceChunker) Done(vr ValueReader) sequence {
- d.Chk.True((vr == nil) == (sc.vw == nil))
+ func (sc *sequenceChunker) Done() sequence {
d.Chk.False(sc.done)
sc.done = true

@@ -217,7 +218,7 @@ func (sc *sequenceChunker) Done(vr ValueReader) sequence {
sc.handleChunkBoundary()
}

- return sc.parent.Done(vr)
+ return sc.parent.Done()
}

// At this point, we know this chunker contains, in |current|, every item at this level of the resulting tree. To see this, consider that there are two ways a chunker can enter items into its |current|: (1) as the result of resume() with the cursor on anything other than the first item in the sequence, and (2) as a result of a child chunker hitting an explicit chunk boundary during either Append() or finalize(). The only way there can be no items in some parent chunker's |current| is if this chunker began with cursor within its first existing chunk (and thus all parents resume()'d with a cursor on their first item) and continued through all subsequent items without creating any explicit chunk boundaries (and thus never sent any items up to a parent as a result of chunking). Therefore, this chunker's current must contain all items within the current sequence.
@@ -235,7 +236,7 @@ func (sc *sequenceChunker) Done(vr ValueReader) sequence {
mt := sc.current[0].(metaTuple)

for {
- child := mt.getChildSequence(vr)
+ child := mt.getChildSequence(sc.vr)
if _, ok := child.(metaSequence); !ok || child.seqLen() > 1 {
return child
}
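This last hunk is the heart of the fix. When chunking finishes with a single metaTuple at the top, Done walks down to find the canonical root, and that walk may need to read child chunks from storage. Previously the reader was the vr argument, guarded by the now-deleted assertion d.Chk.True((vr == nil) == (sc.vw == nil)), so a nil reader only worked while every child was still in memory. A commented sketch of the loop; the descent step is not shown in the hunk above, so its exact accessor is an assumption:

    mt := sc.current[0].(metaTuple)
    for {
        // May fetch the child chunk through sc.vr; a nil reader is only
        // safe if the child sequence is already cached in memory.
        child := mt.getChildSequence(sc.vr)
        if _, ok := child.(metaSequence); !ok || child.seqLen() > 1 {
            return child // a leaf, or a node wider than one entry: the root
        }
        // The node has exactly one child: descend and repeat (assumed accessor).
        mt = child.(metaSequence).getItem(0).(metaTuple)
    }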
8 changes: 4 additions & 4 deletions go/types/set.go
@@ -21,13 +21,13 @@ func newSet(seq orderedSequence) Set {

func NewSet(v ...Value) Set {
data := buildSetData(v)
- seq := newEmptySequenceChunker(nil, makeSetLeafChunkFn(nil), newOrderedMetaSequenceChunkFn(SetKind, nil), hashValueBytes)
+ seq := newEmptySequenceChunker(nil, nil, makeSetLeafChunkFn(nil), newOrderedMetaSequenceChunkFn(SetKind, nil), hashValueBytes)

for _, v := range data {
seq.Append(v)
}

- return newSet(seq.Done(nil).(orderedSequence))
+ return newSet(seq.Done().(orderedSequence))
}

// Computes the diff from |last| to |s| using "best" algorithm, which balances returning results early vs completing quickly.
@@ -138,7 +138,7 @@ func (s Set) Remove(values ...Value) Set {
}

func (s Set) splice(cur *sequenceCursor, deleteCount uint64, vs ...Value) Set {
- ch := newSequenceChunker(cur, nil, makeSetLeafChunkFn(s.seq.valueReader()), newOrderedMetaSequenceChunkFn(SetKind, s.seq.valueReader()), hashValueBytes)
+ ch := newSequenceChunker(cur, s.seq.valueReader(), nil, makeSetLeafChunkFn(s.seq.valueReader()), newOrderedMetaSequenceChunkFn(SetKind, s.seq.valueReader()), hashValueBytes)
for deleteCount > 0 {
ch.Skip()
deleteCount--
@@ -148,7 +148,7 @@ func (s Set) splice(cur *sequenceCursor, deleteCount uint64, vs ...Value) Set {
ch.Append(v)
}

- ns := newSet(ch.Done(nil).(orderedSequence))
+ ns := newSet(ch.Done().(orderedSequence))
return ns
}

22 changes: 22 additions & 0 deletions go/types/set_test.go
@@ -926,3 +926,25 @@ func TestChunkedSetWithValuesOfEveryType(t *testing.T) {
assert.Equal(len(vs), int(s.Len()))
}
}

+ func TestSetRemoveLastWhenNotLoaded(t *testing.T) {
+ assert := assert.New(t)
+
+ smallTestChunks()
+ defer normalProductionChunks()
+
+ vs := NewTestValueStore()
+ reload := func(s Set) Set {
+ return vs.ReadValue(vs.WriteValue(s).TargetHash()).(Set)
+ }
+
+ ts := getTestNativeOrderSet(8)
+ ns := ts.toSet()
+
+ for len(ts) > 0 {
+ last := ts[len(ts)-1]
+ ts = ts[:len(ts)-1]
+ ns = reload(ns.Remove(last))
+ assert.True(ts.toSet().Equals(ns))
+ }
+ }
9 changes: 4 additions & 5 deletions js/src/blob.js
@@ -7,8 +7,7 @@
import * as Bytes from './bytes.js';
import Collection from './collection.js';
import RollingValueHasher from './rolling-value-hasher.js';
- import SequenceChunker from './sequence-chunker.js';
- import {chunkSequence} from './sequence-chunker.js';
+ import {default as SequenceChunker, chunkSequence} from './sequence-chunker.js';
import type {EqualsFn} from './edit-distance.js';
import type {ValueReader, ValueReadWriter} from './value-store.js';
import type {makeChunkFn} from './sequence-chunker.js';
@@ -22,7 +21,7 @@ import {hashValueByte} from './rolling-value-hasher.js';

export default class Blob extends Collection<IndexedSequence> {
constructor(bytes: Uint8Array) {
- const chunker = new SequenceChunker(null, null, newBlobLeafChunkFn(null),
+ const chunker = new SequenceChunker(null, null, null, newBlobLeafChunkFn(null),
newIndexedMetaSequenceChunkFn(Kind.Blob, null), blobHashValueBytes);

for (let i = 0; i < bytes.length; i++) {
@@ -45,7 +44,7 @@ export default class Blob extends Collection<IndexedSequence> {
splice(idx: number, deleteCount: number, insert: Uint8Array): Promise<Blob> {
const vr = this.sequence.vr;
return this.sequence.newCursorAt(idx).then(cursor =>
- chunkSequence(cursor, Array.from(insert), deleteCount, newBlobLeafChunkFn(vr),
+ chunkSequence(cursor, vr, Array.from(insert), deleteCount, newBlobLeafChunkFn(vr),
newIndexedMetaSequenceChunkFn(Kind.Blob, vr, null),
hashValueByte)).then(s => Blob.fromSequence(s));
}
@@ -180,7 +179,7 @@ export class BlobWriter {

constructor(vrw: ?ValueReadWriter) {
this._state = 'writable';
- this._chunker = new SequenceChunker(null, vrw, newBlobLeafChunkFn(vrw),
+ this._chunker = new SequenceChunker(null, vrw, vrw, newBlobLeafChunkFn(vrw),
newIndexedMetaSequenceChunkFn(Kind.Blob, vrw), blobHashValueBytes);
this._vrw = vrw;
}
25 changes: 24 additions & 1 deletion js/src/list-test.js
@@ -15,6 +15,7 @@ import {equals} from './compare.js';
import {invariant, notNull} from './assert.js';
import {newStruct} from './struct.js';
import type Value from './value.js';
+ import {smallTestChunks, normalProductionChunks} from './rolling-value-hasher.js';
import {
makeListType,
makeRefType,
@@ -297,10 +298,15 @@ suite('CompoundList', () => {
let db;

setup(() => {
+ smallTestChunks();
db = new TestDatabase();
});

- teardown((): Promise<void> => db.close());
+ teardown(async () => {
+ normalProductionChunks();
+ await db.close();
+ });

function build(): List {
const l1 = new List(['a', 'b']);
const r1 = db.writeValue(l1);
@@ -364,6 +370,23 @@ suite('CompoundList', () => {
[{done: false, value: 'a'}, {done: false, value: 'b'}, {done: true}, {done: true}],
values);
});

+ test('Remove last when not loaded', async () => {
+ const reload = async (l: List): Promise<List> => {
+ const l2 = await db.readValue(db.writeValue(l).targetHash);
+ invariant(l2 instanceof List);
+ return l2;
+ };
+
+ let vals = intSequence(64);
+ let list = new List(vals);
+
+ while (vals.length > 0) {
+ vals = vals.slice(0, vals.length - 1);
+ list = await list.remove(vals.length).then(reload);
+ assert.isTrue(equals(new List(vals), list));
+ }
+ });
});

suite('Diff List', () => {
(Diffs for the remaining 6 of the 17 changed files were not loaded in this view.)