Skip to content

Commit

Permalink
Implement spec-compliant share splitting (#246)
Browse files Browse the repository at this point in the history
* Export block data compute shares.
* Refactor to use ShareSize constant directly.
* Change message splitting to prefix namespace ID.
* Implement chunking for contiguous.
* Add termination condition.
* Rename append contiguous to split contiguous.
* Update test for small tx.
* Add test for two contiguous.
* Make tx and msg adjusted share sizes exported constants.
* Panic on hopefully-unreachable condition instead of silently skipping.
* Update hardcoded response for block format.

Co-authored-by: Ismail Khoffi <Ismail.Khoffi@gmail.com>
  • Loading branch information
adlerjohn and liamsi committed Mar 25, 2021
1 parent d129a28 commit 7eb6a61
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 56 deletions.
2 changes: 1 addition & 1 deletion blockchain/msgs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ func TestBlockchainMessageVectors(t *testing.T) {
BlockRequest: &bcproto.BlockRequest{Height: math.MaxInt64}}},
"0a0a08ffffffffffffffff7f"},
{"BlockResponseMessage", &bcproto.Message{Sum: &bcproto.Message_BlockResponse{
BlockResponse: &bcproto.BlockResponse{Block: bpb}}}, "1ac0020abd020a5b0a02080b1803220b088092b8c398feffffff012a0212003a2016106406aededa633a265efd5833af53775af5a27c803518313c9b77e68875926a20e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b85512130a0b48656c6c6f20576f726c6412001a0022001ac8010a3000000000000000010000000000000001266fcca850585e31b54074399b0dbb5b04b408b77d5f357a528ed3e04b63c3c10a30ffffffffffffffffffffffffffffffffe04e02811b28234428f1c1219d5627f61c02c415c175bdabd9e7934519baef07123000000000000000010000000000000001266fcca850585e31b54074399b0dbb5b04b408b77d5f357a528ed3e04b63c3c11230ffffffffffffffffffffffffffffffffe04e02811b28234428f1c1219d5627f61c02c415c175bdabd9e7934519baef07"},
BlockResponse: &bcproto.BlockResponse{Block: bpb}}}, "1ac0020abd020a5b0a02080b1803220b088092b8c398feffffff012a0212003a204c149a7cfadc92b669b0cbfa4951a1b18c2d9f3177a3b8756d39ebb96e9d63316a20e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b85512130a0b48656c6c6f20576f726c6412001a0022001ac8010a3000000000000000010000000000000001b81cb5596c28d044214b9f935e4af7dbe76e417f6182d86fbee68bfff7b2ff3a0a30ffffffffffffffffffffffffffffffffc4096ba8fccf882c309896e9168fa43fe62fccb752cb12d5160cc1d9c2ebffe7123000000000000000010000000000000001b81cb5596c28d044214b9f935e4af7dbe76e417f6182d86fbee68bfff7b2ff3a1230ffffffffffffffffffffffffffffffffc4096ba8fccf882c309896e9168fa43fe62fccb752cb12d5160cc1d9c2ebffe7"},
{"NoBlockResponseMessage", &bcproto.Message{Sum: &bcproto.Message_NoBlockResponse{
NoBlockResponse: &bcproto.NoBlockResponse{Height: 1}}}, "12020801"},
{"NoBlockResponseMessage", &bcproto.Message{Sum: &bcproto.Message_NoBlockResponse{
Expand Down
33 changes: 18 additions & 15 deletions types/block.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ func (b *Block) fillHeader() {
// fillDataAvailabilityHeader fills in any remaining DataAvailabilityHeader fields
// that are a function of the block data.
func (b *Block) fillDataAvailabilityHeader() {
namespacedShares := b.Data.computeShares()
namespacedShares := b.Data.ComputeShares()
shares := namespacedShares.RawShares()
if len(shares) == 0 {
// no shares -> no row/column roots -> hash(empty)
Expand Down Expand Up @@ -272,7 +272,7 @@ func (b *Block) PutBlock(ctx context.Context, nodeAdder format.NodeAdder) error
}

// recompute the shares
namespacedShares := b.Data.computeShares()
namespacedShares := b.Data.ComputeShares()
shares := namespacedShares.RawShares()

// don't do anything if there is no data to put on IPFS
Expand Down Expand Up @@ -1312,41 +1312,43 @@ type IntermediateStateRoots struct {
RawRootsList []tmbytes.HexBytes `json:"intermediate_roots"`
}

func (roots IntermediateStateRoots) splitIntoShares(shareSize int) NamespacedShares {
shares := make([]NamespacedShare, 0)
func (roots IntermediateStateRoots) splitIntoShares() NamespacedShares {
rawDatas := make([][]byte, 0, len(roots.RawRootsList))
for _, root := range roots.RawRootsList {
rawData, err := root.MarshalDelimited()
if err != nil {
panic(fmt.Sprintf("app returned intermediate state root that can not be encoded %#v", root))
}
shares = appendToShares(shares, IntermediateStateRootsNamespaceID, rawData, shareSize)
rawDatas = append(rawDatas, rawData)
}
shares := splitContiguous(IntermediateStateRootsNamespaceID, rawDatas)
return shares
}

func (msgs Messages) splitIntoShares(shareSize int) NamespacedShares {
func (msgs Messages) splitIntoShares() NamespacedShares {
shares := make([]NamespacedShare, 0)
for _, m := range msgs.MessagesList {
rawData, err := m.MarshalDelimited()
if err != nil {
panic(fmt.Sprintf("app accepted a Message that can not be encoded %#v", m))
}
shares = appendToShares(shares, m.NamespaceID, rawData, shareSize)
shares = appendToShares(shares, m.NamespaceID, rawData)
}
return shares
}

func (data *Data) computeShares() NamespacedShares {
// ComputeShares splits block data into shares of an original data square.
func (data *Data) ComputeShares() NamespacedShares {
// TODO(ismail): splitting into shares should depend on the block size and layout
// see: https://github.com/lazyledger/lazyledger-specs/blob/master/specs/block_proposer.md#laying-out-transactions-and-messages

// reserved shares:
txShares := data.Txs.splitIntoShares(ShareSize)
intermRootsShares := data.IntermediateStateRoots.splitIntoShares(ShareSize)
evidenceShares := data.Evidence.splitIntoShares(ShareSize)
txShares := data.Txs.splitIntoShares()
intermRootsShares := data.IntermediateStateRoots.splitIntoShares()
evidenceShares := data.Evidence.splitIntoShares()

// application data shares from messages:
msgShares := data.Messages.splitIntoShares(ShareSize)
msgShares := data.Messages.splitIntoShares()
curLen := len(txShares) + len(intermRootsShares) + len(evidenceShares) + len(msgShares)

// FIXME(ismail): this is not a power of two
Expand Down Expand Up @@ -1587,8 +1589,8 @@ func (data *EvidenceData) FromProto(eviData *tmproto.EvidenceList) error {
return nil
}

func (data *EvidenceData) splitIntoShares(shareSize int) NamespacedShares {
shares := make([]NamespacedShare, 0)
func (data *EvidenceData) splitIntoShares() NamespacedShares {
rawDatas := make([][]byte, 0, len(data.Evidence))
for _, ev := range data.Evidence {
var rawData []byte
var err error
Expand All @@ -1608,8 +1610,9 @@ func (data *EvidenceData) splitIntoShares(shareSize int) NamespacedShares {
if err != nil {
panic(fmt.Sprintf("evidence included in evidence pool that can not be encoded %#v, err: %v", ev, err))
}
shares = appendToShares(shares, EvidenceNamespaceID, rawData, shareSize)
rawDatas = append(rawDatas, rawData)
}
shares := splitContiguous(EvidenceNamespaceID, rawDatas)
return shares
}

Expand Down
2 changes: 1 addition & 1 deletion types/block_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1360,7 +1360,7 @@ func TestPutBlock(t *testing.T) {
defer cancel()

block.fillDataAvailabilityHeader()
tc.blockData.computeShares()
tc.blockData.ComputeShares()
for _, rowRoot := range block.DataAvailabilityHeader.RowsRoots.Bytes() {
// recreate the cids using only the computed roots
cid, err := nodes.CidFromNamespacedSha256(rowRoot)
Expand Down
8 changes: 8 additions & 0 deletions types/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@ const (
// NamespaceSize is the namespace size in bytes.
NamespaceSize = 8

// ShareReservedBytes is the reserved bytes for contiguous appends.
ShareReservedBytes = 1

// TxShareSize is the number of bytes usable for tx/evidence/ISR shares.
TxShareSize = ShareSize - NamespaceSize - ShareReservedBytes
// MsgShareSize is the number of bytes usable for message shares.
MsgShareSize = ShareSize - NamespaceSize

// MaxSquareSize is the maximum number of
// rows/columns of the original data shares in square layout.
// Corresponds to AVAILABLE_DATA_ORIGINAL_SQUARE_MAX in the spec.
Expand Down
80 changes: 69 additions & 11 deletions types/shares.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,35 +56,93 @@ func (m Message) MarshalDelimited() ([]byte, error) {
return append(lenBuf[:n], m.Data...), nil
}

func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte, shareSize int) []NamespacedShare {
if len(rawData) < shareSize {
rawShare := rawData
paddedShare := zeroPadIfNecessary(rawShare, shareSize)
// appendToShares appends raw data as shares.
// Used for messages.
func appendToShares(shares []NamespacedShare, nid namespace.ID, rawData []byte) []NamespacedShare {
if len(rawData) < MsgShareSize {
rawShare := []byte(append(nid, rawData...))
paddedShare := zeroPadIfNecessary(rawShare, ShareSize)
share := NamespacedShare{paddedShare, nid}
shares = append(shares, share)
} else { // len(rawData) >= MsgShareSize
shares = append(shares, split(rawData, nid)...)
}
return shares
}

// splitContiguous splits multiple raw data contiguously as shares.
// Used for transactions, intermediate state roots, and evidence.
func splitContiguous(nid namespace.ID, rawDatas [][]byte) []NamespacedShare {
shares := make([]NamespacedShare, 0)
// Index into the outer slice of rawDatas
outerIndex := 0
// Index into the inner slice of rawDatas
innerIndex := 0
for outerIndex < len(rawDatas) {
var rawData []byte
startIndex := 0
rawData, outerIndex, innerIndex, startIndex = getNextChunk(rawDatas, outerIndex, innerIndex, TxShareSize)
rawShare := []byte(append(append(nid, byte(startIndex)), rawData...))
paddedShare := zeroPadIfNecessary(rawShare, ShareSize)
share := NamespacedShare{paddedShare, nid}
shares = append(shares, share)
} else { // len(rawData) >= shareSize
shares = append(shares, split(rawData, shareSize, nid)...)
}
return shares
}

// TODO(ismail): implement corresponding merge method for clients requesting
// shares for a particular namespace
func split(rawData []byte, shareSize int, nid namespace.ID) []NamespacedShare {
func split(rawData []byte, nid namespace.ID) []NamespacedShare {
shares := make([]NamespacedShare, 0)
firstRawShare := rawData[:shareSize]
firstRawShare := []byte(append(nid, rawData[:MsgShareSize]...))
shares = append(shares, NamespacedShare{firstRawShare, nid})
rawData = rawData[shareSize:]
rawData = rawData[MsgShareSize:]
for len(rawData) > 0 {
shareSizeOrLen := min(shareSize, len(rawData))
paddedShare := zeroPadIfNecessary(rawData[:shareSizeOrLen], shareSize)
shareSizeOrLen := min(MsgShareSize, len(rawData))
rawShare := []byte(append(nid, rawData[:shareSizeOrLen]...))
paddedShare := zeroPadIfNecessary(rawShare, ShareSize)
share := NamespacedShare{paddedShare, nid}
shares = append(shares, share)
rawData = rawData[shareSizeOrLen:]
}
return shares
}

// getNextChunk gets the next chunk for contiguous shares
// Precondition: none of the slices in rawDatas is zero-length
// This precondition should always hold at this point since zero-length txs are simply invalid.
// getNextChunk gets the next chunk, up to width bytes, for contiguous
// shares, advancing (outerIndex, innerIndex) across rawDatas.
// It returns the chunk, the updated outer and inner indices, and the
// start index of the first data segment that begins inside this chunk
// (0 if none does), offset by the share's namespace and reserved bytes.
// Precondition: none of the slices in rawDatas is zero-length.
// This precondition should always hold at this point since zero-length
// txs are simply invalid.
func getNextChunk(rawDatas [][]byte, outerIndex int, innerIndex int, width int) ([]byte, int, int, int) {
	chunk := make([]byte, 0, width)
	startIndex := 0
	firstSegmentLen := 0

	for len(chunk) < width && outerIndex < len(rawDatas) {
		take := min(len(rawDatas[outerIndex])-innerIndex, width-len(chunk))
		if take == 0 {
			panic("zero-length contiguous share data is invalid")
		}
		if len(chunk) == 0 {
			// First segment in this chunk: remember its length so a
			// later segment's start offset can be computed.
			firstSegmentLen = take
		} else {
			// Data already placed means a new segment begins here;
			// offset by the fixed reserved bytes at the start of the share.
			startIndex = firstSegmentLen + NamespaceSize + ShareReservedBytes
		}
		chunk = append(chunk, rawDatas[outerIndex][innerIndex:innerIndex+take]...)
		innerIndex += take
		if innerIndex >= len(rawDatas[outerIndex]) {
			// Exhausted the current raw datum; move to the next one.
			innerIndex = 0
			outerIndex++
		}
	}

	return chunk, outerIndex, innerIndex, startIndex
}

func GenerateTailPaddingShares(n int, shareWidth int) NamespacedShares {
shares := make([]NamespacedShare, n)
for i := 0; i < n; i++ {
Expand Down
90 changes: 65 additions & 25 deletions types/shares_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@ import (
)

type splitter interface {
splitIntoShares(shareSize int) NamespacedShares
splitIntoShares() NamespacedShares
}

func TestMakeShares(t *testing.T) {
reservedTxNamespaceID := append(bytes.Repeat([]byte{0}, 7), 1)
reservedEvidenceNamespaceID := append(bytes.Repeat([]byte{0}, 7), 3)
// resveredIntermediateStateRootsNamespaceID := append(bytes.Repeat([]byte{0}, 7), 2)
val := NewMockPV()
blockID := makeBlockID([]byte("blockhash"), 1000, []byte("partshash"))
blockID2 := makeBlockID([]byte("blockhash2"), 1000, []byte("partshash"))
Expand All @@ -37,12 +36,11 @@ func TestMakeShares(t *testing.T) {
}
msg1Marshaled, _ := msg1.MarshalDelimited()
if err != nil {
t.Fatalf("Could not encode evidence: %v, error: %v", testEvidence, err)
t.Fatalf("Could not encode evidence: %v, error: %v\n", testEvidence, err)
}

type args struct {
data splitter
shareSize int
data splitter
}
tests := []struct {
name string
Expand All @@ -54,59 +52,101 @@ func TestMakeShares(t *testing.T) {
data: &EvidenceData{
Evidence: []Evidence{testEvidence},
},
shareSize: ShareSize,
}, NamespacedShares{NamespacedShare{
Share: testEvidenceBytes[:ShareSize],
ID: reservedEvidenceNamespaceID,
Share: append(
append(reservedEvidenceNamespaceID, byte(0)),
testEvidenceBytes[:TxShareSize]...,
),
ID: reservedEvidenceNamespaceID,
}, NamespacedShare{
Share: zeroPadIfNecessary(testEvidenceBytes[ShareSize:], ShareSize),
ID: reservedEvidenceNamespaceID,
Share: append(
append(reservedEvidenceNamespaceID, byte(0)),
zeroPadIfNecessary(testEvidenceBytes[TxShareSize:], TxShareSize)...,
),
ID: reservedEvidenceNamespaceID,
}},
},
{"small LL Tx",
args{
data: Txs{smolTx},
shareSize: ShareSize,
data: Txs{smolTx},
},
NamespacedShares{
NamespacedShare{
Share: zeroPadIfNecessary(smolTxLenDelimited, ShareSize),
ID: reservedTxNamespaceID,
Share: append(
append(reservedTxNamespaceID, byte(0)),
zeroPadIfNecessary(smolTxLenDelimited, TxShareSize)...,
),
ID: reservedTxNamespaceID,
},
},
},
{"one large LL Tx",
args{
data: Txs{largeTx},
shareSize: ShareSize,
data: Txs{largeTx},
},
NamespacedShares{
NamespacedShare{
Share: Share(largeTxLenDelimited[:ShareSize]),
ID: reservedTxNamespaceID,
Share: append(
append(reservedTxNamespaceID, byte(0)),
largeTxLenDelimited[:TxShareSize]...,
),
ID: reservedTxNamespaceID,
},
NamespacedShare{
Share: zeroPadIfNecessary(largeTxLenDelimited[ShareSize:], ShareSize),
ID: reservedTxNamespaceID,
Share: append(
append(reservedTxNamespaceID, byte(0)),
zeroPadIfNecessary(largeTxLenDelimited[TxShareSize:], TxShareSize)...,
),
ID: reservedTxNamespaceID,
},
},
},
{"large then small LL Tx",
args{
data: Txs{largeTx, smolTx},
},
NamespacedShares{
NamespacedShare{
Share: append(
append(reservedTxNamespaceID, byte(0)),
largeTxLenDelimited[:TxShareSize]...,
),
ID: reservedTxNamespaceID,
},
NamespacedShare{
Share: append(
append(reservedTxNamespaceID, byte(len(largeTxLenDelimited)-TxShareSize+NamespaceSize+ShareReservedBytes)),
zeroPadIfNecessary(
append(largeTxLenDelimited[TxShareSize:], smolTxLenDelimited...),
TxShareSize,
)...,
),
ID: reservedTxNamespaceID,
},
},
},
{"ll-app message",
args{
data: Messages{[]Message{msg1}},
shareSize: ShareSize,
data: Messages{[]Message{msg1}},
},
NamespacedShares{
NamespacedShare{zeroPadIfNecessary(msg1Marshaled, ShareSize), msg1.NamespaceID},
NamespacedShare{
Share: append(
[]byte(msg1.NamespaceID),
zeroPadIfNecessary(msg1Marshaled, MsgShareSize)...,
),
ID: msg1.NamespaceID,
},
},
},
}
for i, tt := range tests {
tt := tt // stupid scopelint :-/
i := i
t.Run(tt.name, func(t *testing.T) {
if got := tt.args.data.splitIntoShares(tt.args.shareSize); !reflect.DeepEqual(got, tt.want) {
t.Errorf("%v: makeShares() = \n%v\nwant\n%v", i, got, tt.want)
got := tt.args.data.splitIntoShares()
if !reflect.DeepEqual(got, tt.want) {
t.Errorf("%v: makeShares() = \n%+v\nwant\n%+v\n", i, got, tt.want)
}
})
}
Expand Down
Loading

0 comments on commit 7eb6a61

Please sign in to comment.