-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
398 additions
and
234 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,115 +1,168 @@ | ||
package bundler | ||
|
||
import ( | ||
"bytes" | ||
"context" | ||
"fmt" | ||
"github.com/golang/protobuf/proto" | ||
"github.com/jhump/protoreflect/dynamic" | ||
"github.com/streamingfast/bstream" | ||
"github.com/streamingfast/derr" | ||
"github.com/streamingfast/dstore" | ||
"github.com/streamingfast/substreams-sink-files/sink" | ||
"go.uber.org/zap" | ||
) | ||
|
||
// FileType identifies the on-disk encoding of bundled files. Its string
// value is used verbatim as the bundle filename extension (see
// Bundler.filename).
type FileType string

const (
	// FileTypeJSONL selects the JSONL encoder: one JSON document per line.
	FileTypeJSONL = FileType("jsonl")
)
|
||
type Bundler struct { | ||
size uint64 | ||
bundlerType BundlerType | ||
encoder Encoder | ||
store dstore.Store | ||
size uint64 | ||
encoder Encoder | ||
|
||
objects []proto.Message | ||
fileStores *DStoreIO | ||
stateStore *StateStore | ||
fileType FileType | ||
|
||
startBlockNum uint64 | ||
activeBoundary *bstream.Range | ||
|
||
zlogger *zap.Logger | ||
} | ||
|
||
func New(store dstore.Store, size uint64, startBlock uint64, bundlerType BundlerType, zlogger *zap.Logger) (*Bundler, error) { | ||
func New( | ||
store dstore.Store, | ||
stateFilePath string, | ||
size uint64, | ||
fileType FileType, | ||
zlogger *zap.Logger, | ||
) (*Bundler, error) { | ||
|
||
stateStore, err := loadStateStore(stateFilePath) | ||
if err != nil { | ||
return nil, fmt.Errorf("load state store: %w", err) | ||
} | ||
|
||
b := &Bundler{ | ||
store: store, | ||
size: size, | ||
bundlerType: bundlerType, | ||
startBlockNum: startBlock, | ||
objects: []proto.Message{}, | ||
zlogger: zlogger, | ||
fileStores: newDStoreIO(store, 3, zlogger), | ||
stateStore: stateStore, | ||
fileType: fileType, | ||
size: size, | ||
zlogger: zlogger, | ||
} | ||
|
||
switch bundlerType { | ||
case BundlerTypeJSON: | ||
b.encoder = JSONEncode | ||
switch fileType { | ||
case FileTypeJSONL: | ||
b.encoder = JSONLEncode | ||
default: | ||
return nil, fmt.Errorf("invalid bundler type %q", bundlerType) | ||
return nil, fmt.Errorf("invalid file type %q", fileType) | ||
} | ||
return b, nil | ||
} | ||
|
||
func (b *Bundler) ForceFlush(ctx context.Context) error { | ||
boundary := bstream.NewRangeExcludingEnd(b.startBlockNum, b.startBlockNum+b.size) | ||
if err := b.save(ctx, boundary); err != nil { | ||
return fmt.Errorf("save %q: %w", boundary.String(), err) | ||
func (b *Bundler) GetCursor() (*sink.Cursor, error) { | ||
return b.stateStore.Read() | ||
} | ||
|
||
func (b *Bundler) Start(blockNum uint64) error { | ||
boundaryRange := b.newBoundary(blockNum) | ||
b.activeBoundary = boundaryRange | ||
filename := b.filename(boundaryRange) | ||
b.zlogger.Info("starting new file boundary", zap.Stringer("boundary", boundaryRange), zap.String("fiename", filename)) | ||
|
||
if err := b.fileStores.StartFile(filename); err != nil { | ||
return fmt.Errorf("start file: %w", err) | ||
} | ||
|
||
b.stateStore.newBoundary(filename, boundaryRange) | ||
return nil | ||
} | ||
|
||
func (b *Bundler) Flush(ctx context.Context, blockNum uint64) (bool, error) { | ||
boundaries := b.boundariesToSave(blockNum) | ||
if len(boundaries) == 0 { | ||
return false, nil | ||
func (b *Bundler) Stop() error { | ||
b.zlogger.Info("stopping file boundary") | ||
|
||
if err := b.fileStores.CloseFile(); err != nil { | ||
return fmt.Errorf("closing file: %w", err) | ||
} | ||
for _, boundary := range boundaries { | ||
if err := b.save(ctx, boundary); err != nil { | ||
return false, fmt.Errorf("save %q: %w", boundary.String(), err) | ||
} | ||
|
||
if err := b.stateStore.Save(); err != nil { | ||
return fmt.Errorf("failed to save state: %w", err) | ||
} | ||
return true, nil | ||
|
||
b.activeBoundary = nil | ||
return nil | ||
} | ||
|
||
func (b *Bundler) Write(entities []*dynamic.Message) { | ||
for _, entity := range entities { | ||
b.objects = append(b.objects, proto.Message(entity)) | ||
func (b *Bundler) Roll(ctx context.Context, blockNum uint64) error { | ||
if b.activeBoundary.Contains(blockNum) { | ||
return nil | ||
} | ||
} | ||
|
||
func (b *Bundler) save(ctx context.Context, boundary *bstream.Range) error { | ||
filename := b.filename(boundary) | ||
boundaries := boundariesToSkip(b.activeBoundary, blockNum, b.size) | ||
|
||
b.zlogger.Debug("storing boundary", | ||
zap.String("filename", filename), | ||
b.zlogger.Info("block_num is not in active boundary", | ||
zap.Stringer("active_boundary", b.activeBoundary), | ||
zap.Int("boundaries_to_skip", len(boundaries)), | ||
zap.Uint64("block_num", blockNum), | ||
) | ||
|
||
content, err := b.encoder(b.objects) | ||
if err != nil { | ||
return fmt.Errorf("encode objets: %w", err) | ||
if err := b.Stop(); err != nil { | ||
return fmt.Errorf("stop active boundary: %w", err) | ||
} | ||
|
||
if err := derr.RetryContext(ctx, 3, func(ctx context.Context) error { | ||
return b.store.WriteObject(ctx, filename, bytes.NewReader(content)) | ||
}); err != nil { | ||
return fmt.Errorf("write object: %w", err) | ||
for _, boundary := range boundaries { | ||
if err := b.Start(boundary.StartBlock()); err != nil { | ||
return fmt.Errorf("start skipping boundary: %w", err) | ||
} | ||
if err := b.Stop(); err != nil { | ||
return fmt.Errorf("stop skipping boundary: %w", err) | ||
} | ||
} | ||
|
||
b.objects = []proto.Message{} | ||
b.startBlockNum = *boundary.EndBlock() | ||
if err := b.Start(blockNum); err != nil { | ||
return fmt.Errorf("start skipping boundary: %w", err) | ||
} | ||
return nil | ||
} | ||
|
||
func (b *Bundler) newBoundary(containingBlockNum uint64) *bstream.Range { | ||
startBlock := containingBlockNum - (containingBlockNum % b.size) | ||
return bstream.NewRangeExcludingEnd(startBlock, startBlock+b.size) | ||
} | ||
|
||
func (b *Bundler) filename(blockRange *bstream.Range) string { | ||
return fmt.Sprintf("%010d-%010d.%s", blockRange.StartBlock(), (*blockRange.EndBlock()), b.bundlerType) | ||
return fmt.Sprintf("%010d-%010d.%s", blockRange.StartBlock(), (*blockRange.EndBlock()), b.fileType) | ||
} | ||
|
||
func (b *Bundler) Write(cursor *sink.Cursor, entities []*dynamic.Message) error { | ||
var buf []byte | ||
for _, entity := range entities { | ||
cnt, err := b.encoder(proto.Message(entity)) | ||
if err != nil { | ||
return fmt.Errorf("failed to encode: %w", err) | ||
} | ||
buf = append(buf, cnt...) | ||
} | ||
|
||
if _, err := b.fileStores.activeWriter.Write(buf); err != nil { | ||
return fmt.Errorf("failed to write data: %w", err) | ||
} | ||
|
||
b.stateStore.setCursor(cursor) | ||
return nil | ||
} | ||
|
||
func (b *Bundler) boundariesToSave(blockNum uint64) (out []*bstream.Range) { | ||
rangeStartBlock := b.startBlockNum | ||
for blockNum >= rangeStartBlock+b.size { | ||
out = append(out, bstream.NewRangeExcludingEnd(rangeStartBlock, rangeStartBlock+b.size)) | ||
rangeStartBlock = rangeStartBlock + b.size | ||
func boundariesToSkip(lastBoundary *bstream.Range, blockNum uint64, size uint64) (out []*bstream.Range) { | ||
iter := *lastBoundary.EndBlock() | ||
endBlock := computeEndBlock(iter, size) | ||
for blockNum >= endBlock { | ||
out = append(out, bstream.NewRangeExcludingEnd(iter, endBlock)) | ||
iter = endBlock | ||
endBlock = computeEndBlock(iter, size) | ||
} | ||
return out | ||
} | ||
|
||
// computeEndBlock returns the smallest multiple of size that is strictly
// greater than startBlockNum — i.e. the exclusive end of the boundary
// that starts at (or contains) startBlockNum. size must be non-zero.
func computeEndBlock(startBlockNum, size uint64) uint64 {
	// Equivalent to (startBlockNum+size) - (startBlockNum+size)%size:
	// floor to the alignment grid, one step past startBlockNum.
	return (startBlockNum/size + 1) * size
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.