From 3e1cdd915230382bd6cc21d4286ef6fdebc5cff0 Mon Sep 17 00:00:00 2001 From: Julian Hegler Date: Fri, 22 May 2020 11:11:22 -0400 Subject: [PATCH] Confirm `badgerMove` entry required before rewrite (#1302) Value log badgerMove entries for deleted keys are always rewritten. This leads to exponential DB size growth in heavy write/delete use cases with value pointer entries. To prevent this, a check has been added to confirm the value is still needed for badgerMove entries. --- db_test.go | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++ value.go | 16 +++++++-- 2 files changed, 114 insertions(+), 3 deletions(-) diff --git a/db_test.go b/db_test.go index e0532ba8a..95ccdf30f 100644 --- a/db_test.go +++ b/db_test.go @@ -378,6 +378,107 @@ func TestForceCompactL0(t *testing.T) { require.NoError(t, db.Close()) } +func dirSize(path string) (int64, error) { + var size int64 + err := filepath.Walk(path, func(_ string, info os.FileInfo, err error) error { + if err != nil { + if os.IsNotExist(err) { + return nil + } + return err + } + if !info.IsDir() { + size += info.Size() + } + return err + }) + return (size >> 20), err +} + +// BenchmarkDbGrowth ensures DB does not grow with repeated adds and deletes. +// +// New keys are created with each for-loop iteration. During each +// iteration, the previous for-loop iteration's keys are deleted. +// +// To reproduce the continuous growth problem due to `badgerMove` keys, +// update `value.go` `discardEntry` line 1628 to return false +// +// Also with PR #1303, the deleted keys are properly cleaned which +// further reduces disk size. 
+func BenchmarkDbGrowth(b *testing.B) { + dir, err := ioutil.TempDir("", "badger-test") + require.NoError(b, err) + defer removeDir(dir) + + start := 0 + lastStart := 0 + numKeys := 2000 + valueSize := 1024 + value := make([]byte, valueSize) + + discardRatio := 0.001 + maxWrites := 200 + opts := getTestOptions(dir) + opts.ValueLogFileSize = 64 << 15 + opts.MaxTableSize = 4 << 15 + opts.LevelOneSize = 16 << 15 + opts.NumVersionsToKeep = 1 + opts.NumLevelZeroTables = 1 + opts.NumLevelZeroTablesStall = 2 + opts.KeepL0InMemory = false // enable L0 compaction + db, err := Open(opts) + require.NoError(b, err) + for numWrites := 0; numWrites < maxWrites; numWrites++ { + txn := db.NewTransaction(true) + if start > 0 { + for i := lastStart; i < start; i++ { + key := make([]byte, 8) + binary.BigEndian.PutUint64(key[:], uint64(i)) + err := txn.Delete(key) + if err == ErrTxnTooBig { + require.NoError(b, txn.Commit()) + txn = db.NewTransaction(true) + } else { + require.NoError(b, err) + } + } + } + + for i := start; i < numKeys+start; i++ { + key := make([]byte, 8) + binary.BigEndian.PutUint64(key[:], uint64(i)) + err := txn.SetEntry(NewEntry(key, value)) + if err == ErrTxnTooBig { + require.NoError(b, txn.Commit()) + txn = db.NewTransaction(true) + } else { + require.NoError(b, err) + } + } + require.NoError(b, txn.Commit()) + require.NoError(b, db.Flatten(1)) + for { + err = db.RunValueLogGC(discardRatio) + if err == ErrNoRewrite { + break + } else { + require.NoError(b, err) + } + } + size, err := dirSize(dir) + require.NoError(b, err) + fmt.Printf("Badger DB Size = %dMB\n", size) + lastStart = start + start += numKeys + } + + db.Close() + size, err := dirSize(dir) + require.NoError(b, err) + require.LessOrEqual(b, size, int64(16)) + fmt.Printf("Badger DB Size = %dMB\n", size) +} + // Put a lot of data to move some data to disk. // WARNING: This test might take a while but it should pass! 
func TestGetMore(t *testing.T) { diff --git a/value.go b/value.go index 5816a8f24..b68ef10ba 100644 --- a/value.go +++ b/value.go @@ -518,7 +518,7 @@ func (vlog *valueLog) rewrite(f *logFile, tr trace.Trace) error { if err != nil { return err } - if discardEntry(e, vs) { + if discardEntry(e, vs, vlog.db) { return nil } @@ -1606,7 +1606,7 @@ func (vlog *valueLog) pickLog(head valuePointer, tr trace.Trace) (files []*logFi return files } -func discardEntry(e Entry, vs y.ValueStruct) bool { +func discardEntry(e Entry, vs y.ValueStruct, db *DB) bool { if vs.Version != y.ParseTs(e.Key) { // Version not found. Discard. return true } @@ -1622,6 +1622,16 @@ func discardEntry(e Entry, vs y.ValueStruct) bool { // Just a txn finish entry. Discard. return true } + if bytes.HasPrefix(e.Key, badgerMove) { + // Verify the actual key entry, without the badgerMove prefix, has not been deleted. + // If this is not done, the badgerMove entry will be kept forever moving from + // vlog to vlog during rewrites. + avs, err := db.get(e.Key[len(badgerMove):]) + if err != nil { + return false + } + return avs.Version == 0 + } return false } @@ -1694,7 +1704,7 @@ func (vlog *valueLog) doRunGC(lf *logFile, discardRatio float64, tr trace.Trace) if err != nil { return err } - if discardEntry(e, vs) { + if discardEntry(e, vs, vlog.db) { r.discard += esz return nil }