Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add protection when mmap somehow fails #362

Merged
merged 2 commits into from
Jan 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions bolt_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,11 @@ func munmap(db *DB) error {
}

addr := (uintptr)(unsafe.Pointer(&db.data[0]))
var err1 error
if err := syscall.UnmapViewOfFile(addr); err != nil {
return os.NewSyscallError("UnmapViewOfFile", err)
err1 = os.NewSyscallError("UnmapViewOfFile", err)
}
return nil
db.data = nil
db.datasz = 0
return err1
}
27 changes: 26 additions & 1 deletion db.go
Original file line number Diff line number Diff line change
Expand Up @@ -491,8 +491,19 @@ func (db *DB) mmap(minsz int) error {
return nil
}

func (db *DB) invalidate() {
db.dataref = nil
db.data = nil
db.datasz = 0

db.meta0 = nil
db.meta1 = nil
}

// munmap unmaps the data file from memory.
func (db *DB) munmap() error {
defer db.invalidate()

if err := munmap(db); err != nil {
return fmt.Errorf("unmap error: " + err.Error())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a topic for a separate PR... but I think we should start logging Errors early from the failed syscalls...

There is high probability they will lead to panic later (either on bbolt side, but most likely on customer'a application side) and there is significant probability that customer will not log the error themselves...
And this might lead to hard to diagnose problems of "etcd not working" while there was OOM or no disk space.

Failed syscalls is something that shoud not happen on well configured machines, so it should not make the app logs spammy.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will think about this separately.

}
Expand Down Expand Up @@ -701,6 +712,13 @@ func (db *DB) beginTx() (*Tx, error) {
return nil, ErrDatabaseNotOpen
}

// Exit if the database is not correctly mapped.
if db.data == nil {
db.mmaplock.RUnlock()
db.metalock.Unlock()
return nil, ErrInvalidMapping
}

// Create a transaction associated with the database.
t := &Tx{}
t.init(db)
Expand Down Expand Up @@ -742,6 +760,12 @@ func (db *DB) beginRWTx() (*Tx, error) {
return nil, ErrDatabaseNotOpen
}

// Exit if the database is not correctly mapped.
if db.data == nil {
db.rwlock.Unlock()
return nil, ErrInvalidMapping
}

// Create a transaction associated with the database.
t := &Tx{writable: true}
t.init(db)
Expand Down Expand Up @@ -1016,6 +1040,7 @@ func (db *DB) Stats() Stats {
// This is for internal access to the raw data bytes from the C cursor, use
// carefully, or not at all.
func (db *DB) Info() *Info {
_assert(db.data != nil, "database file isn't correctly mapped")
return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
}

Expand All @@ -1042,7 +1067,7 @@ func (db *DB) meta() *meta {
metaB = db.meta0
}

// Use higher meta page if valid. Otherwise fallback to previous, if valid.
// Use higher meta page if valid. Otherwise, fallback to previous, if valid.
if err := metaA.validate(); err == nil {
return metaA
} else if err := metaB.validate(); err == nil {
Expand Down
24 changes: 24 additions & 0 deletions db_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@ import (
"math/rand"
"os"
"path/filepath"
"reflect"
"sync"
"testing"
"time"
"unsafe"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

bolt "go.etcd.io/bbolt"
Expand Down Expand Up @@ -1311,6 +1313,28 @@ func TestDB_BatchTime(t *testing.T) {
}
}

// TestDBUnmap verifes that `dataref`, `data` and `datasz` must be reset
// to zero values respectively after unmapping the db.
func TestDBUnmap(t *testing.T) {
db := btesting.MustCreateDB(t)

require.NoError(t, db.DB.Close())

// Ignore the following error:
// Error: copylocks: call of reflect.ValueOf copies lock value: go.etcd.io/bbolt.DB contains sync.Once contains sync.Mutex (govet)
//nolint:govet
v := reflect.ValueOf(*db.DB)
dataref := v.FieldByName("dataref")
data := v.FieldByName("data")
datasz := v.FieldByName("datasz")
assert.True(t, dataref.IsNil())
assert.True(t, data.IsNil())
assert.True(t, datasz.IsZero())

// Set db.DB to nil to prevent MustCheck from panicking.
db.DB = nil
}

func ExampleDB_Update() {
// Open the database.
db, err := bolt.Open(tempfile(), 0666, nil)
Expand Down
3 changes: 3 additions & 0 deletions errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ var (
// This typically occurs when a file is not a bolt database.
ErrInvalid = errors.New("invalid database")

// ErrInvalidMapping is returned when the database file fails to get mapped.
ErrInvalidMapping = errors.New("database isn't correctly mapped")

// ErrVersionMismatch is returned when the data file was created with a
// different version of Bolt.
ErrVersionMismatch = errors.New("version mismatch")
Expand Down
18 changes: 11 additions & 7 deletions tx.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,13 +276,17 @@ func (tx *Tx) rollback() {
}
if tx.writable {
tx.db.freelist.rollback(tx.meta.txid)
if !tx.db.hasSyncedFreelist() {
// Reconstruct free page list by scanning the DB to get the whole free page list.
// Note: scaning the whole db is heavy if your db size is large in NoSyncFreeList mode.
tx.db.freelist.noSyncReload(tx.db.freepages())
} else {
// Read free page list from freelist page.
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
// When mmap fails, the `data`, `dataref` and `datasz` may be reset to
// zero values, and there is no way to reload free page IDs in this case.
if tx.db.data != nil {
if !tx.db.hasSyncedFreelist() {
// Reconstruct free page list by scanning the DB to get the whole free page list.
// Note: scaning the whole db is heavy if your db size is large in NoSyncFreeList mode.
tx.db.freelist.noSyncReload(tx.db.freepages())
} else {
// Read free page list from freelist page.
tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist))
}
}
}
tx.close()
Expand Down