diff --git a/freelist.go b/freelist.go index d441b6925..697a46968 100644 --- a/freelist.go +++ b/freelist.go @@ -2,7 +2,6 @@ package bbolt import ( "fmt" - "reflect" "sort" "unsafe" ) @@ -94,24 +93,8 @@ func (f *freelist) pending_count() int { return count } -// copyallunsafe copies a list of all free ids and all pending ids in one sorted list. +// copyall copies a list of all free ids and all pending ids in one sorted list. // f.count returns the minimum length required for dst. -func (f *freelist) copyallunsafe(dstptr unsafe.Pointer) { // dstptr is []pgid data pointer - m := make(pgids, 0, f.pending_count()) - for _, txp := range f.pending { - m = append(m, txp.ids...) - } - sort.Sort(m) - fpgids := f.getFreePageIDs() - sz := len(fpgids) + len(m) - dst := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{ - Data: uintptr(dstptr), - Len: sz, - Cap: sz, - })) - mergepgids(dst, fpgids, m) -} - func (f *freelist) copyall(dst []pgid) { m := make(pgids, 0, f.pending_count()) for _, txp := range f.pending { @@ -284,21 +267,23 @@ func (f *freelist) read(p *page) { } // If the page.count is at the max uint16 value (64k) then it's considered // an overflow and the size of the freelist is stored as the first element. - var idx, count uintptr = 0, uintptr(p.count) + var idx, count = 0, int(p.count) if count == 0xFFFF { idx = 1 - count = uintptr(*(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p)))) + c := *(*pgid)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))) + count = int(c) + if count < 0 { + panic(fmt.Sprintf("leading element count %d overflows int", c)) + } } // Copy the list of page ids from the freelist. if count == 0 { f.ids = nil } else { - ids := *(*[]pgid)(unsafe.Pointer(&reflect.SliceHeader{ - Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + idx*unsafe.Sizeof(pgid(0)), - Len: int(count), - Cap: int(count), - })) + var ids []pgid + data := unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), unsafe.Sizeof(ids[0]), idx) + unsafeSlice(unsafe.Pointer(&ids), data, count) // copy the ids, so we don't modify on the freelist page directly idsCopy := make([]pgid, count) @@ -331,16 +316,22 @@ func (f *freelist) write(p *page) error { // The page.count can only hold up to 64k elements so if we overflow that // number then we handle it by putting the size in the first element. - lenids := f.count() - if lenids == 0 { - p.count = uint16(lenids) - } else if lenids < 0xFFFF { - p.count = uint16(lenids) - f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))) + l := f.count() + if l == 0 { + p.count = uint16(l) + } else if l < 0xFFFF { + p.count = uint16(l) + var ids []pgid + data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) + unsafeSlice(unsafe.Pointer(&ids), data, l) + f.copyall(ids) } else { p.count = 0xFFFF - *(*pgid)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))) = pgid(lenids) - f.copyallunsafe(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + unsafe.Sizeof(pgid(0)))) + var ids []pgid + data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) + unsafeSlice(unsafe.Pointer(&ids), data, l+1) + ids[0] = pgid(l) + f.copyall(ids[1:]) } return nil diff --git a/manydbs_test.go b/manydbs_test.go new file mode 100644 index 000000000..fbbe8fe2e --- /dev/null +++ b/manydbs_test.go @@ -0,0 +1,67 @@ +package bbolt + +import ( + "fmt" + "io/ioutil" + "math/rand" + "os" + "path/filepath" + "testing" +) + +func createDb(t *testing.T) (*DB, func()) { + // First, create a temporary directory to be used for the duration of + // this test. + tempDirName, err := ioutil.TempDir("", "bboltmemtest") + if err != nil { + t.Fatalf("error creating temp dir: %v", err) + } + path := filepath.Join(tempDirName, "testdb.db") + + bdb, err := Open(path, 0600, nil) + if err != nil { + t.Fatalf("error creating bbolt db: %v", err) + } + + cleanup := func() { + bdb.Close() + os.RemoveAll(tempDirName) + } + + return bdb, cleanup +} + +func createAndPutKeys(t *testing.T) { + t.Parallel() + + db, cleanup := createDb(t) + defer cleanup() + + bucketName := []byte("bucket") + + for i := 0; i < 100; i++ { + err := db.Update(func(tx *Tx) error { + nodes, err := tx.CreateBucketIfNotExists(bucketName) + if err != nil { + return err + } + + var key [16]byte + rand.Read(key[:]) + if err := nodes.Put(key[:], nil); err != nil { + return err + } + + return nil + }) + if err != nil { + t.Fatal(err) + } + } +} + +func TestManyDBs(t *testing.T) { + for i := 0; i < 100; i++ { + t.Run(fmt.Sprintf("%d", i), createAndPutKeys) + } +} diff --git a/node.go b/node.go index 1690eef3f..73988b5c4 100644 --- a/node.go +++ b/node.go @@ -3,7 +3,6 @@ package bbolt import ( "bytes" "fmt" - "reflect" "sort" "unsafe" ) @@ -208,36 +207,32 @@ func (n *node) write(p *page) { } // Loop over each item and write it to the page. - bp := uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + n.pageElementSize()*uintptr(len(n.inodes)) + // off tracks the offset into p of the start of the next data. + off := unsafe.Sizeof(*p) + n.pageElementSize()*uintptr(len(n.inodes)) for i, item := range n.inodes { _assert(len(item.key) > 0, "write: zero-length inode key") + // Create a slice to write into of needed size and advance + // byte pointer for next iteration. + sz := len(item.key) + len(item.value) + b := unsafeByteSlice(unsafe.Pointer(p), off, 0, sz) + off += uintptr(sz) + // Write the page element. if n.isLeaf { elem := p.leafPageElement(uint16(i)) - elem.pos = uint32(bp - uintptr(unsafe.Pointer(elem))) + elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) elem.flags = item.flags elem.ksize = uint32(len(item.key)) elem.vsize = uint32(len(item.value)) } else { elem := p.branchPageElement(uint16(i)) - elem.pos = uint32(bp - uintptr(unsafe.Pointer(elem))) + elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem))) elem.ksize = uint32(len(item.key)) elem.pgid = item.pgid _assert(elem.pgid != p.id, "write: circular dependency occurred") } - // Create a slice to write into of needed size and advance - // byte pointer for next iteration. - klen, vlen := len(item.key), len(item.value) - sz := klen + vlen - b := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ - Data: bp, - Len: sz, - Cap: sz, - })) - bp += uintptr(sz) - // Write data for the element to the end of the page. l := copy(b, item.key) copy(b[l:], item.value) diff --git a/node_test.go b/node_test.go index 85a9dc218..eea4d2582 100644 --- a/node_test.go +++ b/node_test.go @@ -44,9 +44,9 @@ func TestNode_read_LeafPage(t *testing.T) { nodes[1] = leafPageElement{flags: 0, pos: 23, ksize: 10, vsize: 3} // pos = sizeof(leafPageElement) + 3 + 4 // Write data for the nodes at the end. - data := (*[4096]byte)(unsafe.Pointer(&nodes[2])) - copy(data[:], "barfooz") - copy(data[7:], "helloworldbye") + const s = "barfoozhelloworldbye" + data := unsafeByteSlice(unsafe.Pointer(&nodes[2]), 0, 0, len(s)) + copy(data, s) // Deserialize page into a leaf. n := &node{} diff --git a/page.go b/page.go index 415cd77ec..c9a158fb0 100644 --- a/page.go +++ b/page.go @@ -3,7 +3,6 @@ package bbolt import ( "fmt" "os" - "reflect" "sort" "unsafe" ) @@ -51,13 +50,13 @@ func (p *page) typ() string { // meta returns a pointer to the metadata section of the page. func (p *page) meta() *meta { - return (*meta)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p))) + return (*meta)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))) } // leafPageElement retrieves the leaf node by index func (p *page) leafPageElement(index uint16) *leafPageElement { - off := uintptr(index) * leafPageElementSize - return (*leafPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + off)) + return (*leafPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), + leafPageElementSize, int(index))) } // leafPageElements retrieves a list of leaf nodes. @@ -65,17 +64,16 @@ func (p *page) leafPageElements() []leafPageElement { if p.count == 0 { return nil } - return *(*[]leafPageElement)(unsafe.Pointer(&reflect.SliceHeader{ - Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p), - Len: int(p.count), - Cap: int(p.count), - })) + var elems []leafPageElement + data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) + unsafeSlice(unsafe.Pointer(&elems), data, int(p.count)) + return elems } // branchPageElement retrieves the branch node by index func (p *page) branchPageElement(index uint16) *branchPageElement { - off := uintptr(index) * unsafe.Sizeof(branchPageElement{}) - return (*branchPageElement)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p) + off)) + return (*branchPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), + unsafe.Sizeof(branchPageElement{}), int(index))) } // branchPageElements retrieves a list of branch nodes. @@ -83,20 +81,15 @@ func (p *page) branchPageElements() []branchPageElement { if p.count == 0 { return nil } - return *(*[]branchPageElement)(unsafe.Pointer(&reflect.SliceHeader{ - Data: uintptr(unsafe.Pointer(p)) + unsafe.Sizeof(*p), - Len: int(p.count), - Cap: int(p.count), - })) + var elems []branchPageElement + data := unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p)) + unsafeSlice(unsafe.Pointer(&elems), data, int(p.count)) + return elems } // dump writes n bytes of the page to STDERR as hex output. func (p *page) hexdump(n int) { - buf := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ - Data: uintptr(unsafe.Pointer(p)), - Len: n, - Cap: n, - })) + buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, n) fmt.Fprintf(os.Stderr, "%x\n", buf) } @@ -115,11 +108,7 @@ type branchPageElement struct { // key returns a byte slice of the node key. func (n *branchPageElement) key() []byte { - return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ - Data: uintptr(unsafe.Pointer(n)) + uintptr(n.pos), - Len: int(n.ksize), - Cap: int(n.ksize), - })) + return unsafeByteSlice(unsafe.Pointer(n), 0, int(n.pos), int(n.pos)+int(n.ksize)) } // leafPageElement represents a node on a leaf page. @@ -132,20 +121,16 @@ type leafPageElement struct { // key returns a byte slice of the node key. func (n *leafPageElement) key() []byte { - return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ - Data: uintptr(unsafe.Pointer(n)) + uintptr(n.pos), - Len: int(n.ksize), - Cap: int(n.ksize), - })) + i := int(n.pos) + j := i + int(n.ksize) + return unsafeByteSlice(unsafe.Pointer(n), 0, i, j) } // value returns a byte slice of the node value. func (n *leafPageElement) value() []byte { - return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ - Data: uintptr(unsafe.Pointer(n)) + uintptr(n.pos) + uintptr(n.ksize), - Len: int(n.vsize), - Cap: int(n.vsize), - })) + i := int(n.pos) + int(n.ksize) + j := i + int(n.vsize) + return unsafeByteSlice(unsafe.Pointer(n), 0, i, j) } // PageInfo represents human readable information about a page. diff --git a/tx.go b/tx.go index 13937cdbf..4b1a64a8b 100644 --- a/tx.go +++ b/tx.go @@ -4,7 +4,6 @@ import ( "fmt" "io" "os" - "reflect" "sort" "strings" "time" @@ -524,24 +523,18 @@ func (tx *Tx) write() error { // Write pages to disk in order. for _, p := range pages { - size := (int(p.overflow) + 1) * tx.db.pageSize + rem := (uint64(p.overflow) + 1) * uint64(tx.db.pageSize) offset := int64(p.id) * int64(tx.db.pageSize) + var written uintptr // Write out page in "max allocation" sized chunks. - ptr := uintptr(unsafe.Pointer(p)) for { - // Limit our write to our max allocation size. - sz := size + sz := rem if sz > maxAllocSize-1 { sz = maxAllocSize - 1 } + buf := unsafeByteSlice(unsafe.Pointer(p), written, 0, int(sz)) - // Write chunk to disk. - buf := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ - Data: ptr, - Len: sz, - Cap: sz, - })) if _, err := tx.db.ops.writeAt(buf, offset); err != nil { return err } @@ -550,14 +543,14 @@ func (tx *Tx) write() error { tx.stats.Write++ // Exit inner for loop if we've written all the chunks. - size -= sz - if size == 0 { + rem -= sz + if rem == 0 { break } // Otherwise move offset forward and move pointer to next chunk. offset += int64(sz) - ptr += uintptr(sz) + written += uintptr(sz) } } @@ -576,11 +569,7 @@ func (tx *Tx) write() error { continue } - buf := *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{ - Data: uintptr(unsafe.Pointer(p)), - Len: tx.db.pageSize, - Cap: tx.db.pageSize, - })) + buf := unsafeByteSlice(unsafe.Pointer(p), 0, 0, tx.db.pageSize) // See https://go.googlesource.com/go/+/f03c9202c43e0abb130669852082117ca50aa9b1 for i := range buf { diff --git a/unsafe.go b/unsafe.go new file mode 100644 index 000000000..c0e503750 --- /dev/null +++ b/unsafe.go @@ -0,0 +1,39 @@ +package bbolt + +import ( + "reflect" + "unsafe" +) + +func unsafeAdd(base unsafe.Pointer, offset uintptr) unsafe.Pointer { + return unsafe.Pointer(uintptr(base) + offset) +} + +func unsafeIndex(base unsafe.Pointer, offset uintptr, elemsz uintptr, n int) unsafe.Pointer { + return unsafe.Pointer(uintptr(base) + offset + uintptr(n)*elemsz) +} + +func unsafeByteSlice(base unsafe.Pointer, offset uintptr, i, j int) []byte { + // See: https://github.com/golang/go/wiki/cgo#turning-c-arrays-into-go-slices + // + // This memory is not allocated from C, but it is unmanaged by Go's + // garbage collector and should behave similarly, and the compiler + // should produce similar code. Note that this conversion allows a + // subslice to begin after the base address, with an optional offset, + // while the URL above does not cover this case and only slices from + // index 0. However, the wiki never says that the address must be to + // the beginning of a C allocation (or even that malloc was used at + // all), so this is believed to be correct. + return (*[maxAllocSize]byte)(unsafeAdd(base, offset))[i:j:j] +} + +// unsafeSlice modifies the data, len, and cap of a slice variable pointed to by +// the slice parameter. This helper should be used over other direct +// manipulation of reflect.SliceHeader to prevent misuse, namely, converting +// from reflect.SliceHeader to a Go slice type. +func unsafeSlice(slice, data unsafe.Pointer, len int) { + s := (*reflect.SliceHeader)(slice) + s.Data = uintptr(data) + s.Cap = len + s.Len = len +}