Skip to content
This repository has been archived by the owner on Mar 9, 2019. It is now read-only.

Speedup open huge file #410

Closed
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,11 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
return nil, err
}

// Read in the freelist.
db.freelist = newFreelist()
db.freelist.read(db.page(db.meta().freelist))
if !db.readOnly {
// Read in the freelist.
db.freelist = newFreelist()
db.freelist.read(db.page(db.meta().freelist))
}

// Mark the database as opened and return.
return db, nil
Expand Down
27 changes: 14 additions & 13 deletions freelist.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ import (
// freelist represents a list of all pages that are available for allocation.
// It also tracks pages that have been freed but are still in use by open transactions.
type freelist struct {
ids []pgid // all free and available free page ids.
pending map[txid][]pgid // mapping of soon-to-be free page ids by tx.
cache map[pgid]bool // fast lookup of all free and pending page ids.
ids []pgid // all free and available free page ids.
pending map[txid][]pgid // mapping of soon-to-be free page ids by tx.
cache map[pgid]struct{} // fast lookup of all free and pending page ids.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A benchmark or some other data demonstrating the memory savings would be useful. I'm not a priori convinced that the memory savings here are noticeable in real-world use.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Performance is one thing, and idiomatic code is another:
a map[key]struct{} is simply the idiomatic way to store a set. Go doesn't have many idioms, so the ones it does have should not need extra justification to be accepted, as long as the code passes the tests, of course.

}

// newFreelist returns an empty, initialized freelist.
func newFreelist() *freelist {
return &freelist{
pending: make(map[txid][]pgid),
cache: make(map[pgid]bool),
cache: make(map[pgid]struct{}),
}
}

Expand Down Expand Up @@ -113,13 +113,13 @@ func (f *freelist) free(txid txid, p *page) {
var ids = f.pending[txid]
for id := p.id; id <= p.id+pgid(p.overflow); id++ {
// Verify that page is not already free.
if f.cache[id] {
if _, ok := f.cache[id]; ok {
panic(fmt.Sprintf("page %d already freed", id))
}

// Add to the freelist and cache.
ids = append(ids, id)
f.cache[id] = true
f.cache[id] = struct{}{}
}
f.pending[txid] = ids
}
Expand Down Expand Up @@ -152,7 +152,8 @@ func (f *freelist) rollback(txid txid) {

// freed returns whether a given page is in the free list.
func (f *freelist) freed(pgid pgid) bool {
return f.cache[pgid]
_, ok := f.cache[pgid]
return ok
}

// read initializes the freelist from a freelist page.
Expand Down Expand Up @@ -212,18 +213,18 @@ func (f *freelist) reload(p *page) {
f.read(p)

// Build a cache of only pending pages.
pcache := make(map[pgid]bool)
pcache := make(map[pgid]struct{})
for _, pendingIDs := range f.pending {
for _, pendingID := range pendingIDs {
pcache[pendingID] = true
pcache[pendingID] = struct{}{}
}
}

// Check each page in the freelist and build a new available freelist
// with any pages not in the pending lists.
var a []pgid
for _, id := range f.ids {
if !pcache[id] {
if _, ok := pcache[id]; !ok {
a = append(a, id)
}
}
Expand All @@ -236,13 +237,13 @@ func (f *freelist) reload(p *page) {

// reindex rebuilds the free cache based on available and pending free lists.
func (f *freelist) reindex() {
f.cache = make(map[pgid]bool, len(f.ids))
f.cache = make(map[pgid]struct{}, len(f.ids))
for _, id := range f.ids {
f.cache[id] = true
f.cache[id] = struct{}{}
}
for _, pendingIDs := range f.pending {
for _, pendingID := range pendingIDs {
f.cache[pendingID] = true
f.cache[pendingID] = struct{}{}
}
}
}