-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Speedup open huge file #410
Changes from all commits
82d5990
afa9310
7656291
ce500b5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,16 +9,16 @@ import ( | |
// freelist represents a list of all pages that are available for allocation. | ||
// It also tracks pages that have been freed but are still in use by open transactions. | ||
type freelist struct { | ||
ids []pgid // all free and available free page ids. | ||
pending map[txid][]pgid // mapping of soon-to-be free page ids by tx. | ||
cache map[pgid]bool // fast lookup of all free and pending page ids. | ||
ids []pgid // all free and available free page ids. | ||
pending map[txid][]pgid // mapping of soon-to-be free page ids by tx. | ||
cache map[pgid]struct{} // fast lookup of all free and pending page ids. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A benchmark or some other data demonstrating the memory savings would be useful. I'm not a prior convinced that the memory savings here are noticeable in real world use. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Performance is one thing, and idiomatic code is another: |
||
} | ||
|
||
// newFreelist returns an empty, initialized freelist. | ||
func newFreelist() *freelist { | ||
return &freelist{ | ||
pending: make(map[txid][]pgid), | ||
cache: make(map[pgid]bool), | ||
cache: make(map[pgid]struct{}), | ||
} | ||
} | ||
|
||
|
@@ -113,13 +113,13 @@ func (f *freelist) free(txid txid, p *page) { | |
var ids = f.pending[txid] | ||
for id := p.id; id <= p.id+pgid(p.overflow); id++ { | ||
// Verify that page is not already free. | ||
if f.cache[id] { | ||
if _, ok := f.cache[id]; ok { | ||
panic(fmt.Sprintf("page %d already freed", id)) | ||
} | ||
|
||
// Add to the freelist and cache. | ||
ids = append(ids, id) | ||
f.cache[id] = true | ||
f.cache[id] = struct{}{} | ||
} | ||
f.pending[txid] = ids | ||
} | ||
|
@@ -152,7 +152,8 @@ func (f *freelist) rollback(txid txid) { | |
|
||
// freed returns whether a given page is in the free list. | ||
func (f *freelist) freed(pgid pgid) bool { | ||
return f.cache[pgid] | ||
_, ok := f.cache[pgid] | ||
return ok | ||
} | ||
|
||
// read initializes the freelist from a freelist page. | ||
|
@@ -174,7 +175,9 @@ func (f *freelist) read(p *page) { | |
copy(f.ids, ids) | ||
|
||
// Make sure they're sorted. | ||
sort.Sort(pgids(f.ids)) | ||
if !pgids(f.ids).isSorted() { | ||
sort.Sort(pgids(f.ids)) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you want to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're absolutely right |
||
} | ||
|
||
// Rebuild the page cache. | ||
|
@@ -212,18 +215,18 @@ func (f *freelist) reload(p *page) { | |
f.read(p) | ||
|
||
// Build a cache of only pending pages. | ||
pcache := make(map[pgid]bool) | ||
pcache := make(map[pgid]struct{}) | ||
for _, pendingIDs := range f.pending { | ||
for _, pendingID := range pendingIDs { | ||
pcache[pendingID] = true | ||
pcache[pendingID] = struct{}{} | ||
} | ||
} | ||
|
||
// Check each page in the freelist and build a new available freelist | ||
// with any pages not in the pending lists. | ||
var a []pgid | ||
for _, id := range f.ids { | ||
if !pcache[id] { | ||
if _, ok := pcache[id]; !ok { | ||
a = append(a, id) | ||
} | ||
} | ||
|
@@ -236,13 +239,13 @@ func (f *freelist) reload(p *page) { | |
|
||
// reindex rebuilds the free cache based on available and pending free lists. | ||
func (f *freelist) reindex() { | ||
f.cache = make(map[pgid]bool, len(f.ids)) | ||
f.cache = make(map[pgid]struct{}, len(f.ids)) | ||
for _, id := range f.ids { | ||
f.cache[id] = true | ||
f.cache[id] = struct{}{} | ||
} | ||
for _, pendingIDs := range f.pending { | ||
for _, pendingID := range pendingIDs { | ||
f.cache[pendingID] = true | ||
f.cache[pendingID] = struct{}{} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -139,6 +139,14 @@ type pgids []pgid | |
func (s pgids) Len() int { return len(s) } | ||
func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] } | ||
func (s pgids) Less(i, j int) bool { return s[i] < s[j] } | ||
func (s pgids) isSorted() bool { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Package sort already has an IsSorted function. No need to add this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sort.IsSorted uses interfaces, so it is slower. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How much slower? Slower enough that it is worth adding more code to work around it? Numbers would help a lot here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you want numbers, you may measure. If you find I was wrong, I will appologise. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Usually the onus is on you to prove that your performance claims are justified. Without having provided proofs that your optimizations were guided by a scientific method, it's to be expected that others will ask you to justify your claims. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @aybabtme , as I've said two times down in a page, I don't care, will this pool request be merged or not. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @funny-falcon thanks for your feedback, I hadn't yet read the other comments. I now see that this is an old PR, sorry for the confusion. We can disagree on the burden of proof another time maybe. Have a good day. |
||
for i := len(s)-1; i > 0; i-- { | ||
if s[i] < s[i-1] { | ||
return false | ||
} | ||
} | ||
return true | ||
} | ||
|
||
// merge returns the sorted union of a and b. | ||
func (a pgids) merge(b pgids) pgids { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you explain more how this lazy load helps?
It appears that you're trading
for
I'd like to see more detail about the benefits to understand why this is worth doing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you open file only for read, you don't need to read freelist at all ever.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The first commit in this series handles that already, no?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sometimes you want to have single code for both readonly open and readwrite simply cause you don't know will you write or not.
Anyway, I was never confident last commit will be accepted.