-
Notifications
You must be signed in to change notification settings - Fork 159
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Concurrently load the in-memory cache index from disk #123
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ import ( | |
"net/http" | ||
"os" | ||
"path/filepath" | ||
"runtime" | ||
"sort" | ||
"sync" | ||
|
||
|
@@ -40,21 +41,36 @@ type diskCache struct { | |
func New(dir string, maxSizeBytes int64) cache.Cache { | ||
// Create the directory structure. | ||
hexLetters := []byte("0123456789abcdef") | ||
dirs := make([]string, len(hexLetters)*len(hexLetters)*3) | ||
idx := 0 | ||
|
||
for _, c1 := range hexLetters { | ||
for _, c2 := range hexLetters { | ||
subDir := string(c1) + string(c2) | ||
err := os.MkdirAll(filepath.Join(dir, cache.CAS.String(), subDir), os.FileMode(0744)) | ||
|
||
casSubDir := filepath.Join(dir, cache.CAS.String(), subDir) | ||
err := os.MkdirAll(casSubDir, os.FileMode(0777)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This FileMode change looks unrelated- I'm not sure if @buchgr has a reason to use 0744. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not the expert but I figured it's good practice to be as restrictive as possible. @bdittmer what's the reason for changing this to 777? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The use case is so that a group of users can admin the cache without root access. The permissions on disk are not 0777 but (0777 - umask) for dirs and (0666 - umask) for files. I think this permission change is OK, but it should land in a separate commit. Added an alternative permissions change in #128 (which will cause a merge conflict here- sorry). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No problem, thanks! |
||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
err = os.MkdirAll(filepath.Join(dir, cache.AC.String(), subDir), os.FileMode(0744)) | ||
|
||
acSubDir := filepath.Join(dir, cache.AC.String(), subDir) | ||
err = os.MkdirAll(acSubDir, os.FileMode(0777)) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
err = os.MkdirAll(filepath.Join(dir, cache.RAW.String(), subDir), os.FileMode(0744)) | ||
|
||
rawSubDir := filepath.Join(dir, cache.RAW.String(), subDir) | ||
err = os.MkdirAll(rawSubDir, os.FileMode(0777)) | ||
if err != nil { | ||
log.Fatal(err) | ||
} | ||
|
||
dirs[idx] = acSubDir | ||
dirs[idx+1] = casSubDir | ||
dirs[idx+2] = rawSubDir | ||
|
||
idx = idx + 3 | ||
} | ||
} | ||
|
||
|
@@ -80,7 +96,7 @@ func New(dir string, maxSizeBytes int64) cache.Cache { | |
log.Fatalf("Attempting to migrate the old directory structure to the new structure failed "+ | ||
"with error: %v", err) | ||
} | ||
err = cache.loadExistingFiles() | ||
err = cache.loadExistingFiles(dirs) | ||
if err != nil { | ||
log.Fatalf("Loading of existing cache entries failed due to error: %v", err) | ||
} | ||
|
@@ -119,32 +135,60 @@ func migrateDirectory(dir string) error { | |
// loadExistingFiles lists all files in the cache directory, and adds them to the | ||
// LRU index so that they can be served. Files are sorted by access time first, | ||
// so that the eviction behavior is preserved across server restarts. | ||
func (c *diskCache) loadExistingFiles() error { | ||
log.Printf("Loading existing files in %s.\n", c.dir) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add back this log message. |
||
|
||
func (c *diskCache) loadExistingFiles(dirs []string) error { | ||
// Walk the directory tree | ||
type NameAndInfo struct { | ||
info os.FileInfo | ||
name string | ||
} | ||
var files []NameAndInfo | ||
err := filepath.Walk(c.dir, func(name string, info os.FileInfo, err error) error { | ||
if !info.IsDir() { | ||
files = append(files, NameAndInfo{info, name}) | ||
|
||
filesChan := make(chan []NameAndInfo, len(dirs)) | ||
workChan := make(chan string) | ||
workers := runtime.NumCPU() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How many CPU cores have you benchmarked this with? I wonder if this is more constrained by disk access than by CPU time? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I cannot find the benchmarks (this landed in our branch several months ago) but iirc we saw ~40-50% startup time improvement on a 6-core MBP w/ SSD. If I get some time I'll try to run some additional benchmarks |
||
|
||
for i := 0; i < workers; i++ { | ||
go func(workChan <-chan string, filesChan chan<- []NameAndInfo) { | ||
for dir := range workChan { | ||
dirFiles := make([]NameAndInfo, 0) | ||
|
||
_ = filepath.Walk(dir, func(name string, info os.FileInfo, err error) error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we check the return value (and the other instances of this)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if using fastwalk would be faster? |
||
if !info.IsDir() { | ||
dirFiles = append(dirFiles, NameAndInfo{info, name}) | ||
} | ||
return nil | ||
}) | ||
|
||
filesChan <- dirFiles | ||
} | ||
}(workChan, filesChan) | ||
} | ||
|
||
for _, dir := range dirs { | ||
workChan <- dir | ||
} | ||
|
||
close(workChan) | ||
|
||
i := 0 | ||
for f := range filesChan { | ||
if len(f) > 0 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this check is required when using range? |
||
files = append(files, f...) | ||
} | ||
|
||
i++ | ||
if i == len(dirs) { | ||
break | ||
} | ||
return nil | ||
}) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
log.Println("Sorting cache files by atime.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add back the log statement. |
||
close(filesChan) | ||
|
||
// Sort in increasing order of atime | ||
sort.Slice(files, func(i int, j int) bool { | ||
return atime.Get(files[i].info).Before(atime.Get(files[j].info)) | ||
mostynb marked this conversation as resolved.
Show resolved
Hide resolved
|
||
}) | ||
|
||
log.Println("Building LRU index.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add back the log statement. |
||
for _, f := range files { | ||
relPath := f.name[len(c.dir)+1:] | ||
c.lru.Add(relPath, &lruItem{ | ||
|
@@ -153,7 +197,6 @@ func (c *diskCache) loadExistingFiles() error { | |
}) | ||
} | ||
|
||
log.Println("Finished loading disk cache files.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add back the log statement. |
||
return nil | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe it's worth adding another for loop over []string{"ac", "cas", "raw"} and avoid some mostly copy+pasted code?