Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix making too many create directory call #2828

Merged
merged 17 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion common/folderCreationTracker_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ package common
type FolderCreationTracker interface {
CreateFolder(folder string, doCreation func() error) error
ShouldSetProperties(folder string, overwrite OverwriteOption, prompter Prompter) bool
StopTracking(folder string)
}

type Prompter interface {
Expand Down
54 changes: 54 additions & 0 deletions common/trieForDirPath.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package common

import (
"strings"
)

type TrieNode struct {
children map[string]*TrieNode
TransferIndex uint32
UnregisteredButCreated bool
}

type Trie struct {
adreed-msft marked this conversation as resolved.
Show resolved Hide resolved
Root *TrieNode
}

func NewTrie() *Trie {
return &Trie{
Root: &TrieNode{children: make(map[string]*TrieNode)},
}
}

// InsertDirNode inserts the dirPath into the Trie and returns the corresponding node and if it had to be created
func (t *Trie) InsertDirNode(dirPath string) (*TrieNode, bool) {
node, _, created := t.getDirNodeHelper(dirPath, true)
return node, created
}

// GetDirNode returns the directory node if it exists
func (t *Trie) GetDirNode(dirPath string) (*TrieNode, bool) {
node, exists, _ := t.getDirNodeHelper(dirPath, false)
return node, exists
}

// getDirNodeHelper returns the node, if the node exists and if the node had to be created
func (t *Trie) getDirNodeHelper(dirPath string, createIfNotExists bool) (*TrieNode, bool, bool) {
node := t.Root
segments := strings.Split(dirPath, "/")
created := false
for _, segment := range segments {
child, exists := node.children[segment]
if !exists {
if createIfNotExists {
child = &TrieNode{children: make(map[string]*TrieNode)}
node.children[segment] = child
created = true
} else {
return nil, false, false
}
}
node = child
}
return node, true, created
}
95 changes: 95 additions & 0 deletions common/trieForDirPath_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package common

import (
"github.com/stretchr/testify/assert"
"testing"
)

func TestTrie_NewTrie(t *testing.T) {
a := assert.New(t)
trie := NewTrie()
a.NotNil(trie)
a.NotNil(trie.Root)
a.Empty(trie.Root.children)
}

func TestTrie_InsertDirNode(t *testing.T) {
a := assert.New(t)
// One Level
trie := NewTrie()

n1, created := trie.InsertDirNode("mydir")
a.NotNil(n1)
a.True(created)
a.Len(trie.Root.children, 1)
a.Contains(trie.Root.children, "mydir")
a.Empty(trie.Root.children["mydir"].children)

n2, created := trie.InsertDirNode("mydir")
a.NotNil(n2)
a.Equal(n1, n2)
a.False(created)

// Multiple Levels
trie = NewTrie()

n1, created = trie.InsertDirNode("mydir/mysubdir/lastlevel")
a.NotNil(n1)
a.True(created)
a.Len(trie.Root.children, 1)
a.Contains(trie.Root.children, "mydir")
a.Len(trie.Root.children["mydir"].children, 1)
a.Contains(trie.Root.children["mydir"].children, "mysubdir")
a.Len(trie.Root.children["mydir"].children["mysubdir"].children, 1)
a.Contains(trie.Root.children["mydir"].children["mysubdir"].children, "lastlevel")
a.Empty(trie.Root.children["mydir"].children["mysubdir"].children["lastlevel"].children)
a.Equal(trie.Root.children["mydir"].children["mysubdir"].children["lastlevel"], n1)

// Insert in middle
n2, created = trie.InsertDirNode("mydir/mysubdir")
a.False(created)
a.Equal(trie.Root.children["mydir"].children["mysubdir"], n2)

// Insert a different child
n3, created := trie.InsertDirNode("mydir/mysubdirsibling")
a.True(created)

a.Len(trie.Root.children["mydir"].children, 2)
a.Contains(trie.Root.children["mydir"].children, "mysubdir")
a.Contains(trie.Root.children["mydir"].children, "mysubdirsibling")
a.Empty(trie.Root.children["mydir"].children["mysubdirsibling"].children)
a.Equal(trie.Root.children["mydir"].children["mysubdirsibling"], n3)

}

func TestTrie_GetDirNode(t *testing.T) {
a := assert.New(t)
// One Level
trie := NewTrie()

n, ok := trie.GetDirNode("mydir/mysubdir/lastlevel")
a.Nil(n)
a.False(ok)

n1, _ := trie.InsertDirNode("mydir")
n2, ok := trie.GetDirNode("mydir")
a.True(ok)
a.Equal(n1, n2)

n1, _ = trie.InsertDirNode("mydir/mysubdir/lastlevel")
n2, _ = trie.InsertDirNode("mydir/mysubdirsibling")
n3, ok := trie.GetDirNode("mydir")
a.True(ok)
a.Equal(trie.Root.children["mydir"], n3)

n4, ok := trie.GetDirNode("mydir/mysubdir/lastlevel/actuallyiwantthisone")
a.Nil(n4)
a.False(ok)

_, ok = trie.GetDirNode("mydir/mysubdir")
a.True(ok)
_, ok = trie.GetDirNode("mydir/mysubdir/lastlevel")
a.True(ok)
_, ok = trie.GetDirNode("mydir/mysubdirsibling")
a.True(ok)
}
70 changes: 23 additions & 47 deletions ste/folderCreationTracker.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package ste

import (
"fmt"
"net/url"
"strings"
"sync"
Expand All @@ -21,10 +20,9 @@ func NewFolderCreationTracker(fpo common.FolderPropertyOption, plan *JobPartPlan
case common.EFolderPropertiesOption.AllFolders(),
common.EFolderPropertiesOption.AllFoldersExceptRoot():
return &jpptFolderTracker{ // This prevents a dependency cycle. Reviewers: Are we OK with this? Can you think of a better way to do it?
plan: plan,
mu: &sync.Mutex{},
contents: make(map[string]uint32),
unregisteredButCreated: make(map[string]struct{}),
plan: plan,
mu: &sync.Mutex{},
contents: common.NewTrie(),
}
case common.EFolderPropertiesOption.NoFolders():
// can't use simpleFolderTracker here, because when no folders are processed,
Expand All @@ -48,15 +46,10 @@ func (f *nullFolderTracker) ShouldSetProperties(folder string, overwrite common.
panic("wrong type of folder tracker has been instantiated. This type does not do any tracking")
}

func (f *nullFolderTracker) StopTracking(folder string) {
// noop (because we don't track anything)
}

type jpptFolderTracker struct {
plan IJobPartPlanHeader
mu *sync.Mutex
contents map[string]uint32
unregisteredButCreated map[string]struct{}
plan IJobPartPlanHeader
mu *sync.Mutex
contents *common.Trie
}

func (f *jpptFolderTracker) RegisterPropertiesTransfer(folder string, transferIndex uint32) {
Expand All @@ -67,13 +60,14 @@ func (f *jpptFolderTracker) RegisterPropertiesTransfer(folder string, transferIn
return // Never persist to dev-null
}

f.contents[folder] = transferIndex
fNode, _ := f.contents.InsertDirNode(folder)
fNode.TransferIndex = transferIndex

// We created it before it was enumerated-- Let's register that now.
if _, ok := f.unregisteredButCreated[folder]; ok {
if fNode.UnregisteredButCreated {
f.plan.Transfer(transferIndex).SetTransferStatus(common.ETransferStatus.FolderCreated(), false)
fNode.UnregisteredButCreated = false

delete(f.unregisteredButCreated, folder)
}
}

Expand All @@ -85,12 +79,15 @@ func (f *jpptFolderTracker) CreateFolder(folder string, doCreation func() error)
return nil // Never persist to dev-null
}

if idx, ok := f.contents[folder]; ok &&
f.plan.Transfer(idx).TransferStatus() == (common.ETransferStatus.FolderCreated()) {
// If the folder has already been created, then we don't need to create it again
fNode, addedToTrie := f.contents.InsertDirNode(folder)

if !addedToTrie && (f.plan.Transfer(fNode.TransferIndex).TransferStatus() == common.ETransferStatus.FolderCreated() ||
f.plan.Transfer(fNode.TransferIndex).TransferStatus() == common.ETransferStatus.Success()) {
return nil
}

if _, ok := f.unregisteredButCreated[folder]; ok {
if fNode.UnregisteredButCreated {
return nil
}

Expand All @@ -99,13 +96,14 @@ func (f *jpptFolderTracker) CreateFolder(folder string, doCreation func() error)
return err
}

if idx, ok := f.contents[folder]; ok {
if !addedToTrie {
// overwrite it's transfer status
f.plan.Transfer(idx).SetTransferStatus(common.ETransferStatus.FolderCreated(), false)
f.plan.Transfer(fNode.TransferIndex).SetTransferStatus(common.ETransferStatus.FolderCreated(), false)
} else {
// A folder hasn't been hit in traversal yet.
// Recording it in memory is OK, because we *cannot* resume a job that hasn't finished traversal.
f.unregisteredButCreated[folder] = struct{}{}
// We set the value to 0 as we just want to record it in memory
fNode.UnregisteredButCreated = true
}

return nil
Expand All @@ -127,8 +125,9 @@ func (f *jpptFolderTracker) ShouldSetProperties(folder string, overwrite common.
defer f.mu.Unlock()

var created bool
if idx, ok := f.contents[folder]; ok {
created = f.plan.Transfer(idx).TransferStatus() == common.ETransferStatus.FolderCreated()
if fNode, ok := f.contents.GetDirNode(folder); ok {
created = f.plan.Transfer(fNode.TransferIndex).TransferStatus() == common.ETransferStatus.FolderCreated() ||
f.plan.Transfer(fNode.TransferIndex).TransferStatus() == common.ETransferStatus.Success()
} else {
// This should not happen, ever.
// Folder property jobs register with the tracker before they start getting processed.
Expand Down Expand Up @@ -158,26 +157,3 @@ func (f *jpptFolderTracker) ShouldSetProperties(folder string, overwrite common.
panic("unknown overwrite option")
}
}

func (f *jpptFolderTracker) StopTracking(folder string) {
f.mu.Lock()
defer f.mu.Unlock()

if folder == common.Dev_Null {
return // Not possible to track this
}

// no-op, because tracking is now handled by jppt, anyway.
if _, ok := f.contents[folder]; ok {
delete(f.contents, folder)
} else {
currentContents := ""

for k, v := range f.contents {
currentContents += fmt.Sprintf("K: %s V: %d\n", k, v)
}

// double should never be hit, but *just in case*.
panic(common.NewAzCopyLogSanitizer().SanitizeLogMessage("Folder " + folder + " shouldn't finish tracking until it's been recorded\nCurrent Contents:\n" + currentContents))
}
}
Loading
Loading