diff --git a/ChangeLog.md b/ChangeLog.md index 1e29217ab..d09d53366 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -6,6 +6,14 @@ ### Bug Fixes 1. Fixed an issue where AzCopy would not persist tokens when logging in via Device Code. ([#2361](https://github.com/Azure/azure-storage-azcopy/issues/2361)) +## Version 10.27.1 + +### Bug Fixes +1. Reverted a change that resulted in breaking file service transfers and a larger memory footprint. ([#2858](https://github.com/Azure/azure-storage-azcopy/issues/2858)[#2855](https://github.com/Azure/azure-storage-azcopy/issues/2855)) + +### Dependency updates +1. github.com/golang-jwt/jwt/v4 v4.5.0 -> v4.5.1 ([#2861](https://github.com/Azure/azure-storage-azcopy/issues/2861)) + ## Version 10.27.0 ### New Features diff --git a/common/folderCreationTracker_interface.go b/common/folderCreationTracker_interface.go index 798d2aee3..556e73691 100644 --- a/common/folderCreationTracker_interface.go +++ b/common/folderCreationTracker_interface.go @@ -7,6 +7,7 @@ package common type FolderCreationTracker interface { CreateFolder(folder string, doCreation func() error) error ShouldSetProperties(folder string, overwrite OverwriteOption, prompter Prompter) bool + StopTracking(folder string) } type Prompter interface { diff --git a/common/trieForDirPath.go b/common/trieForDirPath.go deleted file mode 100644 index 4f0e2c415..000000000 --- a/common/trieForDirPath.go +++ /dev/null @@ -1,54 +0,0 @@ -package common - -import ( - "strings" -) - -type TrieNode struct { - children map[string]*TrieNode - TransferIndex uint32 - UnregisteredButCreated bool -} - -type Trie struct { - Root *TrieNode -} - -func NewTrie() *Trie { - return &Trie{ - Root: &TrieNode{children: make(map[string]*TrieNode)}, - } -} - -// InsertDirNode inserts the dirPath into the Trie and returns the corresponding node and if it had to be created -func (t *Trie) InsertDirNode(dirPath string) (*TrieNode, bool) { - node, _, created := t.getDirNodeHelper(dirPath, true) - return node, created -} - -// GetDirNode returns the directory node if it exists -func (t *Trie) GetDirNode(dirPath string) (*TrieNode, bool) { - node, exists, _ := t.getDirNodeHelper(dirPath, false) - return node, exists -} - -// getDirNodeHelper returns the node, if the node exists and if the node had to be created -func (t *Trie) getDirNodeHelper(dirPath string, createIfNotExists bool) (*TrieNode, bool, bool) { - node := t.Root - segments := strings.Split(dirPath, "/") - created := false - for _, segment := range segments { - child, exists := node.children[segment] - if !exists { - if createIfNotExists { - child = &TrieNode{children: make(map[string]*TrieNode)} - node.children[segment] = child - created = true - } else { - return nil, false, false - } - } - node = child - } - return node, true, created -} diff --git a/common/trieForDirPath_test.go b/common/trieForDirPath_test.go deleted file mode 100644 index 56a98b2db..000000000 --- a/common/trieForDirPath_test.go +++ /dev/null @@ -1,95 +0,0 @@ -package common - -import ( - "github.com/stretchr/testify/assert" - "testing" -) - -func TestTrie_NewTrie(t *testing.T) { - a := assert.New(t) - trie := NewTrie() - a.NotNil(trie) - a.NotNil(trie.Root) - a.Empty(trie.Root.children) -} - -func TestTrie_InsertDirNode(t *testing.T) { - a := assert.New(t) - // One Level - trie := NewTrie() - - n1, created := trie.InsertDirNode("mydir") - a.NotNil(n1) - a.True(created) - a.Len(trie.Root.children, 1) - a.Contains(trie.Root.children, "mydir") - a.Empty(trie.Root.children["mydir"].children) - - n2, created := trie.InsertDirNode("mydir") - a.NotNil(n2) - a.Equal(n1, n2) - a.False(created) - - // Multiple Levels - trie = NewTrie() - - n1, created = trie.InsertDirNode("mydir/mysubdir/lastlevel") - a.NotNil(n1) - a.True(created) - a.Len(trie.Root.children, 1) - a.Contains(trie.Root.children, "mydir") - a.Len(trie.Root.children["mydir"].children, 1) - a.Contains(trie.Root.children["mydir"].children, "mysubdir") - a.Len(trie.Root.children["mydir"].children["mysubdir"].children, 1) - a.Contains(trie.Root.children["mydir"].children["mysubdir"].children, "lastlevel") - a.Empty(trie.Root.children["mydir"].children["mysubdir"].children["lastlevel"].children) - a.Equal(trie.Root.children["mydir"].children["mysubdir"].children["lastlevel"], n1) - - // Insert in middle - n2, created = trie.InsertDirNode("mydir/mysubdir") - a.False(created) - a.Equal(trie.Root.children["mydir"].children["mysubdir"], n2) - - // Insert a different child - n3, created := trie.InsertDirNode("mydir/mysubdirsibling") - a.True(created) - - a.Len(trie.Root.children["mydir"].children, 2) - a.Contains(trie.Root.children["mydir"].children, "mysubdir") - a.Contains(trie.Root.children["mydir"].children, "mysubdirsibling") - a.Empty(trie.Root.children["mydir"].children["mysubdirsibling"].children) - a.Equal(trie.Root.children["mydir"].children["mysubdirsibling"], n3) - -} - -func TestTrie_GetDirNode(t *testing.T) { - a := assert.New(t) - // One Level - trie := NewTrie() - - n, ok := trie.GetDirNode("mydir/mysubdir/lastlevel") - a.Nil(n) - a.False(ok) - - n1, _ := trie.InsertDirNode("mydir") - n2, ok := trie.GetDirNode("mydir") - a.True(ok) - a.Equal(n1, n2) - - n1, _ = trie.InsertDirNode("mydir/mysubdir/lastlevel") - n2, _ = trie.InsertDirNode("mydir/mysubdirsibling") - n3, ok := trie.GetDirNode("mydir") - a.True(ok) - a.Equal(trie.Root.children["mydir"], n3) - - n4, ok := trie.GetDirNode("mydir/mysubdir/lastlevel/actuallyiwantthisone") - a.Nil(n4) - a.False(ok) - - _, ok = trie.GetDirNode("mydir/mysubdir") - a.True(ok) - _, ok = trie.GetDirNode("mydir/mysubdir/lastlevel") - a.True(ok) - _, ok = trie.GetDirNode("mydir/mysubdirsibling") - a.True(ok) -} diff --git a/ste/folderCreationTracker.go b/ste/folderCreationTracker.go index 09d9551b7..434b1f44d 100644 --- a/ste/folderCreationTracker.go +++ b/ste/folderCreationTracker.go @@ -1,6 +1,7 @@ package ste import ( + "fmt" "net/url" "strings" "sync" @@ -20,9 +21,10 @@ func NewFolderCreationTracker(fpo common.FolderPropertyOption, plan *JobPartPlan case common.EFolderPropertiesOption.AllFolders(), common.EFolderPropertiesOption.AllFoldersExceptRoot(): return &jpptFolderTracker{ // This prevents a dependency cycle. Reviewers: Are we OK with this? Can you think of a better way to do it? - plan: plan, - mu: &sync.Mutex{}, - contents: common.NewTrie(), + plan: plan, + mu: &sync.Mutex{}, + contents: make(map[string]uint32), + unregisteredButCreated: make(map[string]struct{}), } case common.EFolderPropertiesOption.NoFolders(): // can't use simpleFolderTracker here, because when no folders are processed, @@ -46,10 +48,15 @@ func (f *nullFolderTracker) ShouldSetProperties(folder string, overwrite common. panic("wrong type of folder tracker has been instantiated. This type does not do any tracking") } +func (f *nullFolderTracker) StopTracking(folder string) { + // noop (because we don't track anything) +} + type jpptFolderTracker struct { - plan IJobPartPlanHeader - mu *sync.Mutex - contents *common.Trie + plan IJobPartPlanHeader + mu *sync.Mutex + contents map[string]uint32 + unregisteredButCreated map[string]struct{} } func (f *jpptFolderTracker) RegisterPropertiesTransfer(folder string, transferIndex uint32) { @@ -60,14 +67,13 @@ func (f *jpptFolderTracker) RegisterPropertiesTransfer(folder string, transferIn return // Never persist to dev-null } - fNode, _ := f.contents.InsertDirNode(folder) - fNode.TransferIndex = transferIndex + f.contents[folder] = transferIndex // We created it before it was enumerated-- Let's register that now. - if fNode.UnregisteredButCreated { + if _, ok := f.unregisteredButCreated[folder]; ok { f.plan.Transfer(transferIndex).SetTransferStatus(common.ETransferStatus.FolderCreated(), false) - fNode.UnregisteredButCreated = false + delete(f.unregisteredButCreated, folder) } } @@ -79,15 +85,12 @@ func (f *jpptFolderTracker) CreateFolder(folder string, doCreation func() error) return nil // Never persist to dev-null } - // If the folder has already been created, then we don't need to create it again - fNode, addedToTrie := f.contents.InsertDirNode(folder) - - if !addedToTrie && (f.plan.Transfer(fNode.TransferIndex).TransferStatus() == common.ETransferStatus.FolderCreated() || - f.plan.Transfer(fNode.TransferIndex).TransferStatus() == common.ETransferStatus.Success()) { + if idx, ok := f.contents[folder]; ok && + f.plan.Transfer(idx).TransferStatus() == (common.ETransferStatus.FolderCreated()) { return nil } - if fNode.UnregisteredButCreated { + if _, ok := f.unregisteredButCreated[folder]; ok { return nil } @@ -96,14 +99,13 @@ func (f *jpptFolderTracker) CreateFolder(folder string, doCreation func() error) return err } - if !addedToTrie { + if idx, ok := f.contents[folder]; ok { // overwrite it's transfer status - f.plan.Transfer(fNode.TransferIndex).SetTransferStatus(common.ETransferStatus.FolderCreated(), false) + f.plan.Transfer(idx).SetTransferStatus(common.ETransferStatus.FolderCreated(), false) } else { // A folder hasn't been hit in traversal yet. // Recording it in memory is OK, because we *cannot* resume a job that hasn't finished traversal. - // We set the value to 0 as we just want to record it in memory - fNode.UnregisteredButCreated = true + f.unregisteredButCreated[folder] = struct{}{} } return nil @@ -125,9 +127,8 @@ func (f *jpptFolderTracker) ShouldSetProperties(folder string, overwrite common. defer f.mu.Unlock() var created bool - if fNode, ok := f.contents.GetDirNode(folder); ok { - created = f.plan.Transfer(fNode.TransferIndex).TransferStatus() == common.ETransferStatus.FolderCreated() || - f.plan.Transfer(fNode.TransferIndex).TransferStatus() == common.ETransferStatus.Success() + if idx, ok := f.contents[folder]; ok { + created = f.plan.Transfer(idx).TransferStatus() == common.ETransferStatus.FolderCreated() } else { // This should not happen, ever. // Folder property jobs register with the tracker before they start getting processed. @@ -157,3 +158,26 @@ func (f *jpptFolderTracker) ShouldSetProperties(folder string, overwrite common. panic("unknown overwrite option") } } + +func (f *jpptFolderTracker) StopTracking(folder string) { + f.mu.Lock() + defer f.mu.Unlock() + + if folder == common.Dev_Null { + return // Not possible to track this + } + + // no-op, because tracking is now handled by jppt, anyway. + if _, ok := f.contents[folder]; ok { + delete(f.contents, folder) + } else { + currentContents := "" + + for k, v := range f.contents { + currentContents += fmt.Sprintf("K: %s V: %d\n", k, v) + } + + // double should never be hit, but *just in case*. + panic(common.NewAzCopyLogSanitizer().SanitizeLogMessage("Folder " + folder + " shouldn't finish tracking until it's been recorded\nCurrent Contents:\n" + currentContents)) + } +} diff --git a/ste/folderCreationTracker_test.go b/ste/folderCreationTracker_test.go index 35238eff2..ae3452f9e 100644 --- a/ste/folderCreationTracker_test.go +++ b/ste/folderCreationTracker_test.go @@ -32,27 +32,27 @@ import ( // This is mocked to test the folder creation tracker type mockedJPPH struct { folderName []string - index []int + index []int status []*JobPartPlanTransfer + } -func (jpph *mockedJPPH) CommandString() string { panic("Not implemented") } -func (jpph *mockedJPPH) GetRelativeSrcDstStrings(uint32) (string, string) { panic("Not implemented") } -func (jpph *mockedJPPH) JobPartStatus() common.JobStatus { panic("Not implemented") } -func (jpph *mockedJPPH) JobStatus() common.JobStatus { panic("Not implemented") } -func (jpph *mockedJPPH) SetJobPartStatus(common.JobStatus) { panic("Not implemented") } -func (jpph *mockedJPPH) SetJobStatus(common.JobStatus) { panic("Not implemented") } -func (jpph *mockedJPPH) Transfer(idx uint32) *JobPartPlanTransfer { +func (jpph *mockedJPPH) CommandString() string { panic("Not implemented") } +func (jpph *mockedJPPH) GetRelativeSrcDstStrings(uint32) (string, string) { panic("Not implemented") } +func (jpph *mockedJPPH) JobPartStatus() common.JobStatus { panic("Not implemented") } +func (jpph *mockedJPPH) JobStatus() common.JobStatus { panic("Not implemented") } +func (jpph *mockedJPPH) SetJobPartStatus(common.JobStatus) { panic("Not implemented") } +func (jpph *mockedJPPH) SetJobStatus(common.JobStatus) { panic("Not implemented") } +func (jpph *mockedJPPH) Transfer(idx uint32) *JobPartPlanTransfer { return jpph.status[idx] } -func (jpph *mockedJPPH) TransferSrcDstRelatives(uint32) (string, string) { panic("Not implemented") } -func (jpph *mockedJPPH) TransferSrcDstStrings(uint32) (string, string, bool) { - panic("Not implemented") -} -func (jpph *mockedJPPH) TransferSrcPropertiesAndMetadata(uint32) (common.ResourceHTTPHeaders, common.Metadata, blob.BlobType, blob.AccessTier, bool, bool, bool, common.InvalidMetadataHandleOption, common.EntityType, string, string, common.BlobTags) { +func (jpph *mockedJPPH) TransferSrcDstRelatives(uint32) (string, string) { panic("Not implemented") } +func (jpph *mockedJPPH) TransferSrcDstStrings(uint32) (string, string, bool) { panic("Not implemented") } +func (jpph *mockedJPPH) TransferSrcPropertiesAndMetadata(uint32) (common.ResourceHTTPHeaders, common.Metadata, blob.BlobType, blob.AccessTier, bool, bool, bool, common.InvalidMetadataHandleOption, common.EntityType, string, string, common.BlobTags) { panic("Not implemented") } + // This test verifies that when we call dir create for a directory, it is created only once, // even if multiple routines request it to be created. func TestFolderCreationTracker_directoryCreate(t *testing.T) { @@ -60,21 +60,23 @@ func TestFolderCreationTracker_directoryCreate(t *testing.T) { // create a plan with one registered and one unregistered folder folderReg := "folderReg" - folderUnReg := "folderUnReg" + folderUnReg := "folderUnReg" + plan := &mockedJPPH{ folderName: []string{folderReg, folderUnReg}, - index: []int{0, 1}, - status: []*JobPartPlanTransfer{ - &JobPartPlanTransfer{atomicTransferStatus: common.ETransferStatus.NotStarted()}, - &JobPartPlanTransfer{atomicTransferStatus: common.ETransferStatus.NotStarted()}, + index: []int{0, 1}, + status: []*JobPartPlanTransfer { + &JobPartPlanTransfer{atomicTransferStatus: common.ETransferStatus.NotStarted(),}, + &JobPartPlanTransfer{atomicTransferStatus: common.ETransferStatus.NotStarted(),}, }, } - fct := &jpptFolderTracker{ - plan: plan, - mu: &sync.Mutex{}, - contents: common.NewTrie(), + fct := &jpptFolderTracker{ + plan: plan, + mu: &sync.Mutex{}, + contents: make(map[string]uint32), + unregisteredButCreated: make(map[string]struct{}), } // 1. Register folder1 @@ -83,13 +85,13 @@ func TestFolderCreationTracker_directoryCreate(t *testing.T) { // Multiple calls to create folderReg should execute create only once. numOfCreations := int32(0) var wg sync.WaitGroup - doCreation := func() error { + doCreation := func() error{ atomic.AddInt32(&numOfCreations, 1) plan.status[0].atomicTransferStatus = common.ETransferStatus.FolderCreated() return nil } - ch := make(chan bool) + ch := make(chan bool) for i := 0; i < 50; i++ { wg.Add(1) go func() { @@ -98,7 +100,7 @@ func TestFolderCreationTracker_directoryCreate(t *testing.T) { wg.Done() }() } - close(ch) // this will cause all above go routines to start creating folder + close(ch) // this will cause all above go rotuines to start creating folder wg.Wait() a.Equal(int32(1), numOfCreations) @@ -106,7 +108,7 @@ func TestFolderCreationTracker_directoryCreate(t *testing.T) { // similar test for unregistered folder numOfCreations = 0 ch = make(chan bool) - doCreation = func() error { + doCreation = func() error{ atomic.AddInt32(&numOfCreations, 1) plan.status[1].atomicTransferStatus = common.ETransferStatus.FolderCreated() return nil @@ -124,4 +126,4 @@ func TestFolderCreationTracker_directoryCreate(t *testing.T) { wg.Wait() a.Equal(int32(1), numOfCreations) -} +} \ No newline at end of file diff --git a/ste/xfer-anyToRemote-folder.go b/ste/xfer-anyToRemote-folder.go index 173d3517f..afa20dcdc 100644 --- a/ste/xfer-anyToRemote-folder.go +++ b/ste/xfer-anyToRemote-folder.go @@ -82,6 +82,7 @@ func anyToRemote_folder(jptm IJobPartTransferMgr, info *TransferInfo, pacer pace } else { t := jptm.GetFolderCreationTracker() + defer t.StopTracking(s.DirUrlToString()) // don't need it after this routine shouldSetProps := t.ShouldSetProperties(s.DirUrlToString(), jptm.GetOverwriteOption(), jptm.GetOverwritePrompter()) if !shouldSetProps { jptm.LogAtLevelForCurrentTransfer(common.LogWarning, "Folder already exists, so due to the --overwrite option, its properties won't be set") diff --git a/ste/xfer-remoteToLocal-folder.go b/ste/xfer-remoteToLocal-folder.go index 3d3b65904..3e80f59ce 100644 --- a/ste/xfer-remoteToLocal-folder.go +++ b/ste/xfer-remoteToLocal-folder.go @@ -56,6 +56,7 @@ func remoteToLocal_folder(jptm IJobPartTransferMgr, pacer pacer, df downloaderFa // no chunks to schedule. Just run the folder handling operations t := jptm.GetFolderCreationTracker() + defer t.StopTracking(info.Destination) // don't need it after this routine err = common.CreateDirectoryIfNotExist(info.Destination, t) // we may create it here, or possible there's already a file transfer for the folder that has created it, or maybe it already existed before this job if err != nil {