diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 74731ec679cf..26606fb4efd1 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,9 @@ CHANGELOG Next Release (TBD) ================== +* bugfix: ``aws s3 sync``: Fix issue for unnecessarily resyncing files + on Windows machines. + (`issue 843 `__) * bugfix: ``aws s3 sync``: Fix issue where keys were being decoded twice when syncing between buckets. (`issue 862 `__) diff --git a/awscli/customizations/s3/filegenerator.py b/awscli/customizations/s3/filegenerator.py index f78169c80286..16284830cc76 100644 --- a/awscli/customizations/s3/filegenerator.py +++ b/awscli/customizations/s3/filegenerator.py @@ -124,7 +124,7 @@ def list_files(self, path, dir_op): file_path = join(path, name) if isdir(file_path): names[i] = name + os.path.sep - names.sort() + self.normalize_sort(names, os.sep, '/') for name in names: file_path = join(path, name) if not self.should_ignore_file(file_path): @@ -141,6 +141,15 @@ def list_files(self, path, dir_op): size, last_update = get_file_stat(file_path) yield file_path, size, last_update + def normalize_sort(self, names, os_sep, character): + """ + The purpose of this function is to ensure that the same path separator + is used when sorting. On Windows, the path separator is a backslash as + opposed to a forward slash which can lead to differences in sorting + between S3 and a Windows machine. + """ + names.sort(key=lambda item: item.replace(os_sep, character)) + def _check_paths_decoded(self, path, names): # We can get a UnicodeDecodeError if we try to listdir() and # can't decode the contents with sys.getfilesystemencoding(). 
In this diff --git a/tests/integration/customizations/s3/test_plugin.py b/tests/integration/customizations/s3/test_plugin.py index 336de2d679c2..4301ea477320 100644 --- a/tests/integration/customizations/s3/test_plugin.py +++ b/tests/integration/customizations/s3/test_plugin.py @@ -478,6 +478,22 @@ def test_s3_to_s3_sync_with_plus_char(self): p2 = aws('s3 sync s3://%s/ s3://%s/' % (bucket_name, bucket_name_2)) self.assertNotIn('copy:', p2.stdout) self.assertEqual('', p2.stdout) + + def test_sync_no_resync(self): + self.files.create_file('xyz123456789', contents='test1') + self.files.create_file(os.path.join('xyz1', 'test'), contents='test2') + self.files.create_file(os.path.join('xyz', 'test'), contents='test3') + bucket_name = self.create_bucket() + + p = aws('s3 sync %s s3://%s' % (self.files.rootdir, bucket_name)) + self.assert_no_errors(p) + self.assertTrue(self.key_exists(bucket_name, 'xyz123456789')) + self.assertTrue(self.key_exists(bucket_name, 'xyz1/test')) + self.assertTrue(self.key_exists(bucket_name, 'xyz/test')) + + p2 = aws('s3 sync %s s3://%s/' % (self.files.rootdir, bucket_name)) + self.assertNotIn('upload:', p2.stdout) + self.assertEqual('', p2.stdout) def test_sync_to_from_s3(self): bucket_name = self.create_bucket() diff --git a/tests/unit/customizations/s3/test_filegenerator.py b/tests/unit/customizations/s3/test_filegenerator.py index 8efa04a893c9..79a13df57ba1 100644 --- a/tests/unit/customizations/s3/test_filegenerator.py +++ b/tests/unit/customizations/s3/test_filegenerator.py @@ -344,6 +344,28 @@ def test_list_local_files_with_unicode_chars(self): self.assertEqual(values, expected_order) +class TestNormalizeSort(unittest.TestCase): + def test_normalize_sort(self): + names = ['xyz123456789', + 'xyz1' + os.path.sep + 'test', + 'xyz' + os.path.sep + 'test'] + ref_names = [names[2], names[1], names[0]] + filegenerator = FileGenerator(None, None, None) + filegenerator.normalize_sort(names, os.path.sep, '/') + for i in range(len(ref_names)): + 
self.assertEqual(ref_names[i], names[i]) + + def test_normalize_sort_backslash(self): + names = ['xyz123456789', + 'xyz1\\test', + 'xyz\\test'] + ref_names = [names[2], names[1], names[0]] + filegenerator = FileGenerator(None, None, None) + filegenerator.normalize_sort(names, '\\', '/') + for i in range(len(ref_names)): + self.assertEqual(ref_names[i], names[i]) + + class S3FileGeneratorTest(unittest.TestCase): def setUp(self): self.session = FakeSession()