From ff8b751fc2bf011c4107b93689b8d183bc481c03 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 23 Dec 2021 15:51:15 +0100 Subject: [PATCH] Fix issue #19: emtpy new git file --- tests/samples/sample8.diff | 6 ++++++ tests/test_parser.py | 30 +++++++++++++++++++++--------- unidiff/constants.py | 4 ++++ unidiff/patch.py | 32 +++++++++++++++++++++++++++++--- 4 files changed, 60 insertions(+), 12 deletions(-) diff --git a/tests/samples/sample8.diff b/tests/samples/sample8.diff index 5ab192e..8a27fe6 100644 --- a/tests/samples/sample8.diff +++ b/tests/samples/sample8.diff @@ -1,3 +1,6 @@ +diff --git a/boo.bin b/boo.bin +new file mode 100644 +index 0000000..ae000000 diff --git a/foo.bin b/foo.bin new file mode 100644 index 0000000..af000000 @@ -9,3 +12,6 @@ diff --git a/baz.bin b/baz.bin deleted file mode 100644 index af000000..0000000 Binary files a/baz.bin and /dev/null differ +diff --git a/fuz.bin b/fuz.bin +new file mode 100644 +index 0000000..ae000000 diff --git a/tests/test_parser.py b/tests/test_parser.py index e078c33..9f56484 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -250,26 +250,38 @@ def test_parse_diff_with_new_and_modified_binary_files(self): res = PatchSet(diff_file) # three file in the patch - self.assertEqual(len(res), 3) + self.assertEqual(len(res), 5) - # first file is added + # first empty file is added self.assertFalse(res[0].is_modified_file) self.assertFalse(res[0].is_removed_file) self.assertTrue(res[0].is_added_file) - self.assertTrue(res[0].is_binary_file) + self.assertFalse(res[0].is_binary_file) - # second file is modified - self.assertTrue(res[1].is_modified_file) + # second file is added + self.assertFalse(res[1].is_modified_file) self.assertFalse(res[1].is_removed_file) - self.assertFalse(res[1].is_added_file) + self.assertTrue(res[1].is_added_file) self.assertTrue(res[1].is_binary_file) - # third file is removed - self.assertFalse(res[2].is_modified_file) - self.assertTrue(res[2].is_removed_file) + # third file is modified + self.assertTrue(res[2].is_modified_file) + self.assertFalse(res[2].is_removed_file) self.assertFalse(res[2].is_added_file) self.assertTrue(res[2].is_binary_file) + # fourth file is removed + self.assertFalse(res[3].is_modified_file) + self.assertTrue(res[3].is_removed_file) + self.assertFalse(res[3].is_added_file) + self.assertTrue(res[3].is_binary_file) + + # fifth empty file is added + self.assertFalse(res[4].is_modified_file) + self.assertFalse(res[4].is_removed_file) + self.assertTrue(res[4].is_added_file) + self.assertFalse(res[4].is_binary_file) + def test_parse_round_trip_with_binary_files_in_diff(self): """Parse git diff with binary files though round trip""" utf8_file = os.path.join(self.samples_dir, 'samples/sample8.diff') diff --git a/unidiff/constants.py b/unidiff/constants.py index e3b1ae6..b6d8e3b 100644 --- a/unidiff/constants.py +++ b/unidiff/constants.py @@ -39,6 +39,10 @@ RE_DIFF_GIT_HEADER = re.compile( r'^diff --git (?Pa/[^\t\n]+) (?Pb/[^\t\n]+)') +# check diff git new file marker `new file mode 100644` +RE_DIFF_GIT_NEW_FILE = re.compile( + r'^new file mode \d+\n$') + # @@ (source offset, length) (target offset, length) @@ (section header) RE_HUNK_HEADER = re.compile( diff --git a/unidiff/patch.py b/unidiff/patch.py index 923d048..bf4363e 100644 --- a/unidiff/patch.py +++ b/unidiff/patch.py @@ -39,6 +39,7 @@ LINE_TYPE_NO_NEWLINE, LINE_VALUE_NO_NEWLINE, RE_DIFF_GIT_HEADER, + RE_DIFF_GIT_NEW_FILE, RE_HUNK_BODY_LINE, RE_HUNK_EMPTY_BODY_LINE, RE_HUNK_HEADER, @@ -63,7 +64,7 @@ def implements_to_string(cls): return cls else: from io import StringIO - from typing import Iterable, Optional, Union + from typing import Iterable, Optional, Union, Tuple open_file = open make_str = str implements_to_string = lambda x: x @@ -463,6 +464,7 @@ def __str__(self): def _parse(self, diff, encoding, metadata_only): # type: (StringIO, Optional[str], bool) -> None current_file = None + is_file_appended = False patch_info = None diff = enumerate(diff, 1) @@ -473,8 +475,10 @@ def _parse(self, diff, encoding, metadata_only): # check for a git file rename is_diff_git_header = RE_DIFF_GIT_HEADER.match(line) if is_diff_git_header: - if patch_info is None: - patch_info = PatchInfo() + if current_file is not None and not is_file_appended: + self.append(current_file) + current_file = None + patch_info = PatchInfo() source_file = is_diff_git_header.group('source') target_file = is_diff_git_header.group('target') if (source_file != DEV_NULL @@ -485,6 +489,22 @@ def _parse(self, diff, encoding, metadata_only): patch_info, source_file, target_file, None, None, is_rename=True) self.append(current_file) + is_file_appended = True + if source_file[2:] == target_file[2:]: + # preserve current_file for an empty new file + current_file = PatchedFile( + patch_info, source_file, target_file, + ) + is_file_appended = False + patch_info.append(line) + continue + + # check for a git empty new file + is_diff_git_new_file = RE_DIFF_GIT_NEW_FILE.match(line) + if is_diff_git_new_file: + if current_file is None or patch_info is None: + raise UnidiffParseError('Unexpected new file found: %s' % line) + current_file.source_file = DEV_NULL patch_info.append(line) continue @@ -498,6 +518,7 @@ def _parse(self, diff, encoding, metadata_only): if current_file is not None and not (current_file.is_rename and current_file.source_file == source_file): current_file = None + is_file_appended = False continue # check for target file header @@ -513,6 +534,7 @@ def _parse(self, diff, encoding, metadata_only): patch_info, source_file, target_file, source_timestamp, target_timestamp) self.append(current_file) + is_file_appended = True patch_info = None continue @@ -553,10 +575,14 @@ def _parse(self, diff, encoding, metadata_only): self.append(current_file) patch_info = None current_file = None + is_file_appended = True continue patch_info.append(line) + if current_file is not None and not is_file_appended: + self.append(current_file) + @classmethod def from_filename(cls, filename, encoding=DEFAULT_ENCODING, errors=None, newline=None): # type: (str, str, Optional[str]) -> PatchSet