Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue #19: emtpy new git file #86

Merged
merged 1 commit into from
Jan 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions tests/samples/sample8.diff
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
diff --git a/boo.bin b/boo.bin
new file mode 100644
index 0000000..ae000000
diff --git a/foo.bin b/foo.bin
new file mode 100644
index 0000000..af000000
Expand All @@ -9,3 +12,6 @@ diff --git a/baz.bin b/baz.bin
deleted file mode 100644
index af000000..0000000
Binary files a/baz.bin and /dev/null differ
diff --git a/fuz.bin b/fuz.bin
new file mode 100644
index 0000000..ae000000
30 changes: 21 additions & 9 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,26 +250,38 @@ def test_parse_diff_with_new_and_modified_binary_files(self):
res = PatchSet(diff_file)

# three file in the patch
self.assertEqual(len(res), 3)
self.assertEqual(len(res), 5)

# first file is added
# first empty file is added
self.assertFalse(res[0].is_modified_file)
self.assertFalse(res[0].is_removed_file)
self.assertTrue(res[0].is_added_file)
self.assertTrue(res[0].is_binary_file)
self.assertFalse(res[0].is_binary_file)

# second file is modified
self.assertTrue(res[1].is_modified_file)
# second file is added
self.assertFalse(res[1].is_modified_file)
self.assertFalse(res[1].is_removed_file)
self.assertFalse(res[1].is_added_file)
self.assertTrue(res[1].is_added_file)
self.assertTrue(res[1].is_binary_file)

# third file is removed
self.assertFalse(res[2].is_modified_file)
self.assertTrue(res[2].is_removed_file)
# third file is modified
self.assertTrue(res[2].is_modified_file)
self.assertFalse(res[2].is_removed_file)
self.assertFalse(res[2].is_added_file)
self.assertTrue(res[2].is_binary_file)

# fourth file is removed
self.assertFalse(res[3].is_modified_file)
self.assertTrue(res[3].is_removed_file)
self.assertFalse(res[3].is_added_file)
self.assertTrue(res[3].is_binary_file)

# fifth empty file is added
self.assertFalse(res[4].is_modified_file)
self.assertFalse(res[4].is_removed_file)
self.assertTrue(res[4].is_added_file)
self.assertFalse(res[4].is_binary_file)

def test_parse_round_trip_with_binary_files_in_diff(self):
"""Parse git diff with binary files though round trip"""
utf8_file = os.path.join(self.samples_dir, 'samples/sample8.diff')
Expand Down
4 changes: 4 additions & 0 deletions unidiff/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@
RE_DIFF_GIT_HEADER = re.compile(
r'^diff --git (?P<source>a/[^\t\n]+) (?P<target>b/[^\t\n]+)')

# check diff git new file marker `new file mode 100644`
RE_DIFF_GIT_NEW_FILE = re.compile(
r'^new file mode \d+\n$')


# @@ (source offset, length) (target offset, length) @@ (section header)
RE_HUNK_HEADER = re.compile(
Expand Down
32 changes: 29 additions & 3 deletions unidiff/patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
LINE_TYPE_NO_NEWLINE,
LINE_VALUE_NO_NEWLINE,
RE_DIFF_GIT_HEADER,
RE_DIFF_GIT_NEW_FILE,
RE_HUNK_BODY_LINE,
RE_HUNK_EMPTY_BODY_LINE,
RE_HUNK_HEADER,
Expand All @@ -63,7 +64,7 @@ def implements_to_string(cls):
return cls
else:
from io import StringIO
from typing import Iterable, Optional, Union
from typing import Iterable, Optional, Union, Tuple
Felixoid marked this conversation as resolved.
Show resolved Hide resolved
open_file = open
make_str = str
implements_to_string = lambda x: x
Expand Down Expand Up @@ -463,6 +464,7 @@ def __str__(self):
def _parse(self, diff, encoding, metadata_only):
# type: (StringIO, Optional[str], bool) -> None
current_file = None
is_file_appended = False
patch_info = None

diff = enumerate(diff, 1)
Expand All @@ -473,8 +475,10 @@ def _parse(self, diff, encoding, metadata_only):
# check for a git file rename
is_diff_git_header = RE_DIFF_GIT_HEADER.match(line)
if is_diff_git_header:
if patch_info is None:
patch_info = PatchInfo()
if current_file is not None and not is_file_appended:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we could refactor this to avoid introducing the is_file_appended flag by always setting up a PatchedFile when a git header is found (and some minor changes to accommodate for that), will check.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAI remember I was in a rush of fixing it. Most probably, there is a more elegant way to fix it. I am 100% sure you are more confident with code! =)

self.append(current_file)
current_file = None
patch_info = PatchInfo()
source_file = is_diff_git_header.group('source')
target_file = is_diff_git_header.group('target')
if (source_file != DEV_NULL
Expand All @@ -485,6 +489,22 @@ def _parse(self, diff, encoding, metadata_only):
patch_info, source_file, target_file, None, None,
is_rename=True)
self.append(current_file)
is_file_appended = True
if source_file[2:] == target_file[2:]:
# preserve current_file for an empty new file
current_file = PatchedFile(
patch_info, source_file, target_file,
)
is_file_appended = False
patch_info.append(line)
continue

# check for a git empty new file
is_diff_git_new_file = RE_DIFF_GIT_NEW_FILE.match(line)
if is_diff_git_new_file:
if current_file is None or patch_info is None:
raise UnidiffParseError('Unexpected new file found: %s' % line)
current_file.source_file = DEV_NULL
patch_info.append(line)
continue

Expand All @@ -498,6 +518,7 @@ def _parse(self, diff, encoding, metadata_only):
if current_file is not None and not (current_file.is_rename and
current_file.source_file == source_file):
current_file = None
is_file_appended = False
continue

# check for target file header
Expand All @@ -513,6 +534,7 @@ def _parse(self, diff, encoding, metadata_only):
patch_info, source_file, target_file,
source_timestamp, target_timestamp)
self.append(current_file)
is_file_appended = True
patch_info = None
continue

Expand Down Expand Up @@ -553,10 +575,14 @@ def _parse(self, diff, encoding, metadata_only):
self.append(current_file)
patch_info = None
current_file = None
is_file_appended = True
continue

patch_info.append(line)

if current_file is not None and not is_file_appended:
self.append(current_file)

@classmethod
def from_filename(cls, filename, encoding=DEFAULT_ENCODING, errors=None, newline=None):
# type: (str, str, Optional[str]) -> PatchSet
Expand Down