Skip to content

Commit

Permalink
Exclude a list of paths from diff. (#81)
Browse files Browse the repository at this point in the history
* Exclude a list of paths from diff.

Adds the ability to exclude a list of JSON paths (e.g. datetimes, UUIDs, etc.) from the diff.

---------

Co-authored-by: ppirooznia <payam.pirooznia@viasat.com>
  • Loading branch information
payam54 and ppirooznia authored Jul 19, 2024
1 parent b2db6d3 commit 65b96a4
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 6 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ Diff JSON and JSON-like structures in Python.
>>> diff({'a': [0, {'b': 4}, 1]}, {'a': [0, {'b': 5}, 1]})
{'a': {1: {'b': 5}}}

# You can exclude some paths (dict keys joined with '.') from the diff (doesn't work if the value types are different)
>>> diff({'a': 1, 'b': {'b1': 20, 'b2': 21}, 'c': 3}, {'a': 1, 'b': {'b1': 22, 'b2': 23}, 'c': 30}, exclude_paths=['b.b1', 'c'])
{'b': {'b2': 23}}

# ...but similarity is taken into account
>>> diff({'a': [0, {'b': 4}, 1]}, {'a': [0, {'c': 5}, 1]})
{'a': {insert: [(1, {'c': 5})], delete: [1]}}
Expand Down
28 changes: 22 additions & 6 deletions jsondiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,7 +891,7 @@ def _set_diff(self, a, b):
s = s_common / n_tot if n_tot != 0 else 1.0
return self.options.syntax.emit_set_diff(a, b, s, added, removed), s

def _dict_diff(self, a, b):
def _dict_diff(self, a, b, exclude_paths, path):
"""
Computes the difference between two dictionaries.
"""
Expand All @@ -903,32 +903,42 @@ def _dict_diff(self, a, b):
added = {}
changed = {}
for k, v in a.items():
new_path = f'{path}.{k}' if path else k
if new_path in exclude_paths:
continue
w = b.get(k, missing)
if w is missing:
nremoved += 1
removed[k] = v
else:
nmatched += 1
d, s = self._obj_diff(v, w)
d, s = self._obj_diff(v, w, exclude_paths, new_path)
if s < 1.0:
changed[k] = d
smatched += 0.5 + 0.5 * s
for k, v in b.items():
if k not in a:
new_path = f'{path}.{k}' if path else k
if new_path in exclude_paths:
continue
nadded += 1
added[k] = v
n_tot = nremoved + nmatched + nadded
s = smatched / n_tot if n_tot != 0 else 1.0
return self.options.syntax.emit_dict_diff(a, b, s, added, changed, removed), s

def _obj_diff(self, a, b):
def _obj_diff(self, a, b, exclude_paths=None, path=''):
"""
Computes the difference between any two JSON-compatible objects.
"""
if not exclude_paths:
exclude_paths = []
if path in exclude_paths:
return {}, 1.0
if a is b:
return self.options.syntax.emit_value_diff(a, b, 1.0), 1.0
if isinstance(a, dict) and isinstance(b, dict):
return self._dict_diff(a, b)
return self._dict_diff(a, b, exclude_paths, path)
elif isinstance(a, tuple) and isinstance(b, tuple):
return self._list_diff(a, b)
elif isinstance(a, list) and isinstance(b, list):
Expand All @@ -940,15 +950,21 @@ def _obj_diff(self, a, b):
else:
return self.options.syntax.emit_value_diff(a, b, 1.0), 1.0

def diff(self, a, b, fp=None):
def diff(self, a, b, fp=None, exclude_paths: list = None) -> dict:
"""
Computes the difference between two JSON structures.
:param a: The original JSON structure.
:param b: The modified JSON structure.
:param fp: Optional file pointer to dump the diff to.
:param exclude_paths: Optional list of string paths to exclude from the diff.
"""
if not exclude_paths:
exclude_paths = []
if self.options.load:
a = self.options.loader(a)
b = self.options.loader(b)

d, s = self._obj_diff(a, b)
d, s = self._obj_diff(a, b, exclude_paths)

if self.options.marshal or self.options.dump:
d = self.marshal(d)
Expand Down
13 changes: 13 additions & 0 deletions tests/test_jsondiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,3 +318,16 @@ def test_yaml_dump_string_fp(self):
buffer = io.StringIO()
dumper(data, buffer)
self.assertEqual(expected, buffer.getvalue())

def test_exclude_paths(self):
# Checks that differences located at excluded paths are left out of the diff.
differ = JsonDiffer()

# 'a' is identical in both inputs; 'b.b1', 'b.b2' and 'c' all differ.
a = {'a': 1, 'b': {'b1': 20, 'b2': 21}, 'c': 3}
b = {'a': 1, 'b': {'b1': 22, 'b2': 23}, 'c': 30}

# Exclude one nested key (dot-separated path) and one top-level key.
exclude_paths = ['b.b1', 'c']

d = differ.diff(a, b, exclude_paths=exclude_paths)

# The diff should only contain changes that are not in the exclude_paths
self.assertEqual({'b': {'b2': 23}}, d)

0 comments on commit 65b96a4

Please sign in to comment.