Skip to content

Commit

Permalink
Options to short-circuit the optimization when close to optimal (#14)
Browse files Browse the repository at this point in the history
  • Loading branch information
ESultanik committed Sep 9, 2020
1 parent 37d1200 commit df79da4
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 5 deletions.
21 changes: 18 additions & 3 deletions graphtage/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,20 @@ def main(argv=None) -> int:
default=None,
help=f'equivalent to `--to-mime {mime}`'
)
parser.add_argument('--match-if', '-m', type=str, default=None, help='only attempt to match two dictionaries if the provided expression is satisfied. For example, `--match-if "from[\'foo\'] == to[\'bar\']"` will mean that only a dictionary which has a "foo" key that has the same value as the other dictionary\'s "bar" key will be attempted to be paired')
parser.add_argument('--match-unless', '-u', type=str, default=None, help='similar to `--match-if`, but only attempt a match if the provided expression evaluates to `False`')
parser.add_argument('--match-if', '-m', type=str, default=None,
help='only attempt to match two dictionaries if the provided expression is satisfied. For '
'example, `--match-if "from[\'foo\'] == to[\'bar\']"` will mean that only a dictionary '
'which has a "foo" key that has the same value as the other dictionary\'s "bar" key will '
'be attempted to be paired')
parser.add_argument('--match-unless', '-u', type=str, default=None,
help='similar to `--match-if`, but only attempt a match if the provided expression evaluates '
'to `False`')
parser.add_argument('--only-edits', '-e', action='store_true', help='only print the edits rather than a full diff')
parser.add_argument('--max-edit-distance', type=int, default=0,
help='the edit distance at which the optimization will stop (default = 0)')
parser.add_argument('--edit-distance-delta', type=int, default=None,
help='an optional delta (distance from the optimal edit distance) at which the optimization '
'will stop')
formatting = parser.add_argument_group(title='output formatting')
formatting.add_argument('--format', '-f', choices=graphtage.FILETYPES_BY_TYPENAME.keys(), default=None,
help='output format for the diff (default is to use the format of FROM_PATH)')
Expand Down Expand Up @@ -306,7 +317,11 @@ def printer_type(*pos_args, **kwargs):
printer.newline()
had_edits = had_edits or edit.has_non_zero_cost()
else:
diff = from_tree.diff(to_tree)
diff = from_tree.diff(
to_tree,
max_edit_distance=args.max_edit_distance,
edit_distance_delta=args.edit_distance_delta
)
if args.format is not None:
formatter = graphtage.FILETYPES_BY_TYPENAME[args.format].get_default_formatter()
else:
Expand Down
21 changes: 19 additions & 2 deletions graphtage/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,11 +480,18 @@ def get_all_edits(self, node: 'TreeNode') -> Iterator[Edit]:
if edit.bounds().lower_bound > 0:
yield edit

def diff(self: T, node: 'TreeNode') -> Union[EditedTreeNode, T]:
def diff(
self: T,
node: 'TreeNode',
max_edit_distance: int = 0,
edit_distance_delta: Optional[int] = None
) -> Union[EditedTreeNode, T]:
"""Performs a diff against the provided node.
Args:
node: The node against which to perform the diff.
max_edit_distance: An optional edit distance at which the search will stop (default is zero).
edit_distance_delta: An optional edit distance delta (distance from optimal) at which the search will stop.
Returns:
Union[EditedTreeNode, T]: An edited version of this node with all edits being
Expand All @@ -498,12 +505,22 @@ def diff(self: T, node: 'TreeNode') -> Union[EditedTreeNode, T]:
prev_bounds = edit.bounds()
total_range = prev_bounds.upper_bound - prev_bounds.lower_bound
prev_range = total_range
if prev_bounds.upper_bound <= max_edit_distance or \
(edit_distance_delta is not None and total_range <= edit_distance_delta):
# we are already done
edit.on_diff(ret)
return ret
with DEFAULT_PRINTER.tqdm(leave=False, initial=0, total=total_range, desc='Diffing') as t:
while edit.valid and not edit.is_complete() and edit.tighten_bounds():
new_bounds = edit.bounds()
if new_bounds.upper_bound <= max_edit_distance:
break
new_range = new_bounds.upper_bound - new_bounds.lower_bound
if edit_distance_delta is not None and new_range <= edit_distance_delta:
break
if prev_range < new_range:
log.warning(f"The most recent call to `tighten_bounds()` on edit {edit} tightened its bounds from {prev_bounds} to {new_bounds}")
log.warning(f"The most recent call to `tighten_bounds()` on edit {edit} tightened its bounds from "
f"{prev_bounds} to {new_bounds}")
t.update(prev_range - new_range)
prev_range = new_range
edit.on_diff(ret)
Expand Down

0 comments on commit df79da4

Please sign in to comment.