From 33871893547989ad4e034fa9b1bebade6f692e6b Mon Sep 17 00:00:00 2001 From: Elliot Winkler Date: Tue, 12 May 2020 19:27:36 -0600 Subject: [PATCH] Retain original ordering of keys when diffing hashes If you have two hashes and there are missing or extra keys on either side, remember what the order of those keys were and use that same ordering when producing the diff instead of sticking those keys at the end of the diff. --- .rubocop.yml | 6 + .../operational_sequencers/array.rb | 69 +++-- lib/super_diff/operational_sequencers/base.rb | 77 +++--- lib/super_diff/operational_sequencers/hash.rb | 240 ++++++++++++++---- lib/super_diff/operations/unary_operation.rb | 3 + .../operational_sequencers/hash_including.rb | 10 +- .../integration/rspec/include_matcher_spec.rb | 4 +- spec/unit/equality_matcher_spec.rb | 156 ++++++++++++ 8 files changed, 436 insertions(+), 129 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index e65e0e10..4eb9a7fc 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -50,6 +50,8 @@ Metrics/BlockLength: - shared_context - shared_examples - shared_examples_for +Metrics/BlockNesting: + Enabled: false Metrics/ClassLength: Enabled: false Metrics/CyclomaticComplexity: @@ -115,10 +117,14 @@ Style/NegatedIf: Enabled: false Style/NegatedWhile: Enabled: false +Style/Next: + Enabled: false Style/NumericPredicate: Enabled: false Style/OneLineConditional: Enabled: false +Style/ParallelAssignment: + Enabled: false Style/PercentLiteralDelimiters: Enabled: false Style/PreferredHashMethods: diff --git a/lib/super_diff/operational_sequencers/array.rb b/lib/super_diff/operational_sequencers/array.rb index 6363f7e6..e82bca87 100644 --- a/lib/super_diff/operational_sequencers/array.rb +++ b/lib/super_diff/operational_sequencers/array.rb @@ -32,36 +32,15 @@ class LcsCallbacks public :operations def match(event) - operations << ::SuperDiff::Operations::UnaryOperation.new( - name: :noop, - collection: actual, - key: event.new_position, - value: event.new_element, - index: event.new_position, - index_in_collection: actual.index(event.new_element), - ) + add_noop_operation(event) end def discard_a(event) - operations << ::SuperDiff::Operations::UnaryOperation.new( - name: :delete, - collection: expected, - key: event.old_position, - value: event.old_element, - index: event.old_position, - index_in_collection: expected.index(event.old_element), - ) + add_delete_operation(event) end def discard_b(event) - operations << ::SuperDiff::Operations::UnaryOperation.new( - name: :insert, - collection: actual, - key: event.new_position, - value: event.new_element, - index: event.new_position, - index_in_collection: actual.index(event.new_element), - ) + add_insert_operation(event) end def change(event) @@ -77,21 +56,6 @@ def change(event) private - def add_change_operation(event, child_operations) - operations << ::SuperDiff::Operations::BinaryOperation.new( - name: :change, - left_collection: expected, - right_collection: actual, - left_key: event.old_position, - right_key: event.new_position, - left_value: event.old_element, - right_value: event.new_element, - left_index: event.old_position, - right_index: event.new_position, - child_operations: child_operations, - ) - end - def add_delete_operation(event) operations << Operations::UnaryOperation.new( name: :delete, @@ -113,6 +77,33 @@ def add_insert_operation(event) index_in_collection: actual.index(event.new_element), ) end + + def add_noop_operation(event) + operations << ::SuperDiff::Operations::UnaryOperation.new( + name: :noop, + collection: actual, + key: event.new_position, + value: event.new_element, + index: event.new_position, + index_in_collection: actual.index(event.new_element), + ) + end + + def add_change_operation(event, child_operations) + operations << ::SuperDiff::Operations::BinaryOperation.new( + name: :change, + left_collection: expected, + right_collection: actual, + left_key: event.old_position, + right_key: event.new_position, + left_value: event.old_element, + right_value: event.new_element, + left_index: event.old_position, + right_index: event.new_position, + child_operations: child_operations, + ) + end + end end end diff --git a/lib/super_diff/operational_sequencers/base.rb b/lib/super_diff/operational_sequencers/base.rb index 7b6aefb3..bcb53f35 100644 --- a/lib/super_diff/operational_sequencers/base.rb +++ b/lib/super_diff/operational_sequencers/base.rb @@ -10,35 +10,7 @@ def self.applies_to?(_expected, _actual) method_object [:expected!, :actual!] def call - i = 0 - operations = build_operation_sequence - - while i < unary_operations.length - operation = unary_operations[i] - next_operation = unary_operations[i + 1] - child_operations = possible_comparison_of(operation, next_operation) - - if child_operations - operations << Operations::BinaryOperation.new( - name: :change, - left_collection: operation.collection, - right_collection: next_operation.collection, - left_key: operation.key, - right_key: operation.key, - left_value: operation.collection[operation.key], - right_value: next_operation.collection[operation.key], - left_index: operation.index, - right_index: operation.index, - child_operations: child_operations, - ) - i += 2 - else - operations << operation - i += 1 - end - end - - operations + compressed_operations end protected @@ -53,6 +25,51 @@ def build_operation_sequence private + def compressed_operations + unary_operations = self.unary_operations + compressed_operations = build_operation_sequence + unmatched_delete_operations = [] + + unary_operations.each_with_index do |operation, index| + if ( + operation.name == :insert && + (delete_operation = unmatched_delete_operations.find { |op| op.key == operation.key }) && + (insert_operation = operation) + ) + unmatched_delete_operations.delete(delete_operation) + + if (child_operations = possible_comparison_of( + delete_operation, + insert_operation, + )) + compressed_operations.delete(delete_operation) + compressed_operations << Operations::BinaryOperation.new( + name: :change, + left_collection: delete_operation.collection, + right_collection: insert_operation.collection, + left_key: delete_operation.key, + right_key: insert_operation.key, + left_value: delete_operation.collection[operation.key], + right_value: insert_operation.collection[operation.key], + left_index: delete_operation.index_in_collection, + right_index: insert_operation.index_in_collection, + child_operations: child_operations, + ) + else + compressed_operations << insert_operation + end + else + if operation.name == :delete + unmatched_delete_operations << operation + end + + compressed_operations << operation + end + end + + compressed_operations + end + def possible_comparison_of(operation, next_operation) if should_compare?(operation, next_operation) sequence(operation.value, next_operation.value) @@ -65,7 +82,7 @@ def should_compare?(operation, next_operation) next_operation && operation.name == :delete && next_operation.name == :insert && - next_operation.index == operation.index + next_operation.key == operation.key end def sequence(expected, actual) diff --git a/lib/super_diff/operational_sequencers/hash.rb b/lib/super_diff/operational_sequencers/hash.rb index d8a0aacb..e017cf6a 100644 --- a/lib/super_diff/operational_sequencers/hash.rb +++ b/lib/super_diff/operational_sequencers/hash.rb @@ -8,12 +8,7 @@ def self.applies_to?(expected, actual) protected def unary_operations - all_keys.reduce([]) do |operations, key| - possibly_add_noop_to(operations, key) - possibly_add_delete_to(operations, key) - possibly_add_insert_to(operations, key) - operations - end + unary_operations_using_variant_of_patience_algorithm end def build_operation_sequence @@ -22,63 +17,210 @@ def build_operation_sequence private - def all_keys - actual.keys | expected.keys - end + def unary_operations_using_variant_of_patience_algorithm + operations = [] + aks, eks = actual.keys, expected.keys + previous_ei, ei = nil, 0 + ai = 0 - def possibly_add_noop_to(operations, key) - if should_add_noop_operation?(key) - operations << Operations::UnaryOperation.new( - name: :noop, - collection: actual, - key: key, - index: all_keys.index(key), - index_in_collection: actual.keys.index(key), - value: actual[key], - ) + # When diffing a hash, we're more interested in the 'actual' version + # than the 'expected' version, because that's the ultimate truth. + # Therefore, the diff is presented from the perspective of the 'actual' + # hash, and we start off by looping over it. + while ai < aks.size + ak = aks[ai] + av, ev = actual[ak], expected[ak] + # While we iterate over 'actual' in order, we jump all over + # 'expected', trying to match up its keys with the keys in 'actual' as + # much as possible. + ei = eks.index(ak) + + if should_add_noop_operation?(ak) + # (If we're here, it probably means that the key we're pointing to + # in the 'actual' and 'expected' hashes have the same value.) + + if ei && previous_ei && (ei - previous_ei) > 1 + # If we've jumped from one operation in the 'expected' hash to + # another operation later in 'expected' (due to the fact that the + # 'expected' hash is in a different order than 'actual'), collect + # any delete operations in between and add them to our operations + # array as deletes before adding the noop. If we don't do this + # now, then those deletes will disappear. (Again, we are mainly + # iterating over 'actual', so this is the only way to catch all of + # the keys in 'expected'.) + (previous_ei + 1).upto(ei - 1) do |ei2| + ek = eks[ei2] + ev2, av2 = expected[ek], actual[ek] + + if ( + (!actual.include?(ek) || ev != av2) && + operations.none? { |operation| + [:delete, :noop].include?(operation.name) && + operation.key == ek + } + ) + operations << Operations::UnaryOperation.new( + name: :delete, + collection: expected, + key: ek, + value: ev2, + index: ei2, + index_in_collection: ei2, + ) + end + end + end + + operations << Operations::UnaryOperation.new( + name: :noop, + collection: actual, + key: ak, + value: av, + index: ai, + index_in_collection: ai, + ) + else + # (If we're here, it probably means that the key in 'actual' isn't + # present in 'expected' or the values don't match.) + + if ( + (operations.empty? || operations.last.name == :noop) && + (ai == 0 || eks.include?(aks[ai - 1])) + ) + # If we go from a match in the last iteration to a missing or + # extra key in this one, or we're at the first key in 'actual' and + # it's missing or extra, look for deletes in the 'expected' hash + # and add them to our list of operations before we add the + # inserts. In most cases we will accomplish this by backtracking a + # bit to the key in 'expected' that matched the key in 'actual' we + # processed in the previous iteration (or just the first key in + # 'expected' if this is the first key in 'actual'), and then + # iterating from there through 'expected' until we reach the end + # or we hit some other condition (see below). + + start_index = + if ai > 0 + eks.index(aks[ai - 1]) + 1 + else + 0 + end + + start_index.upto(eks.size - 1) do |ei2| + ek = eks[ei2] + ev, av2 = expected[ek], actual[ek] + ai2 = aks.index(ek) + + if actual.include?(ek) && ev == av2 + # If the key in 'expected' we've landed on happens to be a + # match in 'actual', then stop, because it's going to be + # handled in some future iteration of the 'actual' loop. + break + elsif ( + aks[ai + 1 .. -1].any? { |k| + expected.include?(k) && expected[k] != actual[k] + } + ) + # While we backtracked a bit to iterate over 'expected', we + # now have to look ahead. If we will end up encountering a + # insert that matches this delete later, stop and go back to + # iterating over 'actual'. This is because the delete we would + # have added now will be added later when we encounter the + # associated insert, so we don't want to add it twice. + break + else + operations << Operations::UnaryOperation.new( + name: :delete, + collection: expected, + key: ek, + value: ev, + index: ei2, + index_in_collection: ei2, + ) + end + + if ek == ak && ev != av + # If we're pointing to the same key in 'expected' as in + # 'actual', but with different values, go ahead and add an + # insert now to accompany the delete added above. That way + # they appear together, which will be easier to read. + operations << Operations::UnaryOperation.new( + name: :insert, + collection: actual, + key: ak, + value: av, + index: ai, + index_in_collection: ai, + ) + end + end + end + + if ( + expected.include?(ak) && + ev != av && + operations.none? { |op| op.name == :delete && op.key == ak } + ) + # If we're here, it means that we didn't encounter any delete + # operations above for whatever reason and so we need to add a + # delete to represent the fact that the value for this key has + # changed. + operations << Operations::UnaryOperation.new( + name: :delete, + collection: expected, + key: ak, + value: expected[ak], + index: ei, + index_in_collection: ei, + ) + end + + if operations.none? { |op| op.name == :insert && op.key == ak } + # If we're here, it means that we didn't encounter any insert + # operations above. Since we already handled delete, the only + # alternative is that this key must not exist in 'expected', so + # we need to add an insert. + operations << Operations::UnaryOperation.new( + name: :insert, + collection: actual, + key: ak, + value: av, + index: ai, + index_in_collection: ai, + ) + end + end + + ai += 1 + previous_ei = ei end - end - def should_add_noop_operation?(key) - expected.include?(key) && - actual.include?(key) && - expected[key] == actual[key] - end + # The last thing to do is this: if there are keys in 'expected' that + # aren't in 'actual', and they aren't associated with any inserts to + # where they would have been added above, tack those deletes onto the + # end of our operations array. + (eks - aks - operations.map(&:key)).each do |ek| + ei = eks.index(ek) + ev = expected[ek] - def possibly_add_delete_to(operations, key) - if should_add_delete_operation?(key) operations << Operations::UnaryOperation.new( name: :delete, collection: expected, - key: key, - index: all_keys.index(key), - index_in_collection: expected.keys.index(key), - value: expected[key], + key: ek, + value: ev, + index: ei, + index_in_collection: ei, ) end - end - def should_add_delete_operation?(key) - expected.include?(key) && - (!actual.include?(key) || expected[key] != actual[key]) + operations end - def possibly_add_insert_to(operations, key) - if should_add_insert_operation?(key) - operations << Operations::UnaryOperation.new( - name: :insert, - collection: actual, - key: key, - index: all_keys.index(key), - index_in_collection: actual.keys.index(key), - value: actual[key], - ) - end + def should_add_noop_operation?(key) + expected.include?(key) && expected[key] == actual[key] end - def should_add_insert_operation?(key) - !expected.include?(key) || - (actual.include?(key) && expected[key] != actual[key]) + def all_keys + actual.keys | expected.keys end end end diff --git a/lib/super_diff/operations/unary_operation.rb b/lib/super_diff/operations/unary_operation.rb index 0420ad5b..ad26b820 100644 --- a/lib/super_diff/operations/unary_operation.rb +++ b/lib/super_diff/operations/unary_operation.rb @@ -6,6 +6,7 @@ class UnaryOperation :collection, :key, :value, + # TODO: Is this even used?? :index, :index_in_collection, ) @@ -15,6 +16,7 @@ def initialize( collection:, key:, value:, + # TODO: Is this even used?? index:, index_in_collection: index ) @@ -22,6 +24,7 @@ def initialize( @collection = collection @key = key @value = value + # TODO: Is this even used?? @index = index @index_in_collection = index_in_collection end diff --git a/lib/super_diff/rspec/operational_sequencers/hash_including.rb b/lib/super_diff/rspec/operational_sequencers/hash_including.rb index ec2019cd..3ea1f62c 100644 --- a/lib/super_diff/rspec/operational_sequencers/hash_including.rb +++ b/lib/super_diff/rspec/operational_sequencers/hash_including.rb @@ -8,9 +8,7 @@ def self.applies_to?(expected, actual) end def initialize(expected:, **rest) - super - - @expected = expected.expecteds.first + super(expected: expected.expecteds.first, **rest) end private @@ -21,12 +19,6 @@ def should_add_noop_operation?(key) expected[key] == actual[key] ) end - - def should_add_insert_operation?(key) - expected.include?(key) && - actual.include?(key) && - expected[key] != actual[key] - end end end end diff --git a/spec/integration/rspec/include_matcher_spec.rb b/spec/integration/rspec/include_matcher_spec.rb index bc53d0b5..e53e802d 100644 --- a/spec/integration/rspec/include_matcher_spec.rb +++ b/spec/integration/rspec/include_matcher_spec.rb @@ -214,9 +214,9 @@ alpha_line %|- city: "Hill Valley",| beta_line %|+ city: "Burbank",| # FIXME - # plain_line %| zip: "90210",| - plain_line %| zip: "90210"| + # alpha_line %|- state: "CA",| alpha_line %|- state: "CA"| + plain_line %| zip: "90210"| plain_line %| }| }, ) diff --git a/spec/unit/equality_matcher_spec.rb b/spec/unit/equality_matcher_spec.rb index 52279fbb..0c5c6c63 100644 --- a/spec/unit/equality_matcher_spec.rb +++ b/spec/unit/equality_matcher_spec.rb @@ -1056,6 +1056,162 @@ end end + context "given two equal-size, one-dimensional hashes where there is a mixture of missing and extra keys in relatively the same order" do + context "and the actual value is in relatively the same order as the expected" do + it "preserves the order of the keys as they are defined" do + actual_output = described_class.call( + expected: { + listed_count: 37_009, + created_at: "Tue Jan 13 19:28:24 +0000 2009", + favourites_count: 38, + geo_enabled: false, + verified: true, + statuses_count: 273_860, + media_count: 51_044, + contributors_enabled: false, + profile_background_color: "FFF1E0", + profile_background_image_url_https: "https://abs.twimg.com/images/themes/theme1/bg.png", + profile_background_tile: false, + profile_image_url: "http://pbs.twimg.com/profile_images/931156393108885504/EqEMtLhM_normal.jpg", + profile_image_url_https: "https://pbs.twimg.com/profile_images/931156393108885504/EqEMtLhM_normal.jpg", + profile_banner_url: "https://pbs.twimg.com/profile_banners/18949452/1581526592", + }, + actual: { + listed_count: 37_009, + created_at: "Tue Jan 13 19:28:24 +0000 2009", + favourites_count: 38, + utc_offset: nil, + time_zone: nil, + statuses_count: 273_860, + media_count: 51_044, + contributors_enabled: false, + is_translator: false, + is_translation_enabled: false, + profile_background_color: "FFF1E0", + profile_background_image_url_https: "https://abs.twimg.com/images/themes/theme1/bg.png", + profile_background_tile: false, + profile_banner_url: "https://pbs.twimg.com/profile_banners/18949452/1581526592", + } + ) + + expected_output = <<~STR.strip + Differing hashes. + + #{ + colored do + alpha_line %(Expected: { listed_count: 37009, created_at: "Tue Jan 13 19:28:24 +0000 2009", favourites_count: 38, geo_enabled: false, verified: true, statuses_count: 273860, media_count: 51044, contributors_enabled: false, profile_background_color: "FFF1E0", profile_background_image_url_https: "https://abs.twimg.com/images/themes/theme1/bg.png", profile_background_tile: false, profile_image_url: "http://pbs.twimg.com/profile_images/931156393108885504/EqEMtLhM_normal.jpg", profile_image_url_https: "https://pbs.twimg.com/profile_images/931156393108885504/EqEMtLhM_normal.jpg", profile_banner_url: "https://pbs.twimg.com/profile_banners/18949452/1581526592" }) + beta_line %( Actual: { listed_count: 37009, created_at: "Tue Jan 13 19:28:24 +0000 2009", favourites_count: 38, utc_offset: nil, time_zone: nil, statuses_count: 273860, media_count: 51044, contributors_enabled: false, is_translator: false, is_translation_enabled: false, profile_background_color: "FFF1E0", profile_background_image_url_https: "https://abs.twimg.com/images/themes/theme1/bg.png", profile_background_tile: false, profile_banner_url: "https://pbs.twimg.com/profile_banners/18949452/1581526592" }) + end + } + + Diff: + + #{ + colored do + plain_line %( {) + plain_line %( listed_count: 37009,) + plain_line %( created_at: "Tue Jan 13 19:28:24 +0000 2009",) + plain_line %( favourites_count: 38,) + alpha_line %(- geo_enabled: false,) + alpha_line %(- verified: true,) + beta_line %(+ utc_offset: nil,) + beta_line %(+ time_zone: nil,) + plain_line %( statuses_count: 273860,) + plain_line %( media_count: 51044,) + plain_line %( contributors_enabled: false,) + beta_line %(+ is_translator: false,) + beta_line %(+ is_translation_enabled: false,) + plain_line %( profile_background_color: "FFF1E0",) + plain_line %( profile_background_image_url_https: "https://abs.twimg.com/images/themes/theme1/bg.png",) + plain_line %( profile_background_tile: false,) + alpha_line %(- profile_image_url: "http://pbs.twimg.com/profile_images/931156393108885504/EqEMtLhM_normal.jpg",) + alpha_line %(- profile_image_url_https: "https://pbs.twimg.com/profile_images/931156393108885504/EqEMtLhM_normal.jpg",) + plain_line %( profile_banner_url: "https://pbs.twimg.com/profile_banners/18949452/1581526592") + plain_line %( }) + end + } + STR + + expect(actual_output).to eq(expected_output) + end + end + + context "and the actual value is in a different order than the expected" do + it "preserves the order of the keys as they are defined" do + actual_output = described_class.call( + expected: { + created_at: "Tue Jan 13 19:28:24 +0000 2009", + favourites_count: 38, + geo_enabled: false, + verified: true, + media_count: 51_044, + statuses_count: 273_860, + contributors_enabled: false, + profile_background_image_url_https: "https://abs.twimg.com/images/themes/theme1/bg.png", + profile_background_color: "FFF1E0", + profile_background_tile: false, + profile_image_url: "http://pbs.twimg.com/profile_images/931156393108885504/EqEMtLhM_normal.jpg", + listed_count: 37_009, + profile_banner_url: "https://pbs.twimg.com/profile_banners/18949452/1581526592", + }, + actual: { + listed_count: 37_009, + created_at: "Tue Jan 13 19:28:24 +0000 2009", + favourites_count: 38, + utc_offset: nil, + statuses_count: 273_860, + media_count: 51_044, + contributors_enabled: false, + is_translator: false, + is_translation_enabled: false, + profile_background_color: "FFF1E0", + profile_background_image_url_https: "https://abs.twimg.com/images/themes/theme1/bg.png", + profile_background_tile: false, + profile_banner_url: "https://pbs.twimg.com/profile_banners/18949452/1581526592", + } + ) + + expected_output = <<~STR.strip + Differing hashes. + + #{ + colored do + alpha_line %(Expected: { created_at: "Tue Jan 13 19:28:24 +0000 2009", favourites_count: 38, geo_enabled: false, verified: true, media_count: 51044, statuses_count: 273860, contributors_enabled: false, profile_background_image_url_https: "https://abs.twimg.com/images/themes/theme1/bg.png", profile_background_color: "FFF1E0", profile_background_tile: false, profile_image_url: "http://pbs.twimg.com/profile_images/931156393108885504/EqEMtLhM_normal.jpg", listed_count: 37009, profile_banner_url: "https://pbs.twimg.com/profile_banners/18949452/1581526592" }) + beta_line %( Actual: { listed_count: 37009, created_at: "Tue Jan 13 19:28:24 +0000 2009", favourites_count: 38, utc_offset: nil, statuses_count: 273860, media_count: 51044, contributors_enabled: false, is_translator: false, is_translation_enabled: false, profile_background_color: "FFF1E0", profile_background_image_url_https: "https://abs.twimg.com/images/themes/theme1/bg.png", profile_background_tile: false, profile_banner_url: "https://pbs.twimg.com/profile_banners/18949452/1581526592" }) + end + } + + Diff: + + #{ + colored do + plain_line %( {) + plain_line %( listed_count: 37009,) + plain_line %( created_at: "Tue Jan 13 19:28:24 +0000 2009",) + plain_line %( favourites_count: 38,) + alpha_line %(- geo_enabled: false,) + alpha_line %(- verified: true,) + beta_line %(+ utc_offset: nil,) + plain_line %( statuses_count: 273860,) + plain_line %( media_count: 51044,) + plain_line %( contributors_enabled: false,) + beta_line %(+ is_translator: false,) + beta_line %(+ is_translation_enabled: false,) + plain_line %( profile_background_color: "FFF1E0",) + plain_line %( profile_background_image_url_https: "https://abs.twimg.com/images/themes/theme1/bg.png",) + plain_line %( profile_background_tile: false,) + alpha_line %(- profile_image_url: "http://pbs.twimg.com/profile_images/931156393108885504/EqEMtLhM_normal.jpg",) + plain_line %( profile_banner_url: "https://pbs.twimg.com/profile_banners/18949452/1581526592") + plain_line %( }) + end + } + STR + + expect(actual_output).to eq(expected_output) + end + end + end + context "given two hashes containing arrays with differing values" do it "returns a message along with the diff" do actual_output = described_class.call(