Skip to content

Commit

Permalink
Add support for comparing graphs
Browse files Browse the repository at this point in the history
Previously, trying to call Equal on a graph would result in a stack-overflow
due to infinite recursion traversing cycles on a graph.
While a vast majority of Go values are trees or acyclic graphs, there exist
a small number of cases where graph equality is required.

As such, we add cycle detection to Equal and define what it means for two graphs
to be equal. Contrary to reflect.DeepEqual, which declares two graphs to be
equal so long any cycle were encountered, we require two graphs to have
equivalent graph structures.

Mathematically speaking, a graph G is a tuple (V, E) consisting of the set of
vertices and edges in that graph. Graphs G1 and G2 are equal if V1 == V2,
E1 == E2, and both have the same root vertex (entry point into the graph).
When traversing G1 and G2, we remember a stack of previously visited edges
ES1 and ES2. If the current edge e1 is in ES1 or e2 is in ES2, then we know that
a cycle exists. The graphs have the same structure when the previously
encountered edge ep1 and ep2 were encountered together.
Note that edges and vertices unreachable from the root vertex are ignored.

Appreciation goes to Eyal Posener (@posener), who proposed a different
(but semantically equivalent) approach in #79, which served as inspiration.

Fixes #74
  • Loading branch information
dsnet committed Mar 28, 2018
1 parent 5411ab9 commit 132fdc6
Show file tree
Hide file tree
Showing 4 changed files with 175 additions and 14 deletions.
82 changes: 68 additions & 14 deletions cmp/compare.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ var nothing = reflect.Value{}
// To equate empty slices and maps, consider using cmpopts.EquateEmpty.
// Map keys are equal according to the == operator.
// To use custom comparisons for map keys, consider using cmpopts.SortMaps.
//
// When recursing into a pointer, slice, or map, the current path is checked
// to detect whether the address for the given pointer, slice element,
// or map has already been visited. If there is a cycle, then the pointed to
// values are considered equal only if both addresses were previously visited
// in the same path step.
func Equal(x, y interface{}, opts ...Option) bool {
s := newState(opts)
s.compareAny(reflect.ValueOf(x), reflect.ValueOf(y))
Expand Down Expand Up @@ -111,6 +117,7 @@ type state struct {
// Calling statelessCompare must not result in observable changes to these.
result diff.Result // The current result of comparison
curPath Path // The current path in the value tree
curPtrs pointerPath // The current set of visited pointers
reporter reporter // Optional reporter used for difference formatting

// recChecker checks for infinite cycles applying the same set of
Expand All @@ -128,6 +135,7 @@ type state struct {

func newState(opts []Option) *state {
s := new(state)
s.curPtrs.Init()
for _, opt := range opts {
s.processOption(opt)
}
Expand Down Expand Up @@ -170,9 +178,9 @@ func (s *state) processOption(opt Option) {
// This function is stateless in that it does not alter the current result,
// or output to any registered reporters.
func (s *state) statelessCompare(vx, vy reflect.Value) diff.Result {
// We do not save and restore the curPath because all of the compareX
// methods should properly push and pop from the path.
// It is an implementation bug if the contents of curPath differs from
// We do not save and restore curPath and curPtrs because all of the
// compareX methods should properly push and pop from them.
// It is an implementation bug if the contents of the paths differ from
// when calling this function to when returning from it.

oldResult, oldReporter := s.result, s.reporter
Expand All @@ -185,7 +193,6 @@ func (s *state) statelessCompare(vx, vy reflect.Value) diff.Result {
}

func (s *state) compareAny(vx, vy reflect.Value) {
// TODO: Support cyclic data structures.
s.recChecker.Check(s.curPath)

// Rule 0: Differing types are never equal.
Expand Down Expand Up @@ -247,6 +254,13 @@ func (s *state) compareAny(vx, vy reflect.Value) {
}
s.curPath.push(&indirect{pathStep{t.Elem()}})
defer s.curPath.pop()

if eq, visited := s.curPtrs.Push(vx, vy); visited {
s.report(eq, vx, vy)
return
}
defer s.curPtrs.Pop(vx, vy)

s.compareAny(vx.Elem(), vy.Elem())
return
case reflect.Interface:
Expand All @@ -267,6 +281,16 @@ func (s *state) compareAny(vx, vy reflect.Value) {
s.report(vx.IsNil() && vy.IsNil(), vx, vy)
return
}

// NOTE: It is incorrect to call s.curPtrs.Push here as a slice is
// functionally a collection of pointers, rather than a single pointer.
// The pointer checking logic must be handled on a per-element basis
// in s.compareArray.
//
// For example v[:0] and v[:1] have the same slice pointer,
// but they are clearly different values. Using the slice pointer
// violates the assumption that equal pointers implies equal values.

fallthrough
case reflect.Array:
s.compareArray(vx, vy, t)
Expand Down Expand Up @@ -399,21 +423,34 @@ func (s *state) compareArray(vx, vy reflect.Value, t reflect.Type) {
step := &sliceIndex{pathStep{t.Elem()}, 0, 0}
s.curPath.push(step)

// Compute an edit-script for slices vx and vy.
es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {
step.xkey, step.ykey = ix, iy
return s.statelessCompare(vx.Index(ix), vy.Index(iy))
})

// Report the entire slice as is if the arrays are of primitive kind,
// and the arrays are different enough.
// Checking element pointers is necessary for slices of non-primitives,
// as they may contain slices that sub-slice some parent slice.
// The elements of a slice are always addressable.
isPrimitive := false
switch t.Elem().Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
isPrimitive = true
}
checkPointer := t.Kind() == reflect.Slice && !isPrimitive

// Compute an edit-script for slices vx and vy.
es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {
step.xkey, step.ykey = ix, iy
vvx, vvy := vx.Index(ix), vy.Index(iy)
if checkPointer {
px, py := vvx.Addr(), vvy.Addr()
if eq, visited := s.curPtrs.Push(px, py); visited {
return diff.BoolResult(eq)
}
defer s.curPtrs.Pop(px, py)
}
return s.statelessCompare(vvx, vvy)
})

// Report the entire slice as is if the arrays are of primitive kind,
// and the arrays are different enough.
if isPrimitive && es.Dist() > (vx.Len()+vy.Len())/4 {
s.curPath.pop() // Pop first since we are reporting the whole slice
s.report(false, vx, vy)
Expand All @@ -434,10 +471,21 @@ func (s *state) compareArray(vx, vy reflect.Value, t reflect.Type) {
iy++
default:
step.xkey, step.ykey = ix, iy
vvx, vvy := vx.Index(ix), vy.Index(iy)
if e == diff.Identity {
s.report(true, vx.Index(ix), vy.Index(iy))
s.report(true, vvx, vvy)
} else {
s.compareAny(vx.Index(ix), vy.Index(iy))
if checkPointer {
px, py := vvx.Addr(), vvy.Addr()
if eq, visited := s.curPtrs.Push(px, py); visited {
s.report(eq, vvx, vvy)
} else {
s.compareAny(vvx, vvy)
s.curPtrs.Pop(px, py)
}
} else {
s.compareAny(vvx, vvy)
}
}
ix++
iy++
Expand All @@ -453,6 +501,12 @@ func (s *state) compareMap(vx, vy reflect.Value, t reflect.Type) {
return
}

if eq, visited := s.curPtrs.Push(vx, vy); visited {
s.report(eq, vx, vy)
return
}
defer s.curPtrs.Pop(vx, vy)

// We combine and sort the two map keys so that we can perform the
// comparisons in a deterministic order.
step := &mapIndex{pathStep: pathStep{t.Elem()}}
Expand Down
31 changes: 31 additions & 0 deletions cmp/compare_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ func TestDiff(t *testing.T) {
tests = append(tests, transformerTests()...)
tests = append(tests, embeddedTests()...)
tests = append(tests, methodTests()...)
tests = append(tests, cycleTests()...)
tests = append(tests, project1Tests()...)
tests = append(tests, project2Tests()...)
tests = append(tests, project3Tests()...)
Expand Down Expand Up @@ -1554,6 +1555,36 @@ func methodTests() []test {
}}
}

func cycleTests() []test {
const label = "Cycle"

type A *A
graphA := new(A)
*graphA = graphA

type B []B
graphB := B{nil}
graphB[0] = graphB

type C map[int]C
graphC := C{0: nil}
graphC[0] = graphC

return []test{{
label: label,
x: graphA,
y: graphA,
}, {
label: label,
x: graphB,
y: graphB,
}, {
label: label,
x: graphC,
y: graphC,
}}
}

func project1Tests() []test {
const label = "Project1"

Expand Down
8 changes: 8 additions & 0 deletions cmp/internal/diff/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,14 @@ type EqualFunc func(ix int, iy int) Result
// NDiff is the number of sub-elements that are not equal.
type Result struct{ NSame, NDiff int }

// BoolResult returns a Result that is either Equal or not Equal.
func BoolResult(b bool) Result {
if b {
return Result{NSame: 1} // Equal, Similar
}
return Result{NDiff: 2} // Not Equal, not Similar
}

// Equal indicates whether the symbols are equal. Two symbols are equal
// if and only if NDiff == 0. If Equal, then they are also Similar.
func (r Result) Equal() bool { return r.NDiff == 0 }
Expand Down
68 changes: 68 additions & 0 deletions cmp/path.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"strings"
"unicode"
"unicode/utf8"

"github.com/google/go-cmp/cmp/internal/value"
)

type (
Expand Down Expand Up @@ -291,6 +293,72 @@ var (
_ PathStep = transform{}
)

// pointerPath represents a dual-stack of pointers encountered when
// recursively traversing the x and y values. This data structure supports
// detection of cycles and determining whether the cycles are equal.
// In Go, cycles can occur via pointers, slices, and maps.
//
// The pointerPath uses a map to represent a stack; where descension into a
// pointer pushes the address onto the stack, and ascension from a pointer
// pops the address from the stack. Thus, when traversing into a pointer from
// reflect.Ptr, reflect.Slice element, or reflect.Map, we can detect cycles
// by checking whether the pointer has already been visited. The cycle detection
// uses a seperate stack for the x and y values.
//
// If a cycle is detected we need to determine whether the two pointers
// should be considered equal. The definition of equality chosen by Equal
// requires two graphs to have the same structure. To determine this, both the
// x and y values must have a cycle where the previous pointers were also
// encountered together as a pair.
//
// Semantically, this is equivalent to augmenting Indirect, SliceIndex, and
// MapIndex with pointer information for the x and y values.
// Suppose px and py are two pointers to compare, we then search the
// Path for whether px was ever encountered in the Path history of x, and
// similarly so with py. If either side has a cycle, the comparison is only
// equal if both px and py have a cycle resulting from the same PathStep.
//
// Using a map as a stack is more performant as we can perform cycle detection
// in O(1) instead of O(N) where N is len(Path).
type pointerPath struct {
// mx is keyed by x pointers, where the value is the associated y pointer.
mx map[value.Pointer]value.Pointer
// my is keyed by y pointers, where the value is the associated x pointer.
my map[value.Pointer]value.Pointer
}

func (p *pointerPath) Init() {
p.mx = make(map[value.Pointer]value.Pointer)
p.my = make(map[value.Pointer]value.Pointer)
}

// Push indicates intent to descend into pointers vx and vy where
// visited reports whether either has been seen before. If visited before,
// equal reports whether both pointers were encountered together.
// Pop must be called if and only if the pointers were never visited.
//
// The pointers vx and vy must be a reflect.Ptr, reflect.Slice, or reflect.Map
// and be non-nil.
func (p pointerPath) Push(vx, vy reflect.Value) (equal, visited bool) {
px := value.PointerOf(vx)
py := value.PointerOf(vy)
_, ok1 := p.mx[px]
_, ok2 := p.my[py]
if ok1 || ok2 {
equal = p.mx[px] == py && p.my[py] == px // Pointers paired together
return equal, true
}
p.mx[px] = py
p.my[py] = px
return false, false
}

// Pop ascends from pointers vx and vy.
func (p pointerPath) Pop(vx, vy reflect.Value) {
delete(p.mx, value.PointerOf(vx))
delete(p.my, value.PointerOf(vy))
}

// isExported reports whether the identifier is exported.
func isExported(id string) bool {
r, _ := utf8.DecodeRuneInString(id)
Expand Down

0 comments on commit 132fdc6

Please sign in to comment.