Skip to content

Commit

Permalink
diff: DoUnified support io.Reader
Browse files Browse the repository at this point in the history
  • Loading branch information
fcharlie committed Dec 14, 2024
1 parent b8510ba commit 8df8b75
Show file tree
Hide file tree
Showing 15 changed files with 289 additions and 75 deletions.
14 changes: 14 additions & 0 deletions modules/diferenco/diferenco.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,20 @@ const (
Patience
)

// String returns the display name of the diff algorithm. Values other than
// Histogram, Myers, ONP and Patience are reported as "Unknown".
func (a Algorithm) String() string {
	name := "Unknown"
	switch a {
	case Histogram:
		name = "Histogram"
	case Myers:
		name = "Myers"
	case ONP:
		name = "O(NP)"
	case Patience:
		name = "Patience"
	}
	return name
}

// commonPrefixLength returns the length of the common prefix of two slices of E.
func commonPrefixLength[E comparable](a, b []E) int {
n := min(len(a), len(b))
Expand Down
112 changes: 105 additions & 7 deletions modules/diferenco/diferenco_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"path/filepath"
"runtime"
"testing"
"time"

"github.com/antgroup/hugescm/modules/diferenco/color"
)
Expand All @@ -26,21 +27,64 @@ func TestDiff(t *testing.T) {
return
}
textB := string(bytesB)
aa := []Algorithm{Histogram, Myers, ONP, Patience}
for _, a := range aa {
now := time.Now()
u, err := DoUnified(context.Background(), &Options{
From: &File{
Path: "a.txt",
},
To: nil,
S1: textA,
S2: textB,
Algorithm: a,
})
if err != nil {
return
}
fmt.Fprintf(os.Stderr, "\x1b[32m%s --> use time: %v\x1b[0m\n%s\n", a, time.Since(now), u)
}

}

func TestPatchFD(t *testing.T) {
_, filename, _, _ := runtime.Caller(0)
dir := filepath.Dir(filename)
fd, err := os.Open(filepath.Join(dir, "testdata/a.txt"))
if err != nil {
fmt.Fprintf(os.Stderr, "read a error: %v\n", err)
return
}
defer fd.Close()
bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt"))
if err != nil {
fmt.Fprintf(os.Stderr, "read b error: %v\n", err)
return
}
textB := string(bytesB)
u, err := DoUnified(context.Background(), &Options{
From: &File{
Path: "a.txt",
Hash: "4789568",
Mode: 0o10644,
},
To: nil,
A: textA,
B: textB,
To: &File{
Path: "b.txt",
Hash: "6547898",
Mode: 0o10644,
},
R1: fd,
S2: textB,
})
if err != nil {
return
}
fmt.Fprintf(os.Stderr, "%s\n", u)
e := NewUnifiedEncoder(os.Stderr)
e.SetColor(color.NewColorConfig())
_ = e.Encode([]*Unified{u})
}

func TestDiff2(t *testing.T) {
func TestPatch(t *testing.T) {
_, filename, _, _ := runtime.Caller(0)
dir := filepath.Dir(filename)
bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt"))
Expand All @@ -66,8 +110,62 @@ func TestDiff2(t *testing.T) {
Hash: "6547898",
Mode: 0o10644,
},
A: textA,
B: textB,
S1: textA,
S2: textB,
})
if err != nil {
return
}
e := NewUnifiedEncoder(os.Stderr)
e.SetColor(color.NewColorConfig())
_ = e.Encode([]*Unified{u})
}

func TestPatchNew(t *testing.T) {
_, filename, _, _ := runtime.Caller(0)
dir := filepath.Dir(filename)
bytesB, err := os.ReadFile(filepath.Join(dir, "testdata/b.txt"))
if err != nil {
fmt.Fprintf(os.Stderr, "read b error: %v\n", err)
return
}
textB := string(bytesB)
u, err := DoUnified(context.Background(), &Options{
From: nil,
To: &File{
Path: "a.txt",
Hash: "6547898",
Mode: 0o10644,
},
S1: "",
S2: textB,
})
if err != nil {
return
}
e := NewUnifiedEncoder(os.Stderr)
e.SetColor(color.NewColorConfig())
_ = e.Encode([]*Unified{u})
}

func TestPatchDelete(t *testing.T) {
_, filename, _, _ := runtime.Caller(0)
dir := filepath.Dir(filename)
bytesA, err := os.ReadFile(filepath.Join(dir, "testdata/a.txt"))
if err != nil {
fmt.Fprintf(os.Stderr, "read a error: %v\n", err)
return
}
textA := string(bytesA)
u, err := DoUnified(context.Background(), &Options{
From: &File{
Path: "a.txt",
Hash: "6547898",
Mode: 0o10644,
},
To: nil,
S1: textA,
S2: "",
})
if err != nil {
return
Expand Down
8 changes: 4 additions & 4 deletions modules/diferenco/diffmatchpatch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ func TestDiffSlices(t *testing.T) {
sink := &Sink{
Index: make(map[string]int),
}
aa := sink.ParseLines(a)
bb := sink.ParseLines(b)
aa := sink.SplitLines(a)
bb := sink.SplitLines(b)
diffs, err := DiffSlices(context.Background(), aa, bb)
if err != nil {
return
Expand Down Expand Up @@ -69,8 +69,8 @@ func TestDiffSlicesAsStringDiff(t *testing.T) {
sink := &Sink{
Index: make(map[string]int),
}
aa := sink.ParseLines(a)
bb := sink.ParseLines(b)
aa := sink.SplitLines(a)
bb := sink.SplitLines(b)
dd, err := DiffSlices(context.Background(), aa, bb)
if err != nil {
return
Expand Down
3 changes: 2 additions & 1 deletion modules/diferenco/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ func (h *histogram[E]) run(ctx context.Context, beforce []E, beforePos int, afte
h.populate(beforce)
lcs := findLcs(beforce, after, h)
if lcs == nil {
changes, err := onpDiff(ctx, beforce, beforePos, after, afterPos)
changes, err := myersCompute(ctx, beforce, beforePos, after, afterPos)
if err != nil {
return err
}
Expand All @@ -186,6 +186,7 @@ func (h *histogram[E]) run(ctx context.Context, beforce []E, beforePos int, afte
}
}

// HistogramDiff calculates the differences between L1 and L2 using the histogram algorithm.
func HistogramDiff[E comparable](ctx context.Context, L1, L2 []E) ([]Change, error) {
prefix := commonPrefixLength(L1, L2)
L1 = L1[prefix:]
Expand Down
27 changes: 8 additions & 19 deletions modules/diferenco/histogram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"path/filepath"
"runtime"
"testing"

"github.com/antgroup/hugescm/modules/diferenco/color"
)

func TestHistogram(t *testing.T) {
Expand All @@ -27,24 +29,11 @@ func TestHistogram(t *testing.T) {
sink := &Sink{
Index: make(map[string]int),
}
a := sink.ParseLines(textA)
b := sink.ParseLines(textB)
a := sink.SplitLines(textA)
b := sink.SplitLines(textB)
changes, _ := HistogramDiff(context.Background(), a, b)
i := 0
for _, c := range changes {
for ; i < c.P1; i++ {
fmt.Fprintf(os.Stderr, " %s", sink.Lines[a[i]])
}
for j := c.P1; j < c.P1+c.Del; j++ {
fmt.Fprintf(os.Stderr, "- %s", sink.Lines[a[j]])
}
for j := c.P2; j < c.P2+c.Ins; j++ {
fmt.Fprintf(os.Stderr, "+ %s", sink.Lines[b[j]])
}
i += c.Del
}
for ; i < len(a); i++ {
fmt.Fprintf(os.Stderr, " %s", sink.Lines[a[i]])
}
fmt.Fprintf(os.Stderr, "\n\nEND\n\n")
u := sink.ToUnified(&File{Path: "a.txt"}, &File{Path: "b.txt"}, changes, a, b, DefaultContextLines)
e := NewUnifiedEncoder(os.Stderr)
e.SetColor(color.NewColorConfig())
e.Encode([]*Unified{u})
}
6 changes: 3 additions & 3 deletions modules/diferenco/merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,9 +373,9 @@ func Merge(ctx context.Context, o, a, b string, labelO, labelA, labelB string) (
labelB = " " + labelB
}
sink := NewSink(NEWLINE_RAW)
slicesO := sink.ParseLines(o)
slicesA := sink.ParseLines(a)
slicesB := sink.ParseLines(b)
slicesO := sink.SplitLines(o)
slicesA := sink.SplitLines(a)
slicesB := sink.SplitLines(b)
regions := Diff3Merge(slicesA, slicesO, slicesB, true)
out := &strings.Builder{}
out.Grow(max(len(o), len(a), len(b)))
Expand Down
19 changes: 15 additions & 4 deletions modules/diferenco/myers.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,28 @@ import (
"slices"
)

func MyersDiff[E comparable](ctx context.Context, seq1, seq2 []E) ([]Change, error) {
// MyersDiff computes the changes between L1 and L2 with an O(ND) diff
// algorithm that has a quadratic space worst-case complexity.
//
// The common prefix and suffix of the two sequences are trimmed first; the
// prefix length is handed to myersCompute as the starting position of both
// trimmed sequences.
func MyersDiff[E comparable](ctx context.Context, L1 []E, L2 []E) ([]Change, error) {
	head := commonPrefixLength(L1, L2)
	a, b := L1[head:], L2[head:]
	tail := commonSuffixLength(a, b)
	return myersCompute(ctx, a[:len(a)-tail], head, b[:len(b)-tail], head)
}

func myersCompute[E comparable](ctx context.Context, seq1 []E, P1 int, seq2 []E, P2 int) ([]Change, error) {
// These are common special cases.
// The early return improves performance dramatically.
if len(seq1) == 0 && len(seq2) == 0 {
return []Change{}, nil
}
if len(seq1) == 0 {
return []Change{{Ins: len(seq2)}}, nil
return []Change{{P1: P1, P2: P2, Ins: len(seq2)}}, nil
}
if len(seq2) == 0 {
return []Change{{Del: len(seq1)}}, nil
return []Change{{P1: P1, P2: P2, Del: len(seq1)}}, nil
}
seqX := seq1
seqY := seq2
Expand Down Expand Up @@ -108,7 +119,7 @@ outer:
endY = path.y + path.length
}
if endX != lastAligningPosS1 || endY != lastAligningPosS2 {
changes = append(changes, Change{P1: endX, P2: endY, Del: lastAligningPosS1 - endX, Ins: lastAligningPosS2 - endY})
changes = append(changes, Change{P1: P1 + endX, P2: P2 + endY, Del: lastAligningPosS1 - endX, Ins: lastAligningPosS2 - endY})
}
if path == nil {
break
Expand Down
12 changes: 6 additions & 6 deletions modules/diferenco/myers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ func TestMyersDiff(t *testing.T) {
sink := &Sink{
Index: make(map[string]int),
}
a := sink.ParseLines(textA)
b := sink.ParseLines(textB)
a := sink.SplitLines(textA)
b := sink.SplitLines(textB)
changes, _ := MyersDiff(context.Background(), a, b)
i := 0
for _, c := range changes {
Expand Down Expand Up @@ -67,8 +67,8 @@ func TestMyersDiff2(t *testing.T) {
sink := &Sink{
Index: make(map[string]int),
}
a := sink.ParseLines(textA)
b := sink.ParseLines(textB)
a := sink.SplitLines(textA)
b := sink.SplitLines(textB)
changes, _ := MyersDiff(context.Background(), a, b)
u := sink.ToUnified(&File{Path: "a.txt"}, &File{Path: "b.txt"}, changes, a, b, DefaultContextLines)
fmt.Fprintf(os.Stderr, "diff:\n%s\n", u.String())
Expand All @@ -88,8 +88,8 @@ func TestMyersDiff3(t *testing.T) {
sink := &Sink{
Index: make(map[string]int),
}
a := sink.ParseLines(textA)
b := sink.ParseLines(textB)
a := sink.SplitLines(textA)
b := sink.SplitLines(textB)
changes, _ := MyersDiff(context.Background(), a, b)
u := sink.ToUnified(&File{Path: "a.txt"}, &File{Path: "b.txt"}, changes, a, b, DefaultContextLines)
fmt.Fprintf(os.Stderr, "diff:\n%s\n", u.String())
Expand Down
9 changes: 4 additions & 5 deletions modules/diferenco/onp.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ package diferenco

import "context"

func onpDiff[E comparable](ctx context.Context, L1 []E, P1 int, L2 []E, P2 int) ([]Change, error) {
func onpCompute[E comparable](ctx context.Context, L1 []E, P1 int, L2 []E, P2 int) ([]Change, error) {
m, n := len(L1), len(L2)
c := &onpCtx[E]{L1: L1, L2: L2, P1: P1, P2: P2}
if n >= m {
Expand Down Expand Up @@ -145,15 +145,14 @@ type onpLcs struct {
next *onpLcs
}

// OnpDiff returns the differences between data.
// It makes O(NP) (the worst case) calls to data.Equal.
// OnpDiff returns the differences between []E.
// It makes O(NP) (the worst case) calls to equal.
func OnpDiff[E comparable](ctx context.Context, L1, L2 []E) ([]Change, error) {
//return myersDiff(L1, 0, L2, 0)
prefix := commonPrefixLength(L1, L2)
L1 = L1[prefix:]
L2 = L2[prefix:]
suffix := commonSuffixLength(L1, L2)
L1 = L1[:len(L1)-suffix]
L2 = L2[:len(L2)-suffix]
return onpDiff(ctx, L1, prefix, L2, prefix)
return onpCompute(ctx, L1, prefix, L2, prefix)
}
4 changes: 2 additions & 2 deletions modules/diferenco/onp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ func TestONP(t *testing.T) {
sink := &Sink{
Index: make(map[string]int),
}
a := sink.ParseLines(textA)
b := sink.ParseLines(textB)
a := sink.SplitLines(textA)
b := sink.SplitLines(textB)
changes, _ := OnpDiff(context.Background(), a, b)
i := 0
for _, c := range changes {
Expand Down
Loading

0 comments on commit 8df8b75

Please sign in to comment.