Change diff.Difference to always return an edit-script (#45)
Rather than performing the heuristic for whether two slices are
"too different" in the diff package, just return the full edit-script
and move the heuristic logic into the cmp package.
The main adjustment to the heuristic is that we only print the
full slice if it is composed of a slice of primitive types (e.g., []byte)
and the difference between the two slices is sufficiently great enough.
Fixes #44
diff --git a/cmp/compare.go b/cmp/compare.go
index 2980d74..eb84d01 100644
--- a/cmp/compare.go
+++ b/cmp/compare.go
@@ -377,15 +377,23 @@
s.curPath.push(step)
// Compute an edit-script for slices vx and vy.
- eq, es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {
+ es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {
step.xkey, step.ykey = ix, iy
return s.statelessCompare(vx.Index(ix), vy.Index(iy))
})
- // Equal or no edit-script, so report entire slices as is.
- if eq || es == nil {
+ // Report the entire slice as is if the arrays are of primitive kind,
+ // and the arrays are different enough.
+ isPrimitive := false
+ switch t.Elem().Kind() {
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
+ reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
+ reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
+ isPrimitive = true
+ }
+ if isPrimitive && es.Dist() > (vx.Len()+vy.Len())/4 {
s.curPath.pop() // Pop first since we are reporting the whole slice
- s.report(eq, vx, vy)
+ s.report(false, vx, vy)
return
}
diff --git a/cmp/compare_test.go b/cmp/compare_test.go
index c60262c..c26ac3f 100644
--- a/cmp/compare_test.go
+++ b/cmp/compare_test.go
@@ -84,6 +84,37 @@
func comparerTests() []test {
const label = "Comparer"
+ type tarHeader struct {
+ Name string
+ Mode int64
+ Uid int
+ Gid int
+ Size int64
+ ModTime time.Time
+ Typeflag byte
+ Linkname string
+ Uname string
+ Gname string
+ Devmajor int64
+ Devminor int64
+ AccessTime time.Time
+ ChangeTime time.Time
+ Xattrs map[string]string
+ }
+
+ makeTarHeaders := func(tf byte) (hs []tarHeader) {
+ for i := 0; i < 5; i++ {
+ hs = append(hs, tarHeader{
+ Name: fmt.Sprintf("some/dummy/test/file%d", i),
+ Mode: 0664, Uid: i * 1000, Gid: i * 1000, Size: 1 << uint(i),
+ ModTime: now.Add(time.Duration(i) * time.Hour),
+ Uname: "user", Gname: "group",
+ Typeflag: tf,
+ })
+ }
+ return hs
+ }
+
return []test{{
label: label,
x: 1,
@@ -301,6 +332,26 @@
+: <non-existent>`,
}, {
label: label,
+ x: makeTarHeaders('0'),
+ y: makeTarHeaders('\x00'),
+ wantDiff: `
+{[]cmp_test.tarHeader}[0].Typeflag:
+ -: 0x30
+ +: 0x00
+{[]cmp_test.tarHeader}[1].Typeflag:
+ -: 0x30
+ +: 0x00
+{[]cmp_test.tarHeader}[2].Typeflag:
+ -: 0x30
+ +: 0x00
+{[]cmp_test.tarHeader}[3].Typeflag:
+ -: 0x30
+ +: 0x00
+{[]cmp_test.tarHeader}[4].Typeflag:
+ -: 0x30
+ +: 0x00`,
+ }, {
+ label: label,
x: make([]int, 1000),
y: make([]int, 1000),
opts: []cmp.Option{
diff --git a/cmp/internal/diff/diff.go b/cmp/internal/diff/diff.go
index baa41fd..260befe 100644
--- a/cmp/internal/diff/diff.go
+++ b/cmp/internal/diff/diff.go
@@ -106,9 +106,9 @@
// Difference reports whether two lists of lengths nx and ny are equal
// given the definition of equality provided as f.
//
-// This function may return a edit-script, which is a sequence of operations
-// needed to convert one list into the other. If non-nil, the following
-// invariants for the edit-script are maintained:
+// This function returns an edit-script, which is a sequence of operations
+// needed to convert one list into the other. The following invariants for
+// the edit-script are maintained:
// • eq == (es.Dist()==0)
// • nx == es.LenX()
// • ny == es.LenY()
@@ -117,17 +117,7 @@
// produces an edit-script with a minimal Levenshtein distance). This algorithm
// favors performance over optimality. The exact output is not guaranteed to
// be stable and may change over time.
-func Difference(nx, ny int, f EqualFunc) (eq bool, es EditScript) {
- es = searchGraph(nx, ny, f)
- st := es.stats()
- eq = len(es) == st.NI
- if !eq && st.NI < (nx+ny)/4 {
- return eq, nil // Edit-script more distracting than helpful
- }
- return eq, es
-}
-
-func searchGraph(nx, ny int, f EqualFunc) EditScript {
+func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// This algorithm is based on traversing what is known as an "edit-graph".
// See Figure 1 from "An O(ND) Difference Algorithm and Its Variations"
// by Eugene W. Myers. Since D can be as large as N itself, this is
diff --git a/cmp/internal/diff/diff_test.go b/cmp/internal/diff/diff_test.go
index 5996ea2..6399509 100644
--- a/cmp/internal/diff/diff_test.go
+++ b/cmp/internal/diff/diff_test.go
@@ -228,14 +228,17 @@
y: " 1234567890",
want: "X.........Y",
}, {
- x: "0123456789",
- y: "9876543210",
+ x: "0 1 2 3 45 6 7 8 9 ",
+ y: " 9 8 7 6 54 3 2 1 0",
+ want: "XYXYXYXYX.YXYXYXYXY",
}, {
- x: "0123456789",
- y: "6725819034",
+ x: "0 1 2345678 9 ",
+ y: " 6 72 5 819034",
+ want: "XYXY.XX.XX.Y.YYY",
}, {
- x: "FBQMOIGTLN72X90E4SP651HKRJUDA83CVZW",
- y: "5WHXO10R9IVKZLCTAJ8P3NSEQM472G6UBDF",
+ x: "F B Q M O I G T L N72X90 E 4S P 651HKRJU DA 83CVZW",
+ y: " 5 W H XO10R9IV K ZLCTAJ8P3N SEQM4 7 2G6 UBD F ",
+ want: "XYXYXYXY.YYYY.YXYXY.YYYYYYY.XXXXXY.YY.XYXYY.XXXXXX.Y.XYXXXXXX",
}}
for _, tt := range tests {
@@ -333,26 +336,17 @@
}
func testStrings(t *testing.T, x, y string) EditScript {
- wantEq := x == y
- eq, es := Difference(len(x), len(y), func(ix, iy int) Result {
+ es := Difference(len(x), len(y), func(ix, iy int) Result {
return compareByte(x[ix], y[iy])
})
- if eq != wantEq {
- t.Errorf("equality mismatch: got %v, want %v", eq, wantEq)
+ if es.LenX() != len(x) {
+ t.Errorf("es.LenX = %d, want %d", es.LenX(), len(x))
}
- if es != nil {
- if es.LenX() != len(x) {
- t.Errorf("es.LenX = %d, want %d", es.LenX(), len(x))
- }
- if es.LenY() != len(y) {
- t.Errorf("es.LenY = %d, want %d", es.LenY(), len(y))
- }
- if got := (es.Dist() == 0); got != wantEq {
- t.Errorf("violation of equality invariant: got %v, want %v", got, wantEq)
- }
- if !validateScript(x, y, es) {
- t.Errorf("invalid edit script: %v", es)
- }
+ if es.LenY() != len(y) {
+ t.Errorf("es.LenY = %d, want %d", es.LenY(), len(y))
+ }
+ if !validateScript(x, y, es) {
+ t.Errorf("invalid edit script: %v", es)
}
return es
}