Adjust heuristic for line-based versus byte-based diffing (#299)
If a string contains many characters that require escape sequences to print,
a byte-by-byte diff becomes unreadable, so the heuristic now accounts for the escaping overhead and prefers line-based diffing in that case.
Co-authored-by: Damien Neil <neild@users.noreply.github.com>
diff --git a/cmp/compare_test.go b/cmp/compare_test.go
index dc86f01..88b7d45 100644
--- a/cmp/compare_test.go
+++ b/cmp/compare_test.go
@@ -1403,6 +1403,23 @@
[]byte("\xffoo"), []byte("foo"), []byte("barbaz"), []byte("added"), []byte("here"), []byte("hrmph\xff"),
},
reason: "should print text byte slices as strings except those with binary",
+ }, {
+ label: label + "/ManyEscapeCharacters",
+ x: `[
+ {"Base32": "NA======"},
+ {"Base32": "NBSQ===="},
+ {"Base32": "NBSWY==="},
+ {"Base32": "NBSWY3A="},
+ {"Base32": "NBSWY3DP"}
+]`,
+ y: `[
+ {"Base32": "NB======"},
+ {"Base32": "NBSQ===="},
+ {"Base32": "NBSWY==="},
+ {"Base32": "NBSWY3A="},
+ {"Base32": "NBSWY3DP"}
+]`,
+ reason: "should use line-based diffing since byte-based diffing is unreadable due to heavy amounts of escaping",
}}
}
diff --git a/cmp/report_slices.go b/cmp/report_slices.go
index b38ed68..23e444f 100644
--- a/cmp/report_slices.go
+++ b/cmp/report_slices.go
@@ -147,7 +147,10 @@
})
efficiencyLines := float64(esLines.Dist()) / float64(len(esLines))
efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes))
- isPureLinedText = efficiencyLines < 4*efficiencyBytes
+ quotedLength := len(strconv.Quote(sx + sy))
+ unquotedLength := len(sx) + len(sy)
+ escapeExpansionRatio := float64(quotedLength) / float64(unquotedLength)
+ isPureLinedText = efficiencyLines < 4*efficiencyBytes || escapeExpansionRatio > 1.1
}
}
diff --git a/cmp/testdata/diffs b/cmp/testdata/diffs
index 8bff76f..be77b95 100644
--- a/cmp/testdata/diffs
+++ b/cmp/testdata/diffs
@@ -1182,6 +1182,18 @@
+ {0x68, 0x72, 0x6d, 0x70, 0x68, 0xff},
}
>>> TestDiff/Reporter/SliceOfBytesBinary
+<<< TestDiff/Reporter/ManyEscapeCharacters
+ (
+ """
+ [
+- {"Base32": "NA======"},
++ {"Base32": "NB======"},
+ {"Base32": "NBSQ===="},
+ {"Base32": "NBSWY==="},
+ ... // 3 identical lines
+ """
+ )
+>>> TestDiff/Reporter/ManyEscapeCharacters
<<< TestDiff/EmbeddedStruct/ParentStructA/Inequal
teststructs.ParentStructA{
privateStruct: teststructs.privateStruct{