Support the COPY_4 tag.
It is a valid encoding, even though most encoders no longer issue it.
Benchmark results (old = before this change, new = after):
name old speed new speed delta
WordsDecode1e1-8 525MB/s ± 0% 504MB/s ± 1% -4.04% (p=0.000 n=9+10)
WordsDecode1e2-8 1.23GB/s ± 0% 1.23GB/s ± 1% ~ (p=0.678 n=10+9)
WordsDecode1e3-8 1.54GB/s ± 0% 1.53GB/s ± 1% -0.75% (p=0.000 n=10+9)
WordsDecode1e4-8 1.53GB/s ± 0% 1.51GB/s ± 3% -1.46% (p=0.000 n=9+10)
WordsDecode1e5-8 793MB/s ± 0% 777MB/s ± 2% -2.01% (p=0.017 n=9+10)
WordsDecode1e6-8 917MB/s ± 1% 917MB/s ± 1% ~ (p=0.473 n=8+10)
WordsEncode1e1-8 641MB/s ± 2% 641MB/s ± 0% ~ (p=0.780 n=10+9)
WordsEncode1e2-8 583MB/s ± 0% 580MB/s ± 0% -0.41% (p=0.001 n=10+9)
WordsEncode1e3-8 647MB/s ± 1% 648MB/s ± 0% ~ (p=0.326 n=10+9)
WordsEncode1e4-8 442MB/s ± 1% 452MB/s ± 0% +2.20% (p=0.000 n=10+8)
WordsEncode1e5-8 355MB/s ± 1% 355MB/s ± 0% ~ (p=0.880 n=10+8)
WordsEncode1e6-8 433MB/s ± 0% 434MB/s ± 0% ~ (p=0.700 n=8+8)
RandomEncode-8 14.2GB/s ± 3% 14.2GB/s ± 3% ~ (p=0.780 n=10+9)
_UFlat0-8 2.18GB/s ± 1% 2.19GB/s ± 0% ~ (p=0.447 n=10+9)
_UFlat1-8 1.40GB/s ± 2% 1.41GB/s ± 0% +0.73% (p=0.043 n=9+10)
_UFlat2-8 23.4GB/s ± 3% 23.5GB/s ± 2% ~ (p=0.497 n=9+10)
_UFlat3-8 1.90GB/s ± 0% 1.91GB/s ± 0% +0.30% (p=0.002 n=8+9)
_UFlat4-8 13.9GB/s ± 2% 14.0GB/s ± 1% ~ (p=0.720 n=9+10)
_UFlat5-8 1.96GB/s ± 1% 1.97GB/s ± 0% +0.81% (p=0.000 n=10+9)
_UFlat6-8 813MB/s ± 0% 814MB/s ± 0% +0.17% (p=0.037 n=8+10)
_UFlat7-8 783MB/s ± 2% 785MB/s ± 0% ~ (p=0.340 n=9+9)
_UFlat8-8 859MB/s ± 0% 857MB/s ± 0% ~ (p=0.074 n=8+9)
_UFlat9-8 719MB/s ± 1% 719MB/s ± 1% ~ (p=0.621 n=10+9)
_UFlat10-8 2.84GB/s ± 0% 2.84GB/s ± 0% +0.19% (p=0.043 n=10+9)
_UFlat11-8 1.05GB/s ± 1% 1.05GB/s ± 0% ~ (p=0.523 n=9+8)
_ZFlat0-8 1.04GB/s ± 2% 1.04GB/s ± 0% ~ (p=0.222 n=9+9)
_ZFlat1-8 535MB/s ± 0% 534MB/s ± 0% ~ (p=0.059 n=9+9)
_ZFlat2-8 15.6GB/s ± 3% 15.7GB/s ± 1% ~ (p=0.720 n=9+10)
_ZFlat3-8 723MB/s ± 0% 740MB/s ± 3% +2.36% (p=0.034 n=8+10)
_ZFlat4-8 9.16GB/s ± 1% 9.20GB/s ± 1% ~ (p=0.297 n=9+9)
_ZFlat5-8 987MB/s ± 1% 991MB/s ± 0% ~ (p=0.167 n=9+8)
_ZFlat6-8 378MB/s ± 2% 379MB/s ± 0% ~ (p=0.334 n=9+8)
_ZFlat7-8 350MB/s ± 2% 352MB/s ± 0% +0.60% (p=0.014 n=9+8)
_ZFlat8-8 397MB/s ± 0% 396MB/s ± 1% ~ (p=0.965 n=8+10)
_ZFlat9-8 328MB/s ± 0% 327MB/s ± 1% ~ (p=0.409 n=8+9)
_ZFlat10-8 1.33GB/s ± 0% 1.33GB/s ± 1% ~ (p=0.356 n=9+10)
_ZFlat11-8 605MB/s ± 0% 605MB/s ± 1% ~ (p=0.743 n=9+8)
diff --git a/README b/README
index 6b13826..cea1287 100644
--- a/README
+++ b/README
@@ -13,65 +13,65 @@
The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten
or so files, the same set used by the C++ Snappy code (github.com/google/snappy
and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @
-3.40GHz", Go's GOARCH=amd64 numbers as of 2016-04-29:
+3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29:
"go test -test.bench=."
-_UFlat0-8 2.23GB/s ± 1% html
-_UFlat1-8 1.43GB/s ± 0% urls
-_UFlat2-8 23.7GB/s ± 1% jpg
-_UFlat3-8 1.93GB/s ± 0% jpg_200
-_UFlat4-8 13.9GB/s ± 2% pdf
-_UFlat5-8 2.00GB/s ± 0% html4
-_UFlat6-8 829MB/s ± 0% txt1
-_UFlat7-8 799MB/s ± 0% txt2
-_UFlat8-8 871MB/s ± 0% txt3
-_UFlat9-8 730MB/s ± 0% txt4
-_UFlat10-8 2.87GB/s ± 0% pb
-_UFlat11-8 1.07GB/s ± 0% gaviota
+_UFlat0-8 2.19GB/s ± 0% html
+_UFlat1-8 1.41GB/s ± 0% urls
+_UFlat2-8 23.5GB/s ± 2% jpg
+_UFlat3-8 1.91GB/s ± 0% jpg_200
+_UFlat4-8 14.0GB/s ± 1% pdf
+_UFlat5-8 1.97GB/s ± 0% html4
+_UFlat6-8 814MB/s ± 0% txt1
+_UFlat7-8 785MB/s ± 0% txt2
+_UFlat8-8 857MB/s ± 0% txt3
+_UFlat9-8 719MB/s ± 1% txt4
+_UFlat10-8 2.84GB/s ± 0% pb
+_UFlat11-8 1.05GB/s ± 0% gaviota
_ZFlat0-8 1.04GB/s ± 0% html
-_ZFlat1-8 536MB/s ± 0% urls
-_ZFlat2-8 16.3GB/s ± 2% jpg
-_ZFlat3-8 762MB/s ± 0% jpg_200
-_ZFlat4-8 9.48GB/s ± 1% pdf
-_ZFlat5-8 990MB/s ± 0% html4
-_ZFlat6-8 381MB/s ± 0% txt1
-_ZFlat7-8 353MB/s ± 0% txt2
-_ZFlat8-8 398MB/s ± 0% txt3
-_ZFlat9-8 329MB/s ± 0% txt4
-_ZFlat10-8 1.35GB/s ± 1% pb
-_ZFlat11-8 608MB/s ± 0% gaviota
+_ZFlat1-8 534MB/s ± 0% urls
+_ZFlat2-8 15.7GB/s ± 1% jpg
+_ZFlat3-8 740MB/s ± 3% jpg_200
+_ZFlat4-8 9.20GB/s ± 1% pdf
+_ZFlat5-8 991MB/s ± 0% html4
+_ZFlat6-8 379MB/s ± 0% txt1
+_ZFlat7-8 352MB/s ± 0% txt2
+_ZFlat8-8 396MB/s ± 1% txt3
+_ZFlat9-8 327MB/s ± 1% txt4
+_ZFlat10-8 1.33GB/s ± 1% pb
+_ZFlat11-8 605MB/s ± 1% gaviota
"go test -test.bench=. -tags=noasm"
-_UFlat0-8 637MB/s ± 0% html
-_UFlat1-8 506MB/s ± 0% urls
-_UFlat2-8 23.0GB/s ± 5% jpg
-_UFlat3-8 1.17GB/s ± 0% jpg_200
-_UFlat4-8 4.44GB/s ± 1% pdf
-_UFlat5-8 623MB/s ± 0% html4
-_UFlat6-8 300MB/s ± 1% txt1
-_UFlat7-8 293MB/s ± 0% txt2
-_UFlat8-8 316MB/s ± 0% txt3
-_UFlat9-8 285MB/s ± 0% txt4
-_UFlat10-8 768MB/s ± 0% pb
-_UFlat11-8 406MB/s ± 1% gaviota
+_UFlat0-8 621MB/s ± 2% html
+_UFlat1-8 494MB/s ± 1% urls
+_UFlat2-8 23.2GB/s ± 1% jpg
+_UFlat3-8 1.12GB/s ± 1% jpg_200
+_UFlat4-8 4.35GB/s ± 1% pdf
+_UFlat5-8 609MB/s ± 0% html4
+_UFlat6-8 296MB/s ± 0% txt1
+_UFlat7-8 288MB/s ± 0% txt2
+_UFlat8-8 309MB/s ± 1% txt3
+_UFlat9-8 280MB/s ± 1% txt4
+_UFlat10-8 753MB/s ± 0% pb
+_UFlat11-8 400MB/s ± 0% gaviota
-_ZFlat0-8 411MB/s ± 1% html
+_ZFlat0-8 409MB/s ± 1% html
_ZFlat1-8 250MB/s ± 1% urls
-_ZFlat2-8 12.7GB/s ± 1% jpg
-_ZFlat3-8 157MB/s ± 0% jpg_200
-_ZFlat4-8 2.95GB/s ± 0% pdf
-_ZFlat5-8 406MB/s ± 0% html4
-_ZFlat6-8 182MB/s ± 0% txt1
-_ZFlat7-8 173MB/s ± 1% txt2
-_ZFlat8-8 191MB/s ± 0% txt3
-_ZFlat9-8 166MB/s ± 0% txt4
-_ZFlat10-8 480MB/s ± 0% pb
-_ZFlat11-8 272MB/s ± 0% gaviota
+_ZFlat2-8 12.3GB/s ± 1% jpg
+_ZFlat3-8 132MB/s ± 0% jpg_200
+_ZFlat4-8 2.92GB/s ± 0% pdf
+_ZFlat5-8 405MB/s ± 1% html4
+_ZFlat6-8 179MB/s ± 1% txt1
+_ZFlat7-8 170MB/s ± 1% txt2
+_ZFlat8-8 189MB/s ± 1% txt3
+_ZFlat9-8 164MB/s ± 1% txt4
+_ZFlat10-8 479MB/s ± 1% pb
+_ZFlat11-8 270MB/s ± 1% gaviota
diff --git a/decode.go b/decode.go
index 819c717..72efb03 100644
--- a/decode.go
+++ b/decode.go
@@ -18,7 +18,6 @@
// ErrUnsupported reports that the input isn't supported.
ErrUnsupported = errors.New("snappy: unsupported input")
- errUnsupportedCopy4Tag = errors.New("snappy: unsupported COPY_4 tag")
errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
)
@@ -46,7 +45,6 @@
const (
decodeErrCodeCorrupt = 1
decodeErrCodeUnsupportedLiteralLength = 2
- decodeErrCodeUnsupportedCopy4Tag = 3
)
// Decode returns the decoded form of src. The returned slice may be a sub-
@@ -69,8 +67,6 @@
return dst, nil
case decodeErrCodeUnsupportedLiteralLength:
return nil, errUnsupportedLiteralLength
- case decodeErrCodeUnsupportedCopy4Tag:
- return nil, errUnsupportedCopy4Tag
}
return nil, ErrCorrupt
}
diff --git a/decode_amd64.s b/decode_amd64.s
index ed1e93b..e6179f6 100644
--- a/decode_amd64.s
+++ b/decode_amd64.s
@@ -226,6 +226,25 @@
// ----------------------------------------
// The code below handles copy tags.
+tagCopy4:
+ // case tagCopy4:
+ // s += 5
+ ADDQ $5, SI
+
+ // if uint(s) > uint(len(src)) { etc }
+ MOVQ SI, BX
+ SUBQ R11, BX
+ CMPQ BX, R12
+ JA errCorrupt
+
+ // length = 1 + int(src[s-5])>>2
+ SHRQ $2, CX
+ INCQ CX
+
+ // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
+ MOVLQZX -4(SI), DX
+ JMP doCopy
+
tagCopy2:
// case tagCopy2:
// s += 3
@@ -241,7 +260,7 @@
SHRQ $2, CX
INCQ CX
- // offset = int(src[s-2]) | int(src[s-1])<<8
+ // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
MOVWQZX -2(SI), DX
JMP doCopy
@@ -251,7 +270,7 @@
// - CX == src[s]
CMPQ BX, $2
JEQ tagCopy2
- JA errUC4T
+ JA tagCopy4
// case tagCopy1:
// s += 2
@@ -263,7 +282,7 @@
CMPQ BX, R12
JA errCorrupt
- // offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
+ // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
MOVQ CX, DX
ANDQ $0xe0, DX
SHLQ $3, DX
@@ -469,8 +488,3 @@
// return decodeErrCodeCorrupt
MOVQ $1, ret+48(FP)
RET
-
-errUC4T:
- // return decodeErrCodeUnsupportedCopy4Tag
- MOVQ $3, ret+48(FP)
- RET
diff --git a/decode_other.go b/decode_other.go
index f305b6f..8c9f204 100644
--- a/decode_other.go
+++ b/decode_other.go
@@ -63,7 +63,7 @@
return decodeErrCodeCorrupt
}
length = 4 + int(src[s-2])>>2&0x7
- offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
+ offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
case tagCopy2:
s += 3
@@ -71,10 +71,15 @@
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-3])>>2
- offset = int(src[s-2]) | int(src[s-1])<<8
+ offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
case tagCopy4:
- return decodeErrCodeUnsupportedCopy4Tag
+ s += 5
+ if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+ return decodeErrCodeCorrupt
+ }
+ length = 1 + int(src[s-5])>>2
+ offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
}
if offset <= 0 || d < offset || length > len(dst)-d {
diff --git a/snappy.go b/snappy.go
index 0102542..0cf5e37 100644
--- a/snappy.go
+++ b/snappy.go
@@ -32,7 +32,10 @@
- For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
The length is 1 + m. The offset is the little-endian unsigned integer
denoted by the next 2 bytes.
- - For l == 3, this tag is a legacy format that is no longer supported.
+ - For l == 3, this tag is a legacy format that is no longer issued by most
+ encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in
+ [1, 65). The length is 1 + m. The offset is the little-endian unsigned
+ integer denoted by the next 4 bytes.
*/
const (
tagLiteral = 0x00
diff --git a/snappy_test.go b/snappy_test.go
index ce3f08e..2712710 100644
--- a/snappy_test.go
+++ b/snappy_test.go
@@ -257,10 +257,10 @@
"",
ErrCorrupt,
}, {
- `decodedLen=4; tagCopy4; unsupported COPY_4 tag`,
- "\x04" + "\x03\x00\x00\x00\x00",
+ `decodedLen=4; tagCopy4, 4 extra length|offset bytes; not enough extra bytes`,
+ "\x04" + "\x03\x00\x00\x00",
"",
- errUnsupportedCopy4Tag,
+ ErrCorrupt,
}, {
`decodedLen=4; tagLiteral (4 bytes "abcd"); valid input`,
"\x04" + "\x0cabcd",
@@ -311,6 +311,11 @@
"\x06" + "\x0cabcd" + "\x06\x03\x00",
"abcdbc",
nil,
+ }, {
+ `decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy4; length=2 offset=3; valid input`,
+ "\x06" + "\x0cabcd" + "\x07\x03\x00\x00\x00",
+ "abcdbc",
+ nil,
}}
const (
@@ -369,6 +374,34 @@
}
}
+func TestDecodeCopy4(t *testing.T) {
+ dots := strings.Repeat(".", 65536)
+
+ input := strings.Join([]string{
+ "\x89\x80\x04", // decodedLen = 65545.
+ "\x0cpqrs", // 4-byte literal "pqrs".
+ "\xf4\xff\xff" + dots, // 65536-byte literal dots.
+ "\x13\x04\x00\x01\x00", // tagCopy4; length=5 offset=65540.
+ }, "")
+
+ gotBytes, err := Decode(nil, []byte(input))
+ if err != nil {
+ t.Fatal(err)
+ }
+ got := string(gotBytes)
+ want := "pqrs" + dots + "pqrs."
+ if len(got) != len(want) {
+ t.Fatalf("got %d bytes, want %d", len(got), len(want))
+ }
+ if got != want {
+ for i := 0; i < len(got); i++ {
+ if g, w := got[i], want[i]; g != w {
+ t.Fatalf("byte #%d: got %#02x, want %#02x", i, g, w)
+ }
+ }
+ }
+}
+
// TestDecodeLengthOffset tests decoding an encoding of the form literal +
// copy-length-offset + literal. For example: "abcdefghijkl" + "efghij" + "AB".
func TestDecodeLengthOffset(t *testing.T) {