Make a small decoder optimization: s/uint/uint32/ for literal lengths.

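I'm not entirely sure why the benchmark numbers improve as much as they
do, but I'll take it.

Besides the type change, the length computation changes from int(x + 1)
to int(x) + 1, so the increment is performed in int rather than uint32
arithmetic.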

benchmark                     old MB/s     new MB/s     speedup
BenchmarkWordsDecode1e1-8     482.12       485.84       1.01x
BenchmarkWordsDecode1e2-8     372.28       421.86       1.13x
BenchmarkWordsDecode1e3-8     482.21       525.44       1.09x
BenchmarkWordsDecode1e4-8     339.46       360.87       1.06x
BenchmarkWordsDecode1e5-8     264.90       270.42       1.02x
BenchmarkWordsDecode1e6-8     284.27       290.98       1.02x
Benchmark_UFlat0-8            511.15       544.02       1.06x
Benchmark_UFlat1-8            431.52       450.03       1.04x
Benchmark_UFlat2-8            15208.70     15099.07     0.99x
Benchmark_UFlat3-8            805.02       871.78       1.08x
Benchmark_UFlat4-8            2631.19      2980.30      1.13x
Benchmark_UFlat5-8            501.62       535.45       1.07x
Benchmark_UFlat6-8            271.30       278.13       1.03x
Benchmark_UFlat7-8            265.19       272.14       1.03x
Benchmark_UFlat8-8            282.54       288.80       1.02x
Benchmark_UFlat9-8            256.39       262.69       1.02x
Benchmark_UFlat10-8           590.37       640.96       1.09x
Benchmark_UFlat11-8           339.13       357.01       1.05x
diff --git a/decode.go b/decode.go
index e99b20f..666e170 100644
--- a/decode.go
+++ b/decode.go
@@ -63,7 +63,7 @@
 	for s < len(src) {
 		switch src[s] & 0x03 {
 		case tagLiteral:
-			x := uint(src[s] >> 2)
+			x := uint32(src[s] >> 2)
 			switch {
 			case x < 60:
 				s++
@@ -72,27 +72,27 @@
 				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
 					return nil, ErrCorrupt
 				}
-				x = uint(src[s-1])
+				x = uint32(src[s-1])
 			case x == 61:
 				s += 3
 				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
 					return nil, ErrCorrupt
 				}
-				x = uint(src[s-2]) | uint(src[s-1])<<8
+				x = uint32(src[s-2]) | uint32(src[s-1])<<8
 			case x == 62:
 				s += 4
 				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
 					return nil, ErrCorrupt
 				}
-				x = uint(src[s-3]) | uint(src[s-2])<<8 | uint(src[s-1])<<16
+				x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
 			case x == 63:
 				s += 5
 				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
 					return nil, ErrCorrupt
 				}
-				x = uint(src[s-4]) | uint(src[s-3])<<8 | uint(src[s-2])<<16 | uint(src[s-1])<<24
+				x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
 			}
-			length = int(x + 1)
+			length = int(x) + 1
 			if length <= 0 {
 				return nil, errUnsupportedLiteralLength
 			}