openpgp/clearsign: reject potentially misleading headers and messages

Aida Mynzhasova of SEC Consult Vulnerability Lab reported that the
clearsign package accepts some malformed messages, which can make it
possible for an attacker to trick a human user (but not a Go program)
into thinking unverified text is part of the message.

For example, if in the following message the vertical tab character is
printed as a new line, a human observer could believe that the
unverified header text is part of the signed message.

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1\x0b\x0bThis text is part of the header.

Hello world!
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.10 (GNU/Linux)

iJwEAQECAAYFAk8kMuEACgkQO9o98PRieSpMsAQAhmY/vwmNpflrPgmfWsYhk5O8
[...]
MyTpno24AjIAGb+mH1U=
=hIJ6
-----END PGP SIGNATURE-----

The OpenPGP specs are delightfully vague about purpose and validation of
these headers. RFC 4880, Section 7 says

    The cleartext signed message consists of:

        - The cleartext header '-----BEGIN PGP SIGNED MESSAGE-----' on a
        single line,

        - One or more "Hash" Armor Headers,

        - Exactly one empty line not included into the message digest,
        [...]

but also

    If MD5 is the only hash used, then an
    implementation MAY omit this header for improved V2.x compatibility.

and

    If more than one message digest is used in the signature, the "Hash"
    armor header contains a comma-delimited list of used message digests.

which seems to suggest that there can be zero or more Hash headers, each
with one or more algorithms, and no other header types.

Anyway, it's entirely unclear what security purpose, if any, the Hash
header accomplishes. If the hash is too weak to be secure or
unsupported, the verification will fail. Otherwise, the user shouldn't
care. Given its dubious function, avoid breaking abstractions to check
that it matches the signature, and just document it as unverified.

As for valid characters, RFC 4880 is silent, except reluctantly
mentioning that the Comment header can be UTF-8, but I am going to
assume that all hash algorithms will have ASCII names, because come on.

Even more importantly, reject non-Hash SIGNED MESSAGE headers (as opposed
to the SIGNATURE headers), to prevent a "Thank you!" message turning into

-----BEGIN PGP SIGNED MESSAGE-----
Reminder: I need you to wire $100k to 12345566 as soon as possible.

Thank you!
-----BEGIN PGP SIGNATURE-----
[...]

While at it, also check for trailing characters after the signed message
delimiter, as they are invalid and can be similarly used to confuse humans.

The Decode API is also unfortunate in that it doesn't return an error,
so we can't tell the user what's wrong with the message, but that's what
we've got.

Change-Id: I8a72c4851075337443d7a27e0b49a6b6e39f5a41
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/453011
Reviewed-by: Adam Langley <agl@google.com>
Reviewed-on: https://go-review.googlesource.com/c/crypto/+/173778
Run-TryBot: Filippo Valsorda <filippo@golang.org>
Reviewed-by: Adam Langley <agl@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/openpgp/clearsign/clearsign.go b/openpgp/clearsign/clearsign.go
index a9437dc..c360460 100644
--- a/openpgp/clearsign/clearsign.go
+++ b/openpgp/clearsign/clearsign.go
@@ -18,6 +18,7 @@
 	"io"
 	"net/textproto"
 	"strconv"
+	"strings"
 
 	"golang.org/x/crypto/openpgp/armor"
 	"golang.org/x/crypto/openpgp/errors"
@@ -27,7 +28,7 @@
 // A Block represents a clearsigned message. A signature on a Block can
 // be checked by passing Bytes into openpgp.CheckDetachedSignature.
 type Block struct {
-	Headers          textproto.MIMEHeader // Optional message headers
+	Headers          textproto.MIMEHeader // Optional unverified Hash headers
 	Plaintext        []byte               // The original message text
 	Bytes            []byte               // The signed message
 	ArmoredSignature *armor.Block         // The signature block
@@ -69,8 +70,13 @@
 	return data[0:i], data[j:]
 }
 
-// Decode finds the first clearsigned message in data and returns it, as well
-// as the suffix of data which remains after the message.
+// Decode finds the first clearsigned message in data and returns it, as well as
+// the suffix of data which remains after the message. Any prefix data is
+// discarded.
+//
+// If no message is found, or if the message is invalid, Decode returns nil and
+// the whole data slice. The only allowed header type is Hash, and it is not
+// verified against the signature hash.
 func Decode(data []byte) (b *Block, rest []byte) {
 	// start begins with a newline. However, at the very beginning of
 	// the byte array, we'll accept the start string without it.
@@ -83,8 +89,11 @@
 		return nil, data
 	}
 
-	// Consume the start line.
-	_, rest = getLine(rest)
+	// Consume the start line and check it does not have a suffix.
+	suffix, rest := getLine(rest)
+	if len(suffix) != 0 {
+		return nil, data
+	}
 
 	var line []byte
 	b = &Block{
@@ -103,15 +112,25 @@
 			break
 		}
 
+		// Reject headers with control or Unicode characters.
+		if i := bytes.IndexFunc(line, func(r rune) bool {
+			return r < 0x20 || r > 0x7e
+		}); i != -1 {
+			return nil, data
+		}
+
 		i := bytes.Index(line, []byte{':'})
 		if i == -1 {
 			return nil, data
 		}
 
-		key, val := line[0:i], line[i+1:]
-		key = bytes.TrimSpace(key)
-		val = bytes.TrimSpace(val)
-		b.Headers.Add(string(key), string(val))
+		key, val := string(line[0:i]), string(line[i+1:])
+		key = strings.TrimSpace(key)
+		if key != "Hash" {
+			return nil, data
+		}
+		val = strings.TrimSpace(val)
+		b.Headers.Add(key, val)
 	}
 
 	firstLine := true
diff --git a/openpgp/clearsign/clearsign_test.go b/openpgp/clearsign/clearsign_test.go
index 96f5d78..737f41f 100644
--- a/openpgp/clearsign/clearsign_test.go
+++ b/openpgp/clearsign/clearsign_test.go
@@ -47,12 +47,6 @@
 	testParse(t, clearsignInput2, "\r\n\r\n(This message has a couple of blank lines at the start and end.)\r\n\r\n", "\n\n(This message has a couple of blank lines at the start and end.)\n\n\n")
 }
 
-func TestParseInvalid(t *testing.T) {
-	if b, _ := Decode(clearsignInput3); b != nil {
-		t.Fatal("decoded a bad clearsigned message without any error")
-	}
-}
-
 func TestParseWithNoNewlineAtEnd(t *testing.T) {
 	input := clearsignInput
 	input = input[:len(input)-len("trailing")-1]
@@ -140,6 +134,10 @@
 }
 
 func TestMultiSign(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping long test in -short mode")
+	}
+
 	zero := quickRand(0)
 	config := packet.Config{Rand: &zero}
 
@@ -193,6 +191,59 @@
 	}
 }
 
+const signatureBlock = `
+-----BEGIN PGP SIGNATURE-----
+Version: OpenPrivacy 0.99
+
+yDgBO22WxBHv7O8X7O/jygAEzol56iUKiXmV+XmpCtmpqQUKiQrFqclFqUDBovzS
+vBSFjNSiVHsuAA==
+=njUN
+-----END PGP SIGNATURE-----
+`
+
+var invalidInputs = []string{
+	`
+-----BEGIN PGP SIGNED MESSAGE-----
+Hash: SHA256
+
+(This message was truncated.)
+`,
+	`
+-----BEGIN PGP SIGNED MESSAGE-----garbage
+Hash: SHA256
+
+_o/
+` + signatureBlock,
+	`
+garbage-----BEGIN PGP SIGNED MESSAGE-----
+Hash: SHA256
+
+_o/
+` + signatureBlock,
+	`
+-----BEGIN PGP SIGNED MESSAGE-----
+Hash: SHA` + "\x0b\x0b" + `256
+
+_o/
+` + signatureBlock,
+	`
+-----BEGIN PGP SIGNED MESSAGE-----
+NotHash: SHA256
+
+_o/
+` + signatureBlock,
+}
+
+func TestParseInvalid(t *testing.T) {
+	for i, input := range invalidInputs {
+		if b, rest := Decode([]byte(input)); b != nil {
+			t.Errorf("#%d: decoded a bad clearsigned message without any error", i)
+		} else if string(rest) != input {
+			t.Errorf("#%d: did not return all data with a bad message", i)
+		}
+	}
+}
+
 var clearsignInput = []byte(`
 ;lasjlkfdsa
 
@@ -235,13 +286,6 @@
 
 trailing`)
 
-var clearsignInput3 = []byte(`
------BEGIN PGP SIGNED MESSAGE-----
-Hash: SHA256
-
-(This message was truncated.)
-`)
-
 var signingKey = `-----BEGIN PGP PRIVATE KEY BLOCK-----
 Version: GnuPG v1.4.10 (GNU/Linux)