curve25519: mask high bit when loading group point

Comparison against BoringSSL-generated test vectors showed mismatches
with the pure Go implementation of curve25519. The problem was narrowed
down to a missing mask in feFromBytes(). This diff adds the mask,
bringing the Go code back in line with the reference implementation and
RFC 7748:

    When receiving such an array, implementations of X25519 (but not
    X448) MUST mask the most significant bit in the final byte.  This is
    done to preserve compatibility with point formats that reserve the
    sign bit for use in other protocols and to increase resistance to
    implementation fingerprinting.

Fixes golang/go#30095

Change-Id: If7efc0e2acd6efb761d6e3cb89cec359d7d81cb1
Reviewed-on: https://go-review.googlesource.com/c/161257
Run-TryBot: Filippo Valsorda <filippo@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
diff --git a/curve25519/curve25519.go b/curve25519/curve25519.go
index cb8fbc5..75f24ba 100644
--- a/curve25519/curve25519.go
+++ b/curve25519/curve25519.go
@@ -86,7 +86,7 @@
 	h6 := load3(src[20:]) << 7
 	h7 := load3(src[23:]) << 5
 	h8 := load3(src[26:]) << 4
-	h9 := load3(src[29:]) << 2
+	h9 := (load3(src[29:]) & 0x7fffff) << 2
 
 	var carry [10]int64
 	carry[9] = (h9 + 1<<24) >> 25
diff --git a/curve25519/curve25519_test.go b/curve25519/curve25519_test.go
index 051a830..7b6cdd4 100644
--- a/curve25519/curve25519_test.go
+++ b/curve25519/curve25519_test.go
@@ -5,6 +5,8 @@
 package curve25519
 
 import (
+	"bytes"
+	"crypto/rand"
 	"fmt"
 	"testing"
 )
@@ -28,6 +30,30 @@
 	}
 }
 
+// TestHighBitIgnored tests the following requirement in RFC 7748:
+//
+//	When receiving such an array, implementations of X25519 (but not X448) MUST
+//	mask the most significant bit in the final byte.
+//
+// Regression test for issue #30095.
+func TestHighBitIgnored(t *testing.T) {
+	var s, u [32]byte
+	rand.Read(s[:])
+	rand.Read(u[:])
+
+	var hi0, hi1 [32]byte
+
+	u[31] &= 0x7f
+	ScalarMult(&hi0, &s, &u)
+
+	u[31] |= 0x80
+	ScalarMult(&hi1, &s, &u)
+
+	if !bytes.Equal(hi0[:], hi1[:]) {
+		t.Errorf("high bit of group point should not affect result")
+	}
+}
+
 func BenchmarkScalarBaseMult(b *testing.B) {
 	var in, out [32]byte
 	in[0] = 1