blob: cbc7d46a202c6bbd83e4042c3ea1a4024057fc0c [file] [log] [blame]
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
////////////////////////////////////////////////////////////////////////////////
package com.google.crypto.tink.subtle;
import static com.google.crypto.tink.subtle.Ed25519Constants.B2;
import static com.google.crypto.tink.subtle.Ed25519Constants.B_TABLE;
import static com.google.crypto.tink.subtle.Ed25519Constants.D;
import static com.google.crypto.tink.subtle.Ed25519Constants.D2;
import static com.google.crypto.tink.subtle.Ed25519Constants.SQRTM1;
import static com.google.crypto.tink.subtle.Field25519.FIELD_LEN;
import static com.google.crypto.tink.subtle.Field25519.LIMB_CNT;
import java.security.GeneralSecurityException;
import java.security.MessageDigest;
import java.util.Arrays;
/**
* This implementation is based on the ed25519/ref10 implementation in NaCl.
*
* <p>It implements this twisted Edwards curve:
*
* <pre>
* -x^2 + y^2 = 1 + (-121665 / 121666 mod 2^255-19)*x^2*y^2
* </pre>
*
* @see <a href="https://eprint.iacr.org/2008/013.pdf">Bernstein D.J., Birkner P., Joye M., Lange
* T., Peters C. (2008) Twisted Edwards Curves</a>
* @see <a href="https://eprint.iacr.org/2008/522.pdf">Hisil H., Wong K.KH., Carter G., Dawson E.
* (2008) Twisted Edwards Curves Revisited</a>
*/
final class Ed25519 {
/** Length in bytes of a secret key. */
public static final int SECRET_KEY_LEN = FIELD_LEN;

/** Length in bytes of an encoded public key. */
public static final int PUBLIC_KEY_LEN = FIELD_LEN;

/** Length in bytes of a signature: two field-sized halves. */
public static final int SIGNATURE_LEN = 2 * FIELD_LEN;

/**
 * The neutral point (x = 0, y = 1) in cached form: y + x = 1, y - x = 1 and 2d * xy = 0.
 */
private static final CachedXYT CACHED_NEUTRAL =
    new CachedXYT(
        /* yPlusX= */ new long[] {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        /* yMinusX= */ new long[] {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
        /* t2d= */ new long[] {0, 0, 0, 0, 0, 0, 0, 0, 0, 0});

/**
 * The neutral point (x = 0, y = 1) in partial form: X = 0, Y = 1, Z = 1, T = 1, so that
 * x = X/Z = 0 and y = Y/T = 1.
 */
private static final PartialXYZT NEUTRAL =
    new PartialXYZT(
        new XYZ(
            /* x= */ new long[] {0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            /* y= */ new long[] {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            /* z= */ new long[] {1, 0, 0, 0, 0, 0, 0, 0, 0, 0}),
        /* t= */ new long[] {1, 0, 0, 0, 0, 0, 0, 0, 0, 0});
/**
 * A point in projective coordinates (X:Y:Z), where the affine coordinates are x = X/Z and
 * y = Y/Z.
 *
 * <p>This is the counterpart of ge_p2 in the ref10 implementation. The fields use the lower-case
 * names x, y and z (for X, Y and Z) to follow Java naming conventions.
 *
 * <p>See Koyama K., Tsuruoka Y. (1993) Speeding up Elliptic Cryptosystems by Using a Signed
 * Binary Window Method.
 *
 * <p>https://hyperelliptic.org/EFD/g1p/auto-twisted-projective.html
 */
private static class XYZ {
  final long[] x;
  final long[] y;
  final long[] z;

  /** Creates a point whose three coordinates are freshly allocated all-zero limb arrays. */
  XYZ() {
    this(new long[LIMB_CNT], new long[LIMB_CNT], new long[LIMB_CNT]);
  }

  /** Wraps the given limb arrays without copying them. */
  XYZ(long[] x, long[] y, long[] z) {
    this.x = x;
    this.y = y;
    this.z = z;
  }

  /** Creates a copy of {@code xyz}; each coordinate is copied into a new array. */
  XYZ(XYZ xyz) {
    this.x = Arrays.copyOf(xyz.x, LIMB_CNT);
    this.y = Arrays.copyOf(xyz.y, LIMB_CNT);
    this.z = Arrays.copyOf(xyz.z, LIMB_CNT);
  }

  /** Creates a projective point by converting {@code partialXYZT}. */
  XYZ(PartialXYZT partialXYZT) {
    this();
    fromPartialXYZT(this, partialXYZT);
  }

  /**
   * Converts {@code in} to projective coordinates and stores the result in {@code out}
   * (ge_p1p1_to_p2.c in ref10).
   *
   * @return {@code out}, to allow call chaining
   */
  static XYZ fromPartialXYZT(XYZ out, PartialXYZT in) {
    final XYZ src = in.xyz;
    // X' = X * T
    Field25519.mult(out.x, src.x, in.t);
    // Y' = Y * Z
    Field25519.mult(out.y, src.y, src.z);
    // Z' = Z * T
    Field25519.mult(out.z, src.z, in.t);
    return out;
  }

  /**
   * Encodes this point to bytes: the contracted affine y coordinate, with the least significant
   * bit of the affine x coordinate XOR-ed into the top bit of the last byte.
   */
  byte[] toBytes() {
    long[] zInverse = new long[LIMB_CNT];
    Field25519.inverse(zInverse, z);

    long[] affineX = new long[LIMB_CNT];
    long[] affineY = new long[LIMB_CNT];
    Field25519.mult(affineX, this.x, zInverse);
    Field25519.mult(affineY, this.y, zInverse);

    byte[] encoded = Field25519.contract(affineY);
    int signBit = getLsb(affineX) << 7;
    encoded[31] = (byte) (encoded[31] ^ signBit);
    return encoded;
  }

  /**
   * Checks that the point is on curve, i.e. that the projective curve equation
   * z^2 * (y^2 - x^2) == z^4 + D * x^2 * y^2 holds.
   */
  boolean isOnCurve() {
    long[] xx = new long[LIMB_CNT];
    long[] yy = new long[LIMB_CNT];
    long[] zz = new long[LIMB_CNT];
    long[] zzzz = new long[LIMB_CNT];
    long[] left = new long[LIMB_CNT];
    long[] right = new long[LIMB_CNT];

    Field25519.square(xx, x);
    Field25519.square(yy, y);
    Field25519.square(zz, z);
    Field25519.square(zzzz, zz);

    // left = z^2 * (y^2 - x^2)
    Field25519.sub(left, yy, xx);
    Field25519.mult(left, left, zz);

    // right = z^4 + D * x^2 * y^2
    Field25519.mult(right, xx, yy);
    Field25519.mult(right, right, D);
    Field25519.sum(right, zzzz);
    // Unlike Field25519.mult, Field25519.sum leaves its output unreduced, so reduce explicitly
    // before comparing the contracted encodings.
    Field25519.reduce(right, right);

    byte[] leftBytes = Field25519.contract(left);
    byte[] rightBytes = Field25519.contract(right);
    return Bytes.equal(leftBytes, rightBytes);
  }
}
/**
* Represents extended projective point representation (X:Y:Z:T) satisfying x = X/Z, y = Y/Z,
* XY = ZT
*
* Note that this is referred as ge_p3 in ref10 impl.
* Also note that t = T below following Java coding style.
*
* See
* Hisil H., Wong K.KH., Carter G., Dawson E. (2008) Twisted Edwards Curves Revisited.
*
* https://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html
*/
private static class XYZT {
// Projective part (X, Y, Z) of the point.
final XYZ xyz;
// Extended coordinate T.
final long[] t;
/** Creates a point whose coordinates are freshly allocated all-zero limb arrays. */
XYZT() {
this(new XYZ(), new long[LIMB_CNT]);
}
/** Wraps {@code xyz} and {@code t} without copying them. */
XYZT(XYZ xyz, long[] t) {
this.xyz = xyz;
this.t = t;
}
/** Creates an extended point by converting {@code partialXYZT}. */
XYZT(PartialXYZT partialXYZT) {
this();
fromPartialXYZT(this, partialXYZT);
}
/**
* Converts {@code in} to extended coordinates and stores the result in {@code out}.
*
* ge_p1p1_to_p3.c
*
* @return {@code out}, to allow call chaining
*/
private static XYZT fromPartialXYZT(XYZT out, PartialXYZT in) {
// X' = X * T
Field25519.mult(out.xyz.x, in.xyz.x, in.t);
// Y' = Y * Z
Field25519.mult(out.xyz.y, in.xyz.y, in.xyz.z);
// Z' = Z * T
Field25519.mult(out.xyz.z, in.xyz.z, in.t);
// T' = X * Y
Field25519.mult(out.t, in.xyz.x, in.xyz.y);
return out;
}
/**
* Decodes {@code s} into an extended projective point and negates it: the returned point's x
* has the opposite sign from the one encoded in the top bit of {@code s[31]}.
* See Section 5.1.3 Decoding in https://tools.ietf.org/html/rfc8032#section-5.1.3
*
* Execution time depends on the input (see {@link #isNonZeroVarTime}).
*
* @param s encoded point; {@code s[31]} is read for the sign bit
* @throws GeneralSecurityException if no x coordinate exists for the encoded y, or if the
* computed x is zero while the encoded sign bit is set
*/
private static XYZT fromBytesNegateVarTime(byte[] s) throws GeneralSecurityException {
long[] x = new long[LIMB_CNT];
long[] y = Field25519.expand(s);
// z = 1, so the returned point is in affine form.
long[] z = new long[LIMB_CNT]; z[0] = 1;
long[] t = new long[LIMB_CNT];
long[] u = new long[LIMB_CNT];
long[] v = new long[LIMB_CNT];
long[] vxx = new long[LIMB_CNT];
long[] check = new long[LIMB_CNT];
Field25519.square(u, y);
Field25519.mult(v, u, D);
Field25519.sub(u, u, z); // u = y^2 - 1
Field25519.sum(v, v, z); // v = dy^2 + 1
long[] v3 = new long[LIMB_CNT];
Field25519.square(v3, v);
Field25519.mult(v3, v3, v); // v3 = v^3
Field25519.square(x, v3);
Field25519.mult(x, x, v);
Field25519.mult(x, x, u); // x = uv^7
pow2252m3(x, x); // x = (uv^7)^((q-5)/8)
Field25519.mult(x, x, v3);
Field25519.mult(x, x, u); // x = uv^3(uv^7)^((q-5)/8)
// x is now a candidate square root of u/v; verify it by checking v*x^2 against +u and -u.
Field25519.square(vxx, x);
Field25519.mult(vxx, vxx, v);
Field25519.sub(check, vxx, u); // vx^2-u
if (isNonZeroVarTime(check)) {
Field25519.sum(check, vxx, u); // vx^2+u
if (isNonZeroVarTime(check)) {
throw new GeneralSecurityException("Cannot convert given bytes to extended projective "
+ "coordinates. No square root exists for modulo 2^255-19");
}
// v*x^2 == -u: multiplying the candidate by sqrt(-1) gives a root of u/v.
Field25519.mult(x, x, SQRTM1);
}
// x == 0 together with a set sign bit is rejected.
if (!isNonZeroVarTime(x) && (s[31] & 0xff) >> 7 != 0) {
throw new GeneralSecurityException("Cannot convert given bytes to extended projective "
+ "coordinates. Computed x is zero and encoded x's least significant bit is not zero");
}
// Negate x when its least significant bit equals the encoded sign bit, so that the returned
// point is the negative of the encoded point.
if (getLsb(x) == ((s[31] & 0xff) >> 7)) {
neg(x, x);
}
// t = x * y (with z = 1).
Field25519.mult(t, x, y);
return new XYZT(new XYZ(x, y, z), t);
}
}
/**
 * A point in partial projective form ((X:Z),(Y:T)), where x = X/Z and y = Y/T.
 *
 * <p>The ref10 implementation calls this the "completed" form (ge_p1p1). The field t holds T,
 * following Java naming conventions.
 *
 * <p>Structurally this is the same as {@link XYZT}; it is a separate type so that the code stays
 * readable and maps one-to-one onto ref10.
 *
 * <p>Conversion to XYZT (X1:Y1:Z1:T1):
 *
 * <pre>
 * X1 = X * T = x * Z * T = x * Z1
 * Y1 = Y * Z = y * T * Z = y * Z1
 * Z1 = Z * T
 * T1 = X * Y = x * Z * y * T = x * y * Z1 = X1 * Y1 / Z1
 * </pre>
 */
private static class PartialXYZT {
  final XYZ xyz;
  final long[] t;

  /** Wraps {@code xyz} and {@code t} without copying them. */
  PartialXYZT(XYZ xyz, long[] t) {
    this.xyz = xyz;
    this.t = t;
  }

  /** Creates a point whose coordinates are freshly allocated all-zero limb arrays. */
  PartialXYZT() {
    this(new XYZ(), new long[LIMB_CNT]);
  }

  /** Creates a copy of {@code other}; every coordinate is copied into a new array. */
  PartialXYZT(PartialXYZT other) {
    this.xyz = new XYZ(other.xyz);
    this.t = Arrays.copyOf(other.t, LIMB_CNT);
  }
}
/**
 * Precomputed ("cached") form of a point with Z = 1, holding y + x, y - x and 2d * xy.
 *
 * <p>Corresponds to the caching mentioned in the last paragraph of Section 3.1 of
 * Hisil H., Wong K.KH., Carter G., Dawson E. (2008) Twisted Edwards Curves Revisited.
 */
static class CachedXYT {
  final long[] yPlusX;
  final long[] yMinusX;
  final long[] t2d;

  /**
   * Creates a cached XYZT with Z = 1. The arrays are stored as given, not copied.
   *
   * @param yPlusX y + x
   * @param yMinusX y - x
   * @param t2d 2d * xy
   */
  CachedXYT(long[] yPlusX, long[] yMinusX, long[] t2d) {
    this.yPlusX = yPlusX;
    this.yMinusX = yMinusX;
    this.t2d = t2d;
  }

  /** Creates a cached point whose three values are freshly allocated all-zero limb arrays. */
  CachedXYT() {
    this(new long[LIMB_CNT], new long[LIMB_CNT], new long[LIMB_CNT]);
  }

  /** Creates a copy of {@code other}; each value is copied into a new array. */
  CachedXYT(CachedXYT other) {
    this.yPlusX = Arrays.copyOf(other.yPlusX, LIMB_CNT);
    this.yMinusX = Arrays.copyOf(other.yMinusX, LIMB_CNT);
    this.t2d = Arrays.copyOf(other.t2d, LIMB_CNT);
  }

  /**
   * Stores {@code in} times this point's Z in {@code output}. Z is implicitly one here, so this
   * is a plain copy of the first LIMB_CNT limbs.
   */
  void multByZ(long[] output, long[] in) {
    System.arraycopy(in, 0, output, 0, LIMB_CNT);
  }

  /**
   * If icopy is 1, copies {@code other} into this point. Time invariant wrt to icopy value.
   */
  void copyConditional(CachedXYT other, int icopy) {
    Curve25519.copyConditional(this.yPlusX, other.yPlusX, icopy);
    Curve25519.copyConditional(this.yMinusX, other.yMinusX, icopy);
    Curve25519.copyConditional(this.t2d, other.t2d, icopy);
  }
}
/**
 * Cached form of a point that also carries an explicit Z coordinate, so that
 * {@link #multByZ} performs a real field multiplication.
 */
private static class CachedXYZT extends CachedXYT {
  private final long[] z;

  /**
   * Creates a cached XYZT. The arrays are stored as given, not copied.
   *
   * @param yPlusX Y + X
   * @param yMinusX Y - X
   * @param z Z
   * @param t2d 2d * (XY/Z)
   */
  CachedXYZT(long[] yPlusX, long[] yMinusX, long[] z, long[] t2d) {
    super(yPlusX, yMinusX, t2d);
    this.z = z;
  }

  /** Creates a cached point whose four values are freshly allocated all-zero limb arrays. */
  CachedXYZT() {
    this(new long[LIMB_CNT], new long[LIMB_CNT], new long[LIMB_CNT], new long[LIMB_CNT]);
  }

  /**
   * Builds the cached form of the extended point {@code xyzt} (ge_p3_to_cached.c in ref10).
   */
  CachedXYZT(XYZT xyzt) {
    this();
    final XYZ p = xyzt.xyz;
    // Y + X
    Field25519.sum(this.yPlusX, p.y, p.x);
    // Y - X
    Field25519.sub(this.yMinusX, p.y, p.x);
    // Z is copied as is.
    System.arraycopy(p.z, 0, this.z, 0, LIMB_CNT);
    // 2d * T
    Field25519.mult(this.t2d, xyzt.t, D2);
  }

  /** Stores {@code in} multiplied by this point's Z in {@code output}. */
  @Override
  public void multByZ(long[] output, long[] in) {
    Field25519.mult(output, in, this.z);
  }
}
/**
 * Computes {@code extended} + {@code cached} and stores the sum in {@code partialXYZT}.
 *
 * <p>This is the addition from Section 3.1 of Hisil H., Wong K.KH., Carter G., Dawson E. (2008)
 * Twisted Edwards Curves Revisited, stopping short of the final conversion from PartialXYZT to
 * XYZT.
 *
 * <p>The coordinates of {@code partialXYZT} double as scratch space, so the order of the
 * operations below matters.
 *
 * @param partialXYZT output; overwritten with the sum
 * @param extended extended projective point input
 * @param cached cached projective point input
 */
private static void add(PartialXYZT partialXYZT, XYZT extended, CachedXYT cached) {
  long[] twoZ1Z2 = new long[LIMB_CNT];
  final long[] outX = partialXYZT.xyz.x;
  final long[] outY = partialXYZT.xyz.y;
  final long[] outZ = partialXYZT.xyz.z;
  final long[] outT = partialXYZT.t;
  final XYZ p1 = extended.xyz;

  // outX = Y1 + X1, outY = Y1 - X1
  Field25519.sum(outX, p1.y, p1.x);
  Field25519.sub(outY, p1.y, p1.x);

  // outY = A = (Y1 - X1) * (Y2 - X2)
  Field25519.mult(outY, outY, cached.yMinusX);
  // outZ = B = (Y1 + X1) * (Y2 + X2)
  Field25519.mult(outZ, outX, cached.yPlusX);
  // outT = C = 2d * T1 * T2 (2d is written as k in the paper)
  Field25519.mult(outT, extended.t, cached.t2d);

  // outX = Z1 * Z2, then D = 2 * Z1 * Z2
  cached.multByZ(outX, p1.z);
  Field25519.sum(twoZ1Z2, outX, outX);

  // X3 = B - A
  Field25519.sub(outX, outZ, outY);
  // Y3 = B + A
  Field25519.sum(outY, outZ, outY);
  // Z3 = D + C
  Field25519.sum(outZ, twoZ1Z2, outT);
  // T3 = D - C
  Field25519.sub(outT, twoZ1Z2, outT);
}
/**
 * Computes {@code extended} - {@code cached} and stores the difference in {@code partialXYZT}.
 *
 * <p>Based on the addition from Section 3.1 of Hisil H., Wong K.KH., Carter G., Dawson E. (2008)
 * Twisted Edwards Curves Revisited, stopping short of the final conversion from PartialXYZT to
 * XYZT. Compared with {@code add}, the roles of the cached y + x and y - x are exchanged and
 * the sign of C is flipped.
 *
 * <p>The coordinates of {@code partialXYZT} double as scratch space, so the order of the
 * operations below matters.
 *
 * @param partialXYZT output; overwritten with the difference
 * @param extended extended projective point input
 * @param cached cached projective point input
 */
private static void sub(PartialXYZT partialXYZT, XYZT extended, CachedXYT cached) {
  long[] twoZ1Z2 = new long[LIMB_CNT];
  final long[] outX = partialXYZT.xyz.x;
  final long[] outY = partialXYZT.xyz.y;
  final long[] outZ = partialXYZT.xyz.z;
  final long[] outT = partialXYZT.t;
  final XYZ p1 = extended.xyz;

  // outX = Y1 + X1, outY = Y1 - X1
  Field25519.sum(outX, p1.y, p1.x);
  Field25519.sub(outY, p1.y, p1.x);

  // outY = A = (Y1 - X1) * (Y2 + X2)
  Field25519.mult(outY, outY, cached.yPlusX);
  // outZ = B = (Y1 + X1) * (Y2 - X2)
  Field25519.mult(outZ, outX, cached.yMinusX);
  // outT = C = 2d * T1 * T2 (2d is written as k in the paper)
  Field25519.mult(outT, extended.t, cached.t2d);

  // outX = Z1 * Z2, then D = 2 * Z1 * Z2
  cached.multByZ(outX, p1.z);
  Field25519.sum(twoZ1Z2, outX, outX);

  // X3 = B - A
  Field25519.sub(outX, outZ, outY);
  // Y3 = B + A
  Field25519.sum(outY, outZ, outY);
  // Z3 = D - C
  Field25519.sub(outZ, twoZ1Z2, outT);
  // T3 = D + C
  Field25519.sum(outT, twoZ1Z2, outT);
}
/**
 * Computes 2 * {@code p} and stores the result in {@code partialXYZT}.
 *
 * <p>Based on formula 7 in Section 3.3 of Hisil H., Wong K.KH., Carter G., Dawson E. (2008)
 * Twisted Edwards Curves Revisited, stopping short of the final conversion from PartialXYZT to
 * XYZT. This also fixes a typo in the paper's calculation of Y3 and T3: H should be replaced
 * with A+B.
 *
 * <p>The coordinates of {@code partialXYZT} double as scratch space, so the order of the
 * operations below matters.
 */
private static void doubleXYZ(PartialXYZT partialXYZT, XYZ p) {
  long[] sumSquared = new long[LIMB_CNT];
  final long[] outX = partialXYZT.xyz.x;
  final long[] outY = partialXYZT.xyz.y;
  final long[] outZ = partialXYZT.xyz.z;
  final long[] outT = partialXYZT.t;

  // outX = XX = X1^2
  Field25519.square(outX, p.x);
  // outZ = YY = Y1^2
  Field25519.square(outZ, p.y);
  // outT = B = 2 * Z1^2
  Field25519.square(outT, p.z);
  Field25519.sum(outT, outT, outT);

  // sumSquared = AA = (X1 + Y1)^2, using outY as a temporary for X1 + Y1
  Field25519.sum(outY, p.x, p.y);
  Field25519.square(sumSquared, outY);

  // Y3 = YY + XX
  Field25519.sum(outY, outZ, outX);
  // Z3 = YY - XX
  Field25519.sub(outZ, outZ, outX);
  // X3 = AA - Y3
  Field25519.sub(outX, sumSquared, outY);
  // T3 = B - Z3
  Field25519.sub(outT, outT, outZ);
}
/**
 * Computes 2 * {@code p} and stores the result in {@code partialXYZT}.
 *
 * <p>Only the projective (X, Y, Z) part of {@code p} is used; the work is delegated to
 * {@code doubleXYZ}.
 */
private static void doubleXYZT(PartialXYZT partialXYZT, XYZT p) {
  final XYZ projective = p.xyz;
  doubleXYZ(partialXYZT, projective);
}
/**
 * Branch-free equality test on the low 8 bits of {@code a} and {@code b}.
 *
 * <p>{@link Curve25519#eq} is not reused here because the inputs below are byte values.
 *
 * @return 1 if the low bytes of {@code a} and {@code b} are equal, 0 otherwise
 */
private static int eq(int a, int b) {
  // A bit of "same" is set exactly where a and b agree (low byte only).
  int same = (~(a ^ b)) & 0xff;
  // Fold the eight bits together so that bit 7 ends up as the AND of all of them.
  same = same & (same << 4);
  same = same & (same << 2);
  same = same & (same << 1);
  return (same >> 7) & 1;
}
/**
 * Stores the point b*B*256^pos in {@code t} without branching on {@code b}.
 * When b is 0, {@code t} is left unchanged (i.e., the neutral point).
 *
 * <p>B_TABLE[i][j] holds (j+1)*B*256^i for j in [0, 7] only. A negative {@code b} is handled by
 * selecting the entry for |b| and then conditionally replacing it with its negation, which in
 * cached form means swapping y + x with y - x and negating 2d * xy. This halves the memory
 * needed for the table.
 *
 * @param t neutral element (i.e., point 0), also serves as output.
 * @param pos in B[pos][j] = (j+1)*B*256^pos
 * @param b value in [-8, 8] range.
 */
private static void select(CachedXYT t, int pos, byte b) {
  // 1 if b is negative, 0 otherwise.
  final int isNegative = (b & 0xff) >> 7;
  // |b|, computed without a branch: b - 2b when b is negative, b otherwise.
  final int magnitude = b - (((-isNegative) & b) << 1);

  // Visit every table entry; only the one matching |b| is actually copied.
  for (int j = 0; j < 8; j++) {
    t.copyConditional(B_TABLE[pos][j], eq(magnitude, j + 1));
  }

  // Build the negation of the selected point and take it iff b was negative.
  long[] negatedYPlusX = Arrays.copyOf(t.yMinusX, LIMB_CNT);
  long[] negatedYMinusX = Arrays.copyOf(t.yPlusX, LIMB_CNT);
  long[] negatedT2d = Arrays.copyOf(t.t2d, LIMB_CNT);
  neg(negatedT2d, negatedT2d);
  t.copyConditional(new CachedXYT(negatedYPlusX, negatedYMinusX, negatedT2d), isNegative);
}
/**
* Computes {@code a}*B
* where a = a[0]+256*a[1]+...+256^31 a[31] and
* B is the Ed25519 base point (x,4/5) with x positive.
*
* The scalar is split into 64 signed 4-bit digits; the digits at odd positions are accumulated
* first from B_TABLE, the accumulator is doubled four times, and then the digits at even
* positions are added.
*
* Preconditions:
* a[31] <= 127
*
* @param a scalar in little-endian order; the first FIELD_LEN bytes are read
* @return the resulting point in projective coordinates
* @throws IllegalStateException iff there is arithmetic error.
*/
@SuppressWarnings("NarrowingCompoundAssignment")
private static XYZ scalarMultWithBase(byte[] a) {
// Split every byte of a into its low and high nibble.
byte[] e = new byte[2 * FIELD_LEN];
for (int i = 0; i < FIELD_LEN; i++) {
e[2 * i + 0] = (byte) (((a[i] & 0xff) >> 0) & 0xf);
e[2 * i + 1] = (byte) (((a[i] & 0xff) >> 4) & 0xf);
}
// each e[i] is between 0 and 15
// e[63] is between 0 and 7
// Rewrite e in a way that each e[i] is in [-8, 8].
// This can be done since e[63] is in [0, 7], the carry-over onto the most significant digit
// e[63] can be at most 1.
int carry = 0;
for (int i = 0; i < e.length - 1; i++) {
e[i] += carry;
// carry is 1 when e[i] >= 8, in which case 16 is subtracted from e[i] and pushed upwards.
carry = e[i] + 8;
carry >>= 4;
e[i] -= carry << 4;
}
e[e.length - 1] += carry;
// ret is the running result; xyzt is scratch space reused for each conversion of ret.
PartialXYZT ret = new PartialXYZT(NEUTRAL);
XYZT xyzt = new XYZT();
// Although B_TABLE's i can be at most 31 (stores only 32 4bit multiples of B) and we have 64
// 4bit values in e array, the below for loop adds cached values by iterating e by two in odd
// indices. After the result, we can double the result point 4 times to shift the multiplication
// scalar by 4 bits.
for (int i = 1; i < e.length; i += 2) {
// select() expects t to start as the neutral point and leaves it untouched when e[i] is 0.
CachedXYT t = new CachedXYT(CACHED_NEUTRAL);
select(t, i / 2, e[i]);
add(ret, XYZT.fromPartialXYZT(xyzt, ret), t);
}
// Doubles the result 4 times to shift the multiplication scalar 4 bits to get the actual result
// for the odd indices in e.
XYZ xyz = new XYZ();
doubleXYZ(ret, XYZ.fromPartialXYZT(xyz, ret));
doubleXYZ(ret, XYZ.fromPartialXYZT(xyz, ret));
doubleXYZ(ret, XYZ.fromPartialXYZT(xyz, ret));
doubleXYZ(ret, XYZ.fromPartialXYZT(xyz, ret));
// Add multiples of B for even indices of e.
for (int i = 0; i < e.length; i += 2) {
CachedXYT t = new CachedXYT(CACHED_NEUTRAL);
select(t, i / 2, e[i]);
add(ret, XYZT.fromPartialXYZT(xyzt, ret), t);
}
// This check is to protect against flaws, i.e. if there is a computation error through a
// faulty CPU or if the implementation contains a bug.
XYZ result = new XYZ(ret);
if (!result.isOnCurve()) {
throw new IllegalStateException("arithmetic error in scalar multiplication");
}
return result;
}
/**
 * Computes {@code a}*B and returns the encoding of the resulting point,
 * where a = a[0]+256*a[1]+...+256^31 a[31] and
 * B is the Ed25519 base point (x,4/5) with x positive.
 *
 * <p>Preconditions:
 * a[31] <= 127
 */
static byte[] scalarMultWithBaseToBytes(byte[] a) {
  XYZ product = scalarMultWithBase(a);
  return product.toBytes();
}
/**
 * Recodes the 256-bit little-endian scalar {@code a} into 256 signed digits such that
 * a = r[0]+2*r[1]+...+2^255*r[255], where every non-zero digit is odd and lies in [-15, 15].
 *
 * @param a scalar in little-endian order; the first 32 bytes are read
 * @return the 256 signed digits, least significant first
 */
@SuppressWarnings("NarrowingCompoundAssignment")
private static byte[] slide(byte[] a) {
  byte[] digits = new byte[256];
  // Start from the plain binary expansion: digits[i] is bit i of a.
  for (int bit = 0; bit < 256; bit++) {
    digits[bit] = (byte) (1 & ((a[bit >> 3] & 0xff) >> (bit & 7)));
  }
  // Absorb the set bits that follow each non-zero digit (up to 6 positions ahead) into it,
  // as long as the digit stays within [-15, 15].
  for (int pos = 0; pos < 256; pos++) {
    if (digits[pos] == 0) {
      continue;
    }
    for (int off = 1; off <= 6 && pos + off < 256; off++) {
      if (digits[pos + off] == 0) {
        continue;
      }
      int shifted = digits[pos + off] << off;
      if (digits[pos] + shifted <= 15) {
        // The higher bit fits: fold it into the current digit.
        digits[pos] += shifted;
        digits[pos + off] = 0;
      } else if (digits[pos] - shifted >= -15) {
        // Subtract instead and compensate by adding 2^(pos+off): propagate a carry upwards
        // through the run of ones starting at pos + off.
        digits[pos] -= shifted;
        for (int k = pos + off; k < 256; k++) {
          if (digits[k] == 0) {
            digits[k] = 1;
            break;
          }
          digits[k] = 0;
        }
      } else {
        break;
      }
    }
  }
  return digits;
}
/**
 * Computes {@code a}*{@code pointA}+{@code b}*B
 * where a = a[0]+256*a[1]+...+256^31*a[31].
 * and b = b[0]+256*b[1]+...+256^31*b[31].
 * B is the Ed25519 base point (x,4/5) with x positive.
 *
 * <p>Both scalars are recoded with {@code slide} and processed from the most significant digit
 * down: one doubling per digit position, plus an addition or subtraction of a precomputed odd
 * multiple for every non-zero digit.
 *
 * <p>Note that execution time varies based on the input since this will only be used in
 * verification of signatures.
 */
private static XYZ doubleScalarMultVarTime(byte[] a, XYZT pointA, byte[] b) {
  // oddMultiples[k] = (2k + 1) * pointA, i.e. pointA, 3*pointA, ..., 15*pointA.
  CachedXYZT[] oddMultiples = new CachedXYZT[8];
  oddMultiples[0] = new CachedXYZT(pointA);
  PartialXYZT acc = new PartialXYZT();
  doubleXYZT(acc, pointA);
  XYZT twiceA = new XYZT(acc);
  for (int k = 1; k < oddMultiples.length; k++) {
    add(acc, twiceA, oddMultiples[k - 1]);
    oddMultiples[k] = new CachedXYZT(new XYZT(acc));
  }

  byte[] aDigits = slide(a);
  byte[] bDigits = slide(b);

  acc = new PartialXYZT(NEUTRAL);
  // Scratch space reused for every conversion of acc to extended coordinates.
  XYZT scratch = new XYZT();

  // Skip the positions above the highest non-zero digit of either scalar.
  int i = 255;
  while (i >= 0 && aDigits[i] == 0 && bDigits[i] == 0) {
    i--;
  }

  while (i >= 0) {
    doubleXYZ(acc, new XYZ(acc));

    byte aDigit = aDigits[i];
    if (aDigit > 0) {
      add(acc, XYZT.fromPartialXYZT(scratch, acc), oddMultiples[aDigit / 2]);
    } else if (aDigit < 0) {
      sub(acc, XYZT.fromPartialXYZT(scratch, acc), oddMultiples[-aDigit / 2]);
    }

    byte bDigit = bDigits[i];
    if (bDigit > 0) {
      add(acc, XYZT.fromPartialXYZT(scratch, acc), B2[bDigit / 2]);
    } else if (bDigit < 0) {
      sub(acc, XYZT.fromPartialXYZT(scratch, acc), B2[-bDigit / 2]);
    }

    i--;
  }
  return new XYZ(acc);
}
/**
 * Returns true if {@code in} is nonzero, i.e. if its contracted byte encoding contains a
 * non-zero byte.
 *
 * <p>Note that execution time might depend on the input {@code in}.
 */
private static boolean isNonZeroVarTime(long[] in) {
  // Work on a copy that is one limb longer than the input (the extra limb starts at zero).
  // NOTE(review): presumably reduceCoefficients needs the spare limb for carries - confirm.
  long[] padded = Arrays.copyOf(in, in.length + 1);
  Field25519.reduceCoefficients(padded);
  byte[] encoded = Field25519.contract(padded);
  for (int i = 0; i < encoded.length; i++) {
    if (encoded[i] != 0) {
      return true;
    }
  }
  return false;
}
/**
 * Returns the least significant bit of {@code in}, taken from the first byte of its contracted
 * encoding.
 */
private static int getLsb(long[] in) {
  byte[] encoded = Field25519.contract(in);
  return encoded[0] & 1;
}
/**
 * Writes the limb-wise negation of {@code in} into {@code out}: out[i] = -in[i] for every index
 * of {@code in}. {@code out} may be the same array as {@code in}.
 */
private static void neg(long[] out, long[] in) {
  final int count = in.length;
  int idx = 0;
  while (idx < count) {
    out[idx] = -in[idx];
    idx++;
  }
}
/**
* Computes {@code in}^(2^252-3) mod 2^255-19 and puts the result in {@code out}.
*
* The exponentiation is a fixed chain of squarings and multiplications. In the comments below
* z1 is {@code in}, zN is in^N, and z_A_B is in^(2^A - 2^B).
*
* {@code out} is written only by the very last multiplication, and {@code in} is read up to that
* point. NOTE(review): callers in this file pass the same array for both, which presumably
* relies on Field25519.mult tolerating an output that aliases an input - confirm.
*/
private static void pow2252m3(long[] out, long[] in) {
// Three scratch registers reused throughout the chain.
long[] t0 = new long[LIMB_CNT];
long[] t1 = new long[LIMB_CNT];
long[] t2 = new long[LIMB_CNT];
// z2 = z1^2^1
Field25519.square(t0, in);
// z8 = z2^2^2
Field25519.square(t1, t0);
for (int i = 1; i < 2; i++) {
Field25519.square(t1, t1);
}
// z9 = z1*z8
Field25519.mult(t1, in, t1);
// z11 = z2*z9
Field25519.mult(t0, t0, t1);
// z22 = z11^2^1
Field25519.square(t0, t0);
// z_5_0 = z9*z22
Field25519.mult(t0, t1, t0);
// z_10_5 = z_5_0^2^5
Field25519.square(t1, t0);
for (int i = 1; i < 5; i++) {
Field25519.square(t1, t1);
}
// z_10_0 = z_10_5*z_5_0
Field25519.mult(t0, t1, t0);
// z_20_10 = z_10_0^2^10
Field25519.square(t1, t0);
for (int i = 1; i < 10; i++) {
Field25519.square(t1, t1);
}
// z_20_0 = z_20_10*z_10_0
Field25519.mult(t1, t1, t0);
// z_40_20 = z_20_0^2^20
Field25519.square(t2, t1);
for (int i = 1; i < 20; i++) {
Field25519.square(t2, t2);
}
// z_40_0 = z_40_20*z_20_0
Field25519.mult(t1, t2, t1);
// z_50_10 = z_40_0^2^10
Field25519.square(t1, t1);
for (int i = 1; i < 10; i++) {
Field25519.square(t1, t1);
}
// z_50_0 = z_50_10*z_10_0
Field25519.mult(t0, t1, t0);
// z_100_50 = z_50_0^2^50
Field25519.square(t1, t0);
for (int i = 1; i < 50; i++) {
Field25519.square(t1, t1);
}
// z_100_0 = z_100_50*z_50_0
Field25519.mult(t1, t1, t0);
// z_200_100 = z_100_0^2^100
Field25519.square(t2, t1);
for (int i = 1; i < 100; i++) {
Field25519.square(t2, t2);
}
// z_200_0 = z_200_100*z_100_0
Field25519.mult(t1, t2, t1);
// z_250_50 = z_200_0^2^50
Field25519.square(t1, t1);
for (int i = 1; i < 50; i++) {
Field25519.square(t1, t1);
}
// z_250_0 = z_250_50*z_50_0
Field25519.mult(t0, t1, t0);
// z_252_2 = z_250_0^2^2
Field25519.square(t0, t0);
for (int i = 1; i < 2; i++) {
Field25519.square(t0, t0);
}
// z_252_3 = z_252_2*z1
Field25519.mult(out, t0, in);
}
/**
 * Reads the 3 bytes of {@code in} starting at {@code idx} as an unsigned little-endian integer.
 *
 * @return a value in [0, 2^24)
 */
private static long load3(byte[] in, int idx) {
  long low = in[idx] & 0xffL;
  long mid = in[idx + 1] & 0xffL;
  long high = in[idx + 2] & 0xffL;
  return low | (mid << 8) | (high << 16);
}
/**
 * Reads the 4 bytes of {@code in} starting at {@code idx} as an unsigned little-endian integer.
 *
 * @return a value in [0, 2^32)
 */
private static long load4(byte[] in, int idx) {
  long lowThree = load3(in, idx);
  long top = (long) (in[idx + 3] & 0xff) << 24;
  return lowThree | top;
}
/**
* Reduces a 512-bit little-endian integer modulo the group order l.
*
* Input:
* s[0]+256*s[1]+...+256^63*s[63] = s
*
* Output:
* s[0]+256*s[1]+...+256^31*s[31] = s mod l
* where l = 2^252 + 27742317777372353535851937790883648493.
* Overwrites s in place.
*
* Only s[0..31] are written; s[32..63] keep their input values.
*
* @param s 64-byte input (bytes 0..63 are read); receives the 32-byte result in s[0..31]
*/
private static void reduce(byte[] s) {
// Observation:
// 2^252 mod l is equivalent to -27742317777372353535851937790883648493 mod l
// Let m = -27742317777372353535851937790883648493
// Thus a*2^252+b mod l is equivalent to a*m+b mod l
//
// First s is divided into chunks of 21 bits as follows:
// s0+2^21*s1+2^42*s2+...+2^483*s23 = s[0]+256*s[1]+...+256^63*s[63]
// (2097151 = 2^21 - 1 is the 21-bit mask; s23 is left unmasked and takes the remaining top
// bits.)
long s0 = 2097151 & load3(s, 0);
long s1 = 2097151 & (load4(s, 2) >> 5);
long s2 = 2097151 & (load3(s, 5) >> 2);
long s3 = 2097151 & (load4(s, 7) >> 7);
long s4 = 2097151 & (load4(s, 10) >> 4);
long s5 = 2097151 & (load3(s, 13) >> 1);
long s6 = 2097151 & (load4(s, 15) >> 6);
long s7 = 2097151 & (load3(s, 18) >> 3);
long s8 = 2097151 & load3(s, 21);
long s9 = 2097151 & (load4(s, 23) >> 5);
long s10 = 2097151 & (load3(s, 26) >> 2);
long s11 = 2097151 & (load4(s, 28) >> 7);
long s12 = 2097151 & (load4(s, 31) >> 4);
long s13 = 2097151 & (load3(s, 34) >> 1);
long s14 = 2097151 & (load4(s, 36) >> 6);
long s15 = 2097151 & (load3(s, 39) >> 3);
long s16 = 2097151 & load3(s, 42);
long s17 = 2097151 & (load4(s, 44) >> 5);
long s18 = 2097151 & (load3(s, 47) >> 2);
long s19 = 2097151 & (load4(s, 49) >> 7);
long s20 = 2097151 & (load4(s, 52) >> 4);
long s21 = 2097151 & (load3(s, 55) >> 1);
long s22 = 2097151 & (load4(s, 57) >> 6);
long s23 = (load4(s, 60) >> 3);
long carry0;
long carry1;
long carry2;
long carry3;
long carry4;
long carry5;
long carry6;
long carry7;
long carry8;
long carry9;
long carry10;
long carry11;
long carry12;
long carry13;
long carry14;
long carry15;
long carry16;
// s23*2^483 = s23*2^231*2^252 is equivalent to s23*2^231*m in mod l
// As m is a 125 bit number, the result needs to scattered to 6 limbs (125/21 ceil is 6)
// starting from s11 (s11*2^231)
// m = [666643, 470296, 654183, -997805, 136657, -683901] in 21-bit limbs
s11 += s23 * 666643;
s12 += s23 * 470296;
s13 += s23 * 654183;
s14 -= s23 * 997805;
s15 += s23 * 136657;
s16 -= s23 * 683901;
// s23 = 0;
// The same folding is applied to s22 down to s18, each time twelve limbs lower.
s10 += s22 * 666643;
s11 += s22 * 470296;
s12 += s22 * 654183;
s13 -= s22 * 997805;
s14 += s22 * 136657;
s15 -= s22 * 683901;
// s22 = 0;
s9 += s21 * 666643;
s10 += s21 * 470296;
s11 += s21 * 654183;
s12 -= s21 * 997805;
s13 += s21 * 136657;
s14 -= s21 * 683901;
// s21 = 0;
s8 += s20 * 666643;
s9 += s20 * 470296;
s10 += s20 * 654183;
s11 -= s20 * 997805;
s12 += s20 * 136657;
s13 -= s20 * 683901;
// s20 = 0;
s7 += s19 * 666643;
s8 += s19 * 470296;
s9 += s19 * 654183;
s10 -= s19 * 997805;
s11 += s19 * 136657;
s12 -= s19 * 683901;
// s19 = 0;
s6 += s18 * 666643;
s7 += s18 * 470296;
s8 += s18 * 654183;
s9 -= s18 * 997805;
s10 += s18 * 136657;
s11 -= s18 * 683901;
// s18 = 0;
// Reduce the bit length of limbs from s6 to s16 to 21-bits.
// Adding 1 << 20 before the shift rounds the carry to the nearest integer, so the remaining
// limb may be negative. Even-indexed limbs are carried first, then odd-indexed ones.
carry6 = (s6 + (1 << 20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
carry8 = (s8 + (1 << 20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
carry10 = (s10 + (1 << 20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
carry12 = (s12 + (1 << 20)) >> 21; s13 += carry12; s12 -= carry12 << 21;
carry14 = (s14 + (1 << 20)) >> 21; s15 += carry14; s14 -= carry14 << 21;
carry16 = (s16 + (1 << 20)) >> 21; s17 += carry16; s16 -= carry16 << 21;
carry7 = (s7 + (1 << 20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
carry9 = (s9 + (1 << 20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
carry11 = (s11 + (1 << 20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
carry13 = (s13 + (1 << 20)) >> 21; s14 += carry13; s13 -= carry13 << 21;
carry15 = (s15 + (1 << 20)) >> 21; s16 += carry15; s15 -= carry15 << 21;
// Resume reduction where we left off.
s5 += s17 * 666643;
s6 += s17 * 470296;
s7 += s17 * 654183;
s8 -= s17 * 997805;
s9 += s17 * 136657;
s10 -= s17 * 683901;
// s17 = 0;
s4 += s16 * 666643;
s5 += s16 * 470296;
s6 += s16 * 654183;
s7 -= s16 * 997805;
s8 += s16 * 136657;
s9 -= s16 * 683901;
// s16 = 0;
s3 += s15 * 666643;
s4 += s15 * 470296;
s5 += s15 * 654183;
s6 -= s15 * 997805;
s7 += s15 * 136657;
s8 -= s15 * 683901;
// s15 = 0;
s2 += s14 * 666643;
s3 += s14 * 470296;
s4 += s14 * 654183;
s5 -= s14 * 997805;
s6 += s14 * 136657;
s7 -= s14 * 683901;
// s14 = 0;
s1 += s13 * 666643;
s2 += s13 * 470296;
s3 += s13 * 654183;
s4 -= s13 * 997805;
s5 += s13 * 136657;
s6 -= s13 * 683901;
// s13 = 0;
s0 += s12 * 666643;
s1 += s12 * 470296;
s2 += s12 * 654183;
s3 -= s12 * 997805;
s4 += s12 * 136657;
s5 -= s12 * 683901;
// s12 is cleared for real (not just in a comment) because it is reused below to collect the
// carry out of s11.
s12 = 0;
// Reduce the range of limbs from s0 to s11 to 21-bits.
carry0 = (s0 + (1 << 20)) >> 21; s1 += carry0; s0 -= carry0 << 21;
carry2 = (s2 + (1 << 20)) >> 21; s3 += carry2; s2 -= carry2 << 21;
carry4 = (s4 + (1 << 20)) >> 21; s5 += carry4; s4 -= carry4 << 21;
carry6 = (s6 + (1 << 20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
carry8 = (s8 + (1 << 20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
carry10 = (s10 + (1 << 20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
carry1 = (s1 + (1 << 20)) >> 21; s2 += carry1; s1 -= carry1 << 21;
carry3 = (s3 + (1 << 20)) >> 21; s4 += carry3; s3 -= carry3 << 21;
carry5 = (s5 + (1 << 20)) >> 21; s6 += carry5; s5 -= carry5 << 21;
carry7 = (s7 + (1 << 20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
carry9 = (s9 + (1 << 20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
carry11 = (s11 + (1 << 20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
// Fold the carry that landed in s12 (weight 2^252) back into s0..s5.
s0 += s12 * 666643;
s1 += s12 * 470296;
s2 += s12 * 654183;
s3 -= s12 * 997805;
s4 += s12 * 136657;
s5 -= s12 * 683901;
s12 = 0;
// Carry chain reduction to propagate excess bits from s0 to s5 to the most significant limbs.
// These carries use a plain arithmetic shift (no rounding), so every limb that has been carried
// ends up in [0, 2^21).
carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21;
carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21;
carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21;
carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21;
carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21;
carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21;
carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21;
carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21;
carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21;
carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21;
carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21;
carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 << 21;
// Do one last reduction as s12 might be 1.
s0 += s12 * 666643;
s1 += s12 * 470296;
s2 += s12 * 654183;
s3 -= s12 * 997805;
s4 += s12 * 136657;
s5 -= s12 * 683901;
// s12 = 0;
// Final carry chain; no carry is taken out of s11 here.
carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21;
carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21;
carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21;
carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21;
carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21;
carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21;
carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21;
carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21;
carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21;
carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21;
carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21;
// Serialize the result into the s.
// The twelve 21-bit limbs s0..s11 are packed back into 32 little-endian bytes; bytes that
// straddle two limbs combine the top bits of one with the low bits of the next.
s[0] = (byte) s0;
s[1] = (byte) (s0 >> 8);
s[2] = (byte) ((s0 >> 16) | (s1 << 5));
s[3] = (byte) (s1 >> 3);
s[4] = (byte) (s1 >> 11);
s[5] = (byte) ((s1 >> 19) | (s2 << 2));
s[6] = (byte) (s2 >> 6);
s[7] = (byte) ((s2 >> 14) | (s3 << 7));
s[8] = (byte) (s3 >> 1);
s[9] = (byte) (s3 >> 9);
s[10] = (byte) ((s3 >> 17) | (s4 << 4));
s[11] = (byte) (s4 >> 4);
s[12] = (byte) (s4 >> 12);
s[13] = (byte) ((s4 >> 20) | (s5 << 1));
s[14] = (byte) (s5 >> 7);
s[15] = (byte) ((s5 >> 15) | (s6 << 6));
s[16] = (byte) (s6 >> 2);
s[17] = (byte) (s6 >> 10);
s[18] = (byte) ((s6 >> 18) | (s7 << 3));
s[19] = (byte) (s7 >> 5);
s[20] = (byte) (s7 >> 13);
s[21] = (byte) s8;
s[22] = (byte) (s8 >> 8);
s[23] = (byte) ((s8 >> 16) | (s9 << 5));
s[24] = (byte) (s9 >> 3);
s[25] = (byte) (s9 >> 11);
s[26] = (byte) ((s9 >> 19) | (s10 << 2));
s[27] = (byte) (s10 >> 6);
s[28] = (byte) ((s10 >> 14) | (s11 << 7));
s[29] = (byte) (s11 >> 1);
s[30] = (byte) (s11 >> 9);
s[31] = (byte) (s11 >> 17);
}
/**
* Computes {@code s = (a * b + c) mod l} on scalars stored as little-endian byte strings.
*
* <pre>
* Input:
* a[0]+256*a[1]+...+256^31*a[31] = a
* b[0]+256*b[1]+...+256^31*b[31] = b
* c[0]+256*c[1]+...+256^31*c[31] = c
*
* Output:
* s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
* where l = 2^252 + 27742317777372353535851937790883648493.
* </pre>
*
* <p>The computation works on twelve limbs of 21 bits per operand (12 * 21 = 252 bits): the
* limbs of {@code a} and {@code b} are multiplied schoolbook-style into 23 product limbs, the
* limbs of {@code c} are added to the low twelve, and the high limbs are then folded back into
* the low ones until twelve limbs remain, which are serialized into {@code s}.
*
* @param s output buffer; indices 0..31 are overwritten with the result
* @param a first factor (read through load3/load4 at offsets 0..28)
* @param b second factor (read through load3/load4 at offsets 0..28)
* @param c addend (read through load3/load4 at offsets 0..28)
*/
private static void mulAdd(byte[] s, byte[] a, byte[] b, byte[] c) {
// This is very similar to Ed25519.reduce, the difference in here is that it computes ab+c
// See Ed25519.reduce for related comments.
// Split a into twelve limbs. 2097151 = 2^21 - 1 masks limbs 0..10 to 21 bits; limb 11 is left
// unmasked and keeps all remaining high bits.
// NOTE(review): load3/load4 are defined elsewhere in this file; presumably they read 3 resp.
// 4 little-endian bytes starting at the given offset -- confirm against their definitions.
long a0 = 2097151 & load3(a, 0);
long a1 = 2097151 & (load4(a, 2) >> 5);
long a2 = 2097151 & (load3(a, 5) >> 2);
long a3 = 2097151 & (load4(a, 7) >> 7);
long a4 = 2097151 & (load4(a, 10) >> 4);
long a5 = 2097151 & (load3(a, 13) >> 1);
long a6 = 2097151 & (load4(a, 15) >> 6);
long a7 = 2097151 & (load3(a, 18) >> 3);
long a8 = 2097151 & load3(a, 21);
long a9 = 2097151 & (load4(a, 23) >> 5);
long a10 = 2097151 & (load3(a, 26) >> 2);
long a11 = (load4(a, 28) >> 7);
// Same limb decomposition for b.
long b0 = 2097151 & load3(b, 0);
long b1 = 2097151 & (load4(b, 2) >> 5);
long b2 = 2097151 & (load3(b, 5) >> 2);
long b3 = 2097151 & (load4(b, 7) >> 7);
long b4 = 2097151 & (load4(b, 10) >> 4);
long b5 = 2097151 & (load3(b, 13) >> 1);
long b6 = 2097151 & (load4(b, 15) >> 6);
long b7 = 2097151 & (load3(b, 18) >> 3);
long b8 = 2097151 & load3(b, 21);
long b9 = 2097151 & (load4(b, 23) >> 5);
long b10 = 2097151 & (load3(b, 26) >> 2);
long b11 = (load4(b, 28) >> 7);
// Same limb decomposition for c.
long c0 = 2097151 & load3(c, 0);
long c1 = 2097151 & (load4(c, 2) >> 5);
long c2 = 2097151 & (load3(c, 5) >> 2);
long c3 = 2097151 & (load4(c, 7) >> 7);
long c4 = 2097151 & (load4(c, 10) >> 4);
long c5 = 2097151 & (load3(c, 13) >> 1);
long c6 = 2097151 & (load4(c, 15) >> 6);
long c7 = 2097151 & (load3(c, 18) >> 3);
long c8 = 2097151 & load3(c, 21);
long c9 = 2097151 & (load4(c, 23) >> 5);
long c10 = 2097151 & (load3(c, 26) >> 2);
long c11 = (load4(c, 28) >> 7);
// Limbs of the (unreduced) result; s_k carries weight 2^(21*k).
long s0;
long s1;
long s2;
long s3;
long s4;
long s5;
long s6;
long s7;
long s8;
long s9;
long s10;
long s11;
long s12;
long s13;
long s14;
long s15;
long s16;
long s17;
long s18;
long s19;
long s20;
long s21;
long s22;
long s23;
// Carry out of limb k into limb k+1.
long carry0;
long carry1;
long carry2;
long carry3;
long carry4;
long carry5;
long carry6;
long carry7;
long carry8;
long carry9;
long carry10;
long carry11;
long carry12;
long carry13;
long carry14;
long carry15;
long carry16;
long carry17;
long carry18;
long carry19;
long carry20;
long carry21;
long carry22;
// Schoolbook product: s_k = sum over i+j=k of a_i * b_j, with c_k added for k = 0..11.
s0 = c0 + a0 * b0;
s1 = c1 + a0 * b1 + a1 * b0;
s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0;
s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0;
s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 + a6 * b1 + a7 * b0;
s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 + a6 * b2 + a7 * b1
+ a8 * b0;
s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 + a6 * b3 + a7 * b2
+ a8 * b1 + a9 * b0;
s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 + a6 * b4 + a7 * b3
+ a8 * b2 + a9 * b1 + a10 * b0;
s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 + a6 * b5 + a7 * b4
+ a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0;
s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 + a8 * b4 + a9 * b3
+ a10 * b2 + a11 * b1;
s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 + a9 * b4 + a10 * b3
+ a11 * b2;
s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 + a10 * b4
+ a11 * b3;
s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 + a11 * b4;
s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5;
s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6;
s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7;
s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8;
s20 = a9 * b11 + a10 * b10 + a11 * b9;
s21 = a10 * b11 + a11 * b10;
s22 = a11 * b11;
// s23 starts empty; it only receives the carry out of s22 below.
s23 = 0;
// Rounded carry pass over all 23 limbs, even limbs first and then odd limbs.
// (x + 2^20) >> 21 rounds x / 2^21 to the nearest integer, so after subtracting carry << 21
// the limb is left in [-2^20, 2^20), i.e. centered around zero.
carry0 = (s0 + (1 << 20)) >> 21; s1 += carry0; s0 -= carry0 << 21;
carry2 = (s2 + (1 << 20)) >> 21; s3 += carry2; s2 -= carry2 << 21;
carry4 = (s4 + (1 << 20)) >> 21; s5 += carry4; s4 -= carry4 << 21;
carry6 = (s6 + (1 << 20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
carry8 = (s8 + (1 << 20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
carry10 = (s10 + (1 << 20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
carry12 = (s12 + (1 << 20)) >> 21; s13 += carry12; s12 -= carry12 << 21;
carry14 = (s14 + (1 << 20)) >> 21; s15 += carry14; s14 -= carry14 << 21;
carry16 = (s16 + (1 << 20)) >> 21; s17 += carry16; s16 -= carry16 << 21;
carry18 = (s18 + (1 << 20)) >> 21; s19 += carry18; s18 -= carry18 << 21;
carry20 = (s20 + (1 << 20)) >> 21; s21 += carry20; s20 -= carry20 << 21;
carry22 = (s22 + (1 << 20)) >> 21; s23 += carry22; s22 -= carry22 << 21;
carry1 = (s1 + (1 << 20)) >> 21; s2 += carry1; s1 -= carry1 << 21;
carry3 = (s3 + (1 << 20)) >> 21; s4 += carry3; s3 -= carry3 << 21;
carry5 = (s5 + (1 << 20)) >> 21; s6 += carry5; s5 -= carry5 << 21;
carry7 = (s7 + (1 << 20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
carry9 = (s9 + (1 << 20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
carry11 = (s11 + (1 << 20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
carry13 = (s13 + (1 << 20)) >> 21; s14 += carry13; s13 -= carry13 << 21;
carry15 = (s15 + (1 << 20)) >> 21; s16 += carry15; s15 -= carry15 << 21;
carry17 = (s17 + (1 << 20)) >> 21; s18 += carry17; s17 -= carry17 << 21;
carry19 = (s19 + (1 << 20)) >> 21; s20 += carry19; s19 -= carry19 << 21;
carry21 = (s21 + (1 << 20)) >> 21; s22 += carry21; s21 -= carry21 << 21;
// First folding round: eliminate limbs s23..s18 by adding each one, multiplied by six fixed
// constants, into the six limbs that sit twelve positions lower (s_k goes into
// s_(k-12)..s_(k-7)).
// NOTE(review): the constants (666643, 470296, 654183, -997805, 136657, -683901) appear to be
// the signed 21-bit limbs of 2^252 mod l, i.e. of -(l - 2^252); see Ed25519.reduce to confirm.
s11 += s23 * 666643;
s12 += s23 * 470296;
s13 += s23 * 654183;
s14 -= s23 * 997805;
s15 += s23 * 136657;
s16 -= s23 * 683901;
// s23 = 0;
s10 += s22 * 666643;
s11 += s22 * 470296;
s12 += s22 * 654183;
s13 -= s22 * 997805;
s14 += s22 * 136657;
s15 -= s22 * 683901;
// s22 = 0;
s9 += s21 * 666643;
s10 += s21 * 470296;
s11 += s21 * 654183;
s12 -= s21 * 997805;
s13 += s21 * 136657;
s14 -= s21 * 683901;
// s21 = 0;
s8 += s20 * 666643;
s9 += s20 * 470296;
s10 += s20 * 654183;
s11 -= s20 * 997805;
s12 += s20 * 136657;
s13 -= s20 * 683901;
// s20 = 0;
s7 += s19 * 666643;
s8 += s19 * 470296;
s9 += s19 * 654183;
s10 -= s19 * 997805;
s11 += s19 * 136657;
s12 -= s19 * 683901;
// s19 = 0;
s6 += s18 * 666643;
s7 += s18 * 470296;
s8 += s18 * 654183;
s9 -= s18 * 997805;
s10 += s18 * 136657;
s11 -= s18 * 683901;
// s18 = 0;
// Rounded carry pass over the limbs touched by the first folding round (s6..s16), again even
// limbs first and then odd limbs, before those limbs are used as multipliers below.
carry6 = (s6 + (1 << 20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
carry8 = (s8 + (1 << 20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
carry10 = (s10 + (1 << 20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
carry12 = (s12 + (1 << 20)) >> 21; s13 += carry12; s12 -= carry12 << 21;
carry14 = (s14 + (1 << 20)) >> 21; s15 += carry14; s14 -= carry14 << 21;
carry16 = (s16 + (1 << 20)) >> 21; s17 += carry16; s16 -= carry16 << 21;
carry7 = (s7 + (1 << 20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
carry9 = (s9 + (1 << 20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
carry11 = (s11 + (1 << 20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
carry13 = (s13 + (1 << 20)) >> 21; s14 += carry13; s13 -= carry13 << 21;
carry15 = (s15 + (1 << 20)) >> 21; s16 += carry15; s15 -= carry15 << 21;
// Second folding round: eliminate limbs s17..s12 the same way, leaving only s0..s11.
s5 += s17 * 666643;
s6 += s17 * 470296;
s7 += s17 * 654183;
s8 -= s17 * 997805;
s9 += s17 * 136657;
s10 -= s17 * 683901;
// s17 = 0;
s4 += s16 * 666643;
s5 += s16 * 470296;
s6 += s16 * 654183;
s7 -= s16 * 997805;
s8 += s16 * 136657;
s9 -= s16 * 683901;
// s16 = 0;
s3 += s15 * 666643;
s4 += s15 * 470296;
s5 += s15 * 654183;
s6 -= s15 * 997805;
s7 += s15 * 136657;
s8 -= s15 * 683901;
// s15 = 0;
s2 += s14 * 666643;
s3 += s14 * 470296;
s4 += s14 * 654183;
s5 -= s14 * 997805;
s6 += s14 * 136657;
s7 -= s14 * 683901;
// s14 = 0;
s1 += s13 * 666643;
s2 += s13 * 470296;
s3 += s13 * 654183;
s4 -= s13 * 997805;
s5 += s13 * 136657;
s6 -= s13 * 683901;
// s13 = 0;
s0 += s12 * 666643;
s1 += s12 * 470296;
s2 += s12 * 654183;
s3 -= s12 * 997805;
s4 += s12 * 136657;
s5 -= s12 * 683901;
// s12 has been folded into s0..s5; clear it so it can collect the carry out of s11 below.
s12 = 0;
// Rounded carry pass over s0..s11 (even limbs, then odd limbs); the carry out of s11 lands
// in s12.
carry0 = (s0 + (1 << 20)) >> 21; s1 += carry0; s0 -= carry0 << 21;
carry2 = (s2 + (1 << 20)) >> 21; s3 += carry2; s2 -= carry2 << 21;
carry4 = (s4 + (1 << 20)) >> 21; s5 += carry4; s4 -= carry4 << 21;
carry6 = (s6 + (1 << 20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
carry8 = (s8 + (1 << 20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
carry10 = (s10 + (1 << 20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
carry1 = (s1 + (1 << 20)) >> 21; s2 += carry1; s1 -= carry1 << 21;
carry3 = (s3 + (1 << 20)) >> 21; s4 += carry3; s3 -= carry3 << 21;
carry5 = (s5 + (1 << 20)) >> 21; s6 += carry5; s5 -= carry5 << 21;
carry7 = (s7 + (1 << 20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
carry9 = (s9 + (1 << 20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
carry11 = (s11 + (1 << 20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
// Fold the carry that accumulated in s12 back into s0..s5, then clear s12 again.
s0 += s12 * 666643;
s1 += s12 * 470296;
s2 += s12 * 654183;
s3 -= s12 * 997805;
s4 += s12 * 136657;
s5 -= s12 * 683901;
s12 = 0;
// Sequential carry chain without rounding: x >> 21 is a floor division, so each limb that has
// been processed is left in [0, 2^21). The carry out of s11 goes into s12 once more.
carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21;
carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21;
carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21;
carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21;
carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21;
carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21;
carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21;
carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21;
carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21;
carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21;
carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21;
carry11 = s11 >> 21; s12 += carry11; s11 -= carry11 << 21;
// One last fold of s12 into s0..s5, mirroring the final reduction step of Ed25519.reduce.
s0 += s12 * 666643;
s1 += s12 * 470296;
s2 += s12 * 654183;
s3 -= s12 * 997805;
s4 += s12 * 136657;
s5 -= s12 * 683901;
// s12 = 0;
// Final floor-carry chain over s0..s10; s11 absorbs the last carry and is not carried out of.
carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21;
carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21;
carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21;
carry3 = s3 >> 21; s4 += carry3; s3 -= carry3 << 21;
carry4 = s4 >> 21; s5 += carry4; s4 -= carry4 << 21;
carry5 = s5 >> 21; s6 += carry5; s5 -= carry5 << 21;
carry6 = s6 >> 21; s7 += carry6; s6 -= carry6 << 21;
carry7 = s7 >> 21; s8 += carry7; s7 -= carry7 << 21;
carry8 = s8 >> 21; s9 += carry8; s8 -= carry8 << 21;
carry9 = s9 >> 21; s10 += carry9; s9 -= carry9 << 21;
carry10 = s10 >> 21; s11 += carry10; s10 -= carry10 << 21;
// Serialize the twelve 21-bit limbs into 32 little-endian bytes. Where a byte straddles two
// limbs, the top bits of the lower limb are OR-ed with the bottom bits of the next one.
s[0] = (byte) s0;
s[1] = (byte) (s0 >> 8);
s[2] = (byte) ((s0 >> 16) | (s1 << 5));
s[3] = (byte) (s1 >> 3);
s[4] = (byte) (s1 >> 11);
s[5] = (byte) ((s1 >> 19) | (s2 << 2));
s[6] = (byte) (s2 >> 6);
s[7] = (byte) ((s2 >> 14) | (s3 << 7));
s[8] = (byte) (s3 >> 1);
s[9] = (byte) (s3 >> 9);
s[10] = (byte) ((s3 >> 17) | (s4 << 4));
s[11] = (byte) (s4 >> 4);
s[12] = (byte) (s4 >> 12);
s[13] = (byte) ((s4 >> 20) | (s5 << 1));
s[14] = (byte) (s5 >> 7);
s[15] = (byte) ((s5 >> 15) | (s6 << 6));
s[16] = (byte) (s6 >> 2);
s[17] = (byte) (s6 >> 10);
s[18] = (byte) ((s6 >> 18) | (s7 << 3));
s[19] = (byte) (s7 >> 5);
s[20] = (byte) (s7 >> 13);
s[21] = (byte) s8;
s[22] = (byte) (s8 >> 8);
s[23] = (byte) ((s8 >> 16) | (s9 << 5));
s[24] = (byte) (s9 >> 3);
s[25] = (byte) (s9 >> 11);
s[26] = (byte) ((s9 >> 19) | (s10 << 2));
s[27] = (byte) (s10 >> 6);
s[28] = (byte) ((s10 >> 14) | (s11 << 7));
s[29] = (byte) (s11 >> 1);
s[30] = (byte) (s11 >> 9);
s[31] = (byte) (s11 >> 17);
}
/**
 * Hashes the first {@code FIELD_LEN} bytes of {@code privateKey} with SHA-512 and clamps the
 * lower half of the digest as described in RFC 8032, Section 5.1.2.
 *
 * @param privateKey the private key seed; only its first {@code FIELD_LEN} bytes are hashed
 * @return the full SHA-512 digest with octets 0 and 31 clamped
 * @throws GeneralSecurityException if there is no SHA-512 algorithm defined in
 *     {@link EngineFactory}.MESSAGE_DIGEST.
 */
static byte[] getHashedScalar(final byte[] privateKey)
    throws GeneralSecurityException {
  final MessageDigest sha512 = EngineFactory.MESSAGE_DIGEST.getInstance("SHA-512");
  sha512.update(privateKey, 0, FIELD_LEN);
  final byte[] hashed = sha512.digest();
  // Clamping, see https://tools.ietf.org/html/rfc8032#section-5.1.2.
  // First octet: clear the lowest three bits.
  hashed[0] &= 0xf8;
  // Last octet: clear the highest bit and set the second highest bit.
  hashed[31] = (byte) ((hashed[31] & 0x7f) | 0x40);
  return hashed;
}
/**
 * Signs {@code message} with EdDSA and returns the signature as the concatenation {@code R || S}.
 *
 * @param message to sign
 * @param publicKey {@link Ed25519#scalarMultToBytes(byte[])} of {@code hashedPrivateKey}
 * @param hashedPrivateKey {@link Ed25519#getHashedScalar(byte[])} of the private key
 * @return signature for the {@code message}.
 * @throws GeneralSecurityException if there is no SHA-512 algorithm defined in
 *     {@link EngineFactory}.MESSAGE_DIGEST.
 */
static byte[] sign(final byte[] message, final byte[] publicKey, final byte[] hashedPrivateKey)
    throws GeneralSecurityException {
  // The message is hashed twice. Take a private snapshot first so that a caller mutating the
  // array concurrently cannot make the two hashes see different messages, which might leak the
  // private key.
  final byte[] snapshot = Arrays.copyOfRange(message, 0, message.length);
  final MessageDigest sha512 = EngineFactory.MESSAGE_DIGEST.getInstance("SHA-512");

  // Nonce: hash of the second half of the hashed private key followed by the message,
  // reduced in place.
  sha512.update(hashedPrivateKey, FIELD_LEN, FIELD_LEN);
  sha512.update(snapshot);
  final byte[] nonce = sha512.digest();
  reduce(nonce);

  // First signature half: the encoding of nonce * base point, cut to FIELD_LEN bytes.
  final byte[] encodedR = Arrays.copyOfRange(scalarMultWithBase(nonce).toBytes(), 0, FIELD_LEN);

  // Challenge: hash of R || publicKey || message, reduced in place.
  sha512.reset();
  sha512.update(encodedR);
  sha512.update(publicKey);
  sha512.update(snapshot);
  final byte[] challenge = sha512.digest();
  reduce(challenge);

  // Second signature half: (challenge * hashedPrivateKey + nonce) mod l.
  final byte[] encodedS = new byte[FIELD_LEN];
  mulAdd(encodedS, challenge, hashedPrivateKey, nonce);
  return Bytes.concat(encodedR, encodedS);
}
// The order l of the generator, stored as 32 unsigned bytes in little-endian order:
// l = 2^252 + 0x14def9dea2f79cd65812631a5cf5d3ed (cf. RFC 7748).
// The first 16 bytes hold the low 128-bit term, least significant byte first; the trailing 0x10
// in byte 31 is the 2^252 bit.
// Used by isSmallerThanGroupOrder as the exclusive upper bound for the S half of a signature.
// Note: Java arrays are mutable even when the field is final; this array must never be written.
static final byte[] GROUP_ORDER = new byte[] {
(byte) 0xed, (byte) 0xd3, (byte) 0xf5, (byte) 0x5c,
(byte) 0x1a, (byte) 0x63, (byte) 0x12, (byte) 0x58,
(byte) 0xd6, (byte) 0x9c, (byte) 0xf7, (byte) 0xa2,
(byte) 0xde, (byte) 0xf9, (byte) 0xde, (byte) 0x14,
(byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00,
(byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00,
(byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x00,
(byte) 0x00, (byte) 0x00, (byte) 0x00, (byte) 0x10};
/**
 * Checks whether {@code s} represents an integer strictly smaller than the order of the group.
 *
 * <p>This is needed to ensure that EdDSA signatures are non-malleable, as failing to check the
 * range of S allows to modify signatures (cf. RFC 8032, Section 5.2.7 and Section 8.4.)
 *
 * @param s an integer in little-endian order.
 * @return true if {@code s} is smaller than {@code GROUP_ORDER}; false if it is equal or larger.
 */
private static boolean isSmallerThanGroupOrder(byte[] s) {
  // Walk from the most significant byte down; the first differing byte decides the comparison.
  for (int idx = FIELD_LEN - 1; idx >= 0; idx--) {
    // Both operands are widened to unsigned values in 0..255, so the difference cannot overflow.
    int delta = (s[idx] & 0xff) - (GROUP_ORDER[idx] & 0xff);
    if (delta != 0) {
      return delta < 0;
    }
  }
  // All bytes are equal: s equals the group order, which is not smaller.
  return false;
}
/**
 * Checks an EdDSA {@code signature} over {@code message} against {@code publicKey}.
 *
 * <p>The signature is rejected without any curve arithmetic if it does not have
 * {@code SIGNATURE_LEN} bytes or if its second half is not smaller than the group order.
 *
 * @param message the signed message
 * @param signature the signature to check, first half R followed by second half S
 * @param publicKey the encoded public key
 * @return true if the {@code signature} with {@code message} can be verified with
 *     {@code publicKey}, false otherwise.
 * @throws GeneralSecurityException if there is no SHA-512 algorithm defined in
 *     {@link EngineFactory}.MESSAGE_DIGEST.
 */
static boolean verify(final byte[] message, final byte[] signature,
    final byte[] publicKey) throws GeneralSecurityException {
  if (signature.length != SIGNATURE_LEN) {
    return false;
  }
  // S is the second half of the signature; it must be in canonical range.
  final byte[] encodedS = Arrays.copyOfRange(signature, FIELD_LEN, SIGNATURE_LEN);
  if (!isSmallerThanGroupOrder(encodedS)) {
    return false;
  }

  // Challenge: hash of R || publicKey || message, reduced in place.
  final MessageDigest sha512 = EngineFactory.MESSAGE_DIGEST.getInstance("SHA-512");
  sha512.update(signature, 0, FIELD_LEN);
  sha512.update(publicKey);
  sha512.update(message);
  final byte[] challenge = sha512.digest();
  reduce(challenge);

  // Recompute the expected R from the challenge, the negated public key and S.
  // NOTE(review): presumably this evaluates challenge * (-A) + S * B -- confirm against
  // doubleScalarMultVarTime.
  final byte[] expectedR =
      doubleScalarMultVarTime(challenge, XYZT.fromBytesNegateVarTime(publicKey), encodedS)
          .toBytes();

  // Accept only if the first FIELD_LEN bytes of the recomputed R match the signature's R.
  int matched = 0;
  while (matched < FIELD_LEN && expectedR[matched] == signature[matched]) {
    matched++;
  }
  return matched == FIELD_LEN;
}
}