Merge pull request #72 from carlescufi/master

Add a new compile-time macro to use the local native format for VLI
diff --git a/curve-specific.inc b/curve-specific.inc
index 81f725f..0453b21 100644
--- a/curve-specific.inc
+++ b/curve-specific.inc
@@ -1215,7 +1215,7 @@
     
     for (k = 0; k < num_words_secp256k1; ++k) {
         uint64_t p = (uint64_t)0x3D1 * right[k] + carry;
-        result[k] = p;
+        result[k] = (uint32_t) p;
         carry = p >> 32;
     }
     result[num_words_secp256k1] = carry;
diff --git a/uECC.c b/uECC.c
index f65fe37..f3dacb8 100644
--- a/uECC.c
+++ b/uECC.c
@@ -157,6 +157,15 @@
 #endif
 };
 
+static void bcopy(uint8_t *dst,          /* Byte-copy helper: destination buffer (written) */
+                  const uint8_t *src,    /* source buffer (only read) */
+                  unsigned num_bytes) {  /* number of bytes to copy; 0 copies nothing */
+    while (0 != num_bytes) {             /* copies from the last index down to index 0 */
+        num_bytes--;
+        dst[num_bytes] = src[num_bytes];
+    }
+} /* NOTE(review): same name as legacy BSD bcopy(src, dst, n), args reversed - confirm no clash */
+
 static cmpresult_t uECC_vli_cmp_unsafe(const uECC_word_t *left,
                                        const uECC_word_t *right,
                                        wordcount_t num_words);
@@ -985,8 +994,13 @@
 int uECC_make_key(uint8_t *public_key,
                   uint8_t *private_key,
                   uECC_Curve curve) {
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN
+    uECC_word_t *private = (uECC_word_t *)private_key;
+    uECC_word_t *public = (uECC_word_t *)public_key;
+#else
     uECC_word_t private[uECC_MAX_WORDS];
     uECC_word_t public[uECC_MAX_WORDS * 2];
+#endif
     uECC_word_t tries;
 
     for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) {
@@ -995,10 +1009,12 @@
         }
 
         if (EccPoint_compute_public_key(public, private, curve)) {
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0
             uECC_vli_nativeToBytes(private_key, BITS_TO_BYTES(curve->num_n_bits), private);
             uECC_vli_nativeToBytes(public_key, curve->num_bytes, public);
             uECC_vli_nativeToBytes(
                 public_key + curve->num_bytes, curve->num_bytes, public + curve->num_words);
+#endif
             return 1;
         }
     }
@@ -1011,6 +1027,7 @@
                        uECC_Curve curve) {
     uECC_word_t public[uECC_MAX_WORDS * 2];
     uECC_word_t private[uECC_MAX_WORDS];
+
     uECC_word_t tmp[uECC_MAX_WORDS];
     uECC_word_t *p2[2] = {private, tmp};
     uECC_word_t *initial_Z = 0;
@@ -1018,9 +1035,14 @@
     wordcount_t num_words = curve->num_words;
     wordcount_t num_bytes = curve->num_bytes;
 
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN
+    bcopy((uint8_t *) private, private_key, num_bytes);
+    bcopy((uint8_t *) public, public_key, num_bytes*2);
+#else
     uECC_vli_bytesToNative(private, private_key, BITS_TO_BYTES(curve->num_n_bits));
     uECC_vli_bytesToNative(public, public_key, num_bytes);
     uECC_vli_bytesToNative(public + num_words, public_key + num_bytes, num_bytes);
+#endif
 
     /* Regularize the bitcount for the private key so that attackers cannot use a side channel
        attack to learn the number of leading zeros. */
@@ -1036,7 +1058,11 @@
     }
 
     EccPoint_mult(public, public, p2[!carry], initial_Z, curve->num_n_bits + 1, curve);
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN
+    bcopy((uint8_t *) secret, (uint8_t *) public, num_bytes);
+#else
     uECC_vli_nativeToBytes(secret, num_bytes, public);
+#endif
     return !EccPoint_isZero(public, curve);
 }
 
@@ -1046,13 +1072,25 @@
     for (i = 0; i < curve->num_bytes; ++i) {
         compressed[i+1] = public_key[i];
     }
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN
+    compressed[0] = 2 + (public_key[curve->num_bytes] & 0x01);
+#else
     compressed[0] = 2 + (public_key[curve->num_bytes * 2 - 1] & 0x01);
+#endif
 }
 
 void uECC_decompress(const uint8_t *compressed, uint8_t *public_key, uECC_Curve curve) {
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN
+    uECC_word_t *point = (uECC_word_t *)public_key;
+#else
     uECC_word_t point[uECC_MAX_WORDS * 2];
+#endif
     uECC_word_t *y = point + curve->num_words;
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN
+    bcopy(public_key, compressed+1, curve->num_bytes);
+#else
     uECC_vli_bytesToNative(point, compressed + 1, curve->num_bytes);
+#endif
     curve->x_side(y, point, curve);
     curve->mod_sqrt(y, curve);
 
@@ -1060,8 +1098,10 @@
         uECC_vli_sub(y, curve->p, y, curve->num_words);
     }
 
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0
     uECC_vli_nativeToBytes(public_key, curve->num_bytes, point);
     uECC_vli_nativeToBytes(public_key + curve->num_bytes, curve->num_bytes, y);
+#endif
 }
 #endif /* uECC_SUPPORT_COMPRESSED_POINT */
 
@@ -1089,19 +1129,32 @@
 }
 
 int uECC_valid_public_key(const uint8_t *public_key, uECC_Curve curve) {
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN /* native mode: check the point straight from public_key */
+    uECC_word_t *public = (uECC_word_t *)public_key; /* alias caller's bytes; cast drops const */
+#else
     uECC_word_t public[uECC_MAX_WORDS * 2];
+#endif /* uECC_VLI_NATIVE_LITTLE_ENDIAN */
 
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 /* conversion is only needed to fill the local array */
     uECC_vli_bytesToNative(public, public_key, curve->num_bytes);
     uECC_vli_bytesToNative(
         public + curve->num_words, public_key + curve->num_bytes, curve->num_bytes);
+#endif /* uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 */
     return uECC_valid_point(public, curve);
 }
 
 int uECC_compute_public_key(const uint8_t *private_key, uint8_t *public_key, uECC_Curve curve) {
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN
+    uECC_word_t *private = (uECC_word_t *)private_key;
+    uECC_word_t *public = (uECC_word_t *)public_key;
+#else
     uECC_word_t private[uECC_MAX_WORDS];
     uECC_word_t public[uECC_MAX_WORDS * 2];
+#endif
 
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0
     uECC_vli_bytesToNative(private, private_key, BITS_TO_BYTES(curve->num_n_bits));
+#endif
 
     /* Make sure the private key is in the range [1, n-1]. */
     if (uECC_vli_isZero(private, BITS_TO_WORDS(curve->num_n_bits))) {
@@ -1117,9 +1170,11 @@
         return 0;
     }
 
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0
     uECC_vli_nativeToBytes(public_key, curve->num_bytes, public);
     uECC_vli_nativeToBytes(
         public_key + curve->num_bytes, curve->num_bytes, public + curve->num_words);
+#endif
     return 1;
 }
 
@@ -1132,17 +1187,26 @@
                      uECC_Curve curve) {
     unsigned num_n_bytes = BITS_TO_BYTES(curve->num_n_bits);
     unsigned num_n_words = BITS_TO_WORDS(curve->num_n_bits);
+    int shift;
+    uECC_word_t carry;
+    uECC_word_t *ptr;
+
     if (bits_size > num_n_bytes) {
         bits_size = num_n_bytes;
     }
+
     uECC_vli_clear(native, num_n_words);
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN
+    bcopy((uint8_t *) native, bits, bits_size);
+#else
     uECC_vli_bytesToNative(native, bits, bits_size);
+#endif    
     if (bits_size * 8 <= (unsigned)curve->num_n_bits) {
         return;
     }
-    int shift = bits_size * 8 - curve->num_n_bits;
-    uECC_word_t carry = 0;
-    uECC_word_t *ptr = native + num_n_words;
+    shift = bits_size * 8 - curve->num_n_bits;
+    carry = 0;
+    ptr = native + num_n_words;
     while (ptr-- > native) {
         uECC_word_t temp = *ptr;
         *ptr = (temp >> shift) | carry;
@@ -1161,10 +1225,15 @@
                             uECC_word_t *k,
                             uint8_t *signature,
                             uECC_Curve curve) {
+
     uECC_word_t tmp[uECC_MAX_WORDS];
     uECC_word_t s[uECC_MAX_WORDS];
     uECC_word_t *k2[2] = {tmp, s};
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN
+    uECC_word_t *p = (uECC_word_t *)signature;
+#else
     uECC_word_t p[uECC_MAX_WORDS * 2];
+#endif
     uECC_word_t carry;
     wordcount_t num_words = curve->num_words;
     wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits);
@@ -1196,9 +1265,16 @@
     uECC_vli_modInv(k, k, curve->n, num_n_words);       /* k = 1 / k' */
     uECC_vli_modMult(k, k, tmp, curve->n, num_n_words); /* k = 1 / k */
 
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0
     uECC_vli_nativeToBytes(signature, curve->num_bytes, p); /* store r */
+#endif
 
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN
+    bcopy((uint8_t *) tmp, private_key, BITS_TO_BYTES(curve->num_n_bits));
+#else
     uECC_vli_bytesToNative(tmp, private_key, BITS_TO_BYTES(curve->num_n_bits)); /* tmp = d */
+#endif
+
     s[num_n_words - 1] = 0;
     uECC_vli_set(s, p, num_words);
     uECC_vli_modMult(s, tmp, s, curve->n, num_n_words); /* s = r*d */
@@ -1209,7 +1285,11 @@
     if (uECC_vli_numBits(s, num_n_words) > (bitcount_t)curve->num_bytes * 8) {
         return 0;
     }
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN
+    bcopy((uint8_t *) signature + curve->num_bytes, (uint8_t *) s, curve->num_bytes);
+#else
     uECC_vli_nativeToBytes(signature + curve->num_bytes, curve->num_bytes, s);
+#endif    
     return 1;
 }
 
@@ -1368,7 +1448,6 @@
                 uECC_Curve curve) {
     uECC_word_t u1[uECC_MAX_WORDS], u2[uECC_MAX_WORDS];
     uECC_word_t z[uECC_MAX_WORDS];
-    uECC_word_t public[uECC_MAX_WORDS * 2];
     uECC_word_t sum[uECC_MAX_WORDS * 2];
     uECC_word_t rx[uECC_MAX_WORDS];
     uECC_word_t ry[uECC_MAX_WORDS];
@@ -1379,6 +1458,11 @@
     const uECC_word_t *point;
     bitcount_t num_bits;
     bitcount_t i;
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN
+    uECC_word_t *public = (uECC_word_t *)public_key;
+#else
+    uECC_word_t public[uECC_MAX_WORDS * 2];
+#endif    
     uECC_word_t r[uECC_MAX_WORDS], s[uECC_MAX_WORDS];
     wordcount_t num_words = curve->num_words;
     wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits);
@@ -1387,11 +1471,16 @@
     r[num_n_words - 1] = 0;
     s[num_n_words - 1] = 0;
 
+#if uECC_VLI_NATIVE_LITTLE_ENDIAN
+    bcopy((uint8_t *) r, signature, curve->num_bytes);
+    bcopy((uint8_t *) s, signature + curve->num_bytes, curve->num_bytes);
+#else
     uECC_vli_bytesToNative(public, public_key, curve->num_bytes);
     uECC_vli_bytesToNative(
         public + num_words, public_key + curve->num_bytes, curve->num_bytes);
     uECC_vli_bytesToNative(r, signature, curve->num_bytes);
     uECC_vli_bytesToNative(s, signature + curve->num_bytes, curve->num_bytes);
+#endif
 
     /* r, s must not be 0. */
     if (uECC_vli_isZero(r, num_words) || uECC_vli_isZero(s, num_words)) {
diff --git a/uECC.h b/uECC.h
index 5f96957..dcc8e82 100644
--- a/uECC.h
+++ b/uECC.h
@@ -35,6 +35,18 @@
     #define uECC_SQUARE_FUNC 0
 #endif
 
+/* uECC_VLI_NATIVE_LITTLE_ENDIAN - If enabled (defined as nonzero), this will switch to native
+little-endian format for *all* arrays passed in and out of the public API. This includes public 
+and private keys, shared secrets, signatures and message hashes. 
+Using this switch reduces the amount of call stack memory used by uECC, since fewer intermediate
+translations are required. 
+Note that this will *only* work on native little-endian processors and it will treat the uint8_t
+arrays passed into the public API as word arrays, therefore requiring the provided byte arrays 
+to be word aligned on architectures that do not support unaligned accesses. */
+#ifndef uECC_VLI_NATIVE_LITTLE_ENDIAN
+    #define uECC_VLI_NATIVE_LITTLE_ENDIAN 0
+#endif
+
 /* Curve support selection. Set to 0 to remove that curve. */
 #ifndef uECC_SUPPORTS_secp160r1
     #define uECC_SUPPORTS_secp160r1 1