Simplify vli_mmod_fast.
diff --git a/ecc.c b/ecc.c
index 305ba85..f83eaef 100644
--- a/ecc.c
+++ b/ecc.c
@@ -3658,6 +3658,7 @@
static void vli_mmod_fast(uint8_t *RESTRICT p_result, uint8_t *RESTRICT p_product)
{
#if (ECC_ASM == ecc_asm_avr)
+ uint8_t l_carry = 0;
__asm__ volatile (
"in r30, __SP_L__ \n\t"
"in r31, __SP_H__ \n\t"
@@ -3668,8 +3669,6 @@
"out __SREG__, r0 \n\t"
"out __SP_L__, r30 \n\t"
- "eor r20, r20 \n\t" /* r20 = 0 (carry count) */
-
"adiw r30, 25 \n\t" /* we are shifting by 31 bits, so shift over 4 bytes (+ 1 since z initially points below the stack) */
"adiw r26, 40 \n\t" /* end of p_product */
"ld r18, -x \n\t" /* Load word. */
@@ -3734,7 +3733,7 @@
"ld r19, x+ \n\t"
"adc r18, r19 \n\t"
"st y+, r18 \n\t")
- "adc r20, __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
+ "adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
/* at this point x is at the end of p_product, y is at the end of p_result, z is 20 bytes into tmp */
"sbiw r28, 20 \n\t" /* move y back to point at p_result */
@@ -3823,51 +3822,33 @@
"adc r18, __zero_reg__ \n\t"
"st y+, r18 \n\t")
- "adc r20, __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
+ "adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
"sbiw r28, 20 \n\t" /* move y back to point at p_result */
"mmod_after_remult: \n\t"
- "sbiw r30, 24 \n\t" /* move z back to point at tmp */
-
- /* carry is <= 2, so subtract up to 2 times */
- "cpse r20, __zero_reg__ \n\t"
- "rcall mmod_sub \n\t"
- "cpse r20, __zero_reg__ \n\t"
- "rcall mmod_sub \n\t"
- "rjmp mmod_end \n\t"
-
- "mmod_sub: \n\t"
-
- /* subtract curve_p (loaded into x) from p_result (in y) */
- "ldi r26, lo8(curve_p) \n\t" /* make x point at curve_p */
- "ldi r27, hi8(curve_p) \n\t"
- "ld r18, y \n\t"
- "ld r19, x+ \n\t"
- "sub r18, r19 \n\t"
- "st y+, r18 \n\t"
- REPEAT(19, "ld r18, y \n\t"
- "ld r19, x+ \n\t"
- "sbc r18, r19 \n\t"
- "st y+, r18 \n\t")
- "sbiw r28, 20 \n\t" /* make y point at p_result again */
- "dec r20 \n\t" /* subtract 1 from carry flag */
- "ret \n\t"
-
- "mmod_end: \n\t"
-
- "adiw r30, 23 \n\t"
+ "sbiw r30, 1 \n\t" /* fix stack pointer */
"in r0, __SREG__ \n\t"
"cli \n\t"
"out __SP_H__, r31 \n\t"
"out __SREG__, r0 \n\t"
"out __SP_L__, r30 \n\t"
- : "+y" (p_result), "+x" (p_product)
+ : "+y" (p_result), "+x" (p_product), [carry] "+r" (l_carry)
:
- : "r0", "r18", "r19", "r20", "r30", "r31", "cc", "memory"
+ : "r0", "r18", "r19", "r30", "r31", "cc", "memory"
);
+ if(l_carry > 0)
+ {
+ --l_carry;
+ vli_sub(p_result, p_result, curve_p);
+ }
+ if(l_carry > 0)
+ {
+ vli_sub(p_result, p_result, curve_p);
+ }
+
if(vli_cmp(p_result, curve_p) > 0)
{
vli_sub(p_result, p_result, curve_p);