Add AVR fast multiply/square for secp224r1.
diff --git a/asm_avr.inc b/asm_avr.inc
index 0b07a3d..08b996f 100644
--- a/asm_avr.inc
+++ b/asm_avr.inc
@@ -4847,6 +4847,3776 @@
     );
 }
 #define asm_mult 1
+#elif (uECC_BYTES == 28)
+__attribute((noinline))
+static void vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
+    __asm__ volatile (
+        "adiw r30, 20 \n\t"
+        "adiw r28, 20 \n\t"
+        "ld r2, x+ \n\t"
+        "ld r3, x+ \n\t"
+        "ld r4, x+ \n\t"
+        "ld r5, x+ \n\t"
+        "ld r6, x+ \n\t"
+        "ld r7, x+ \n\t"
+        "ld r8, x+ \n\t"
+        "ld r9, x+ \n\t"
+        "ld r12, y+ \n\t"
+        "ld r13, y+ \n\t"
+        "ld r14, y+ \n\t"
+        "ld r15, y+ \n\t"
+        "ld r16, y+ \n\t"
+        "ld r17, y+ \n\t"
+        "ld r18, y+ \n\t"
+        "ld r19, y+ \n\t"
+        "ldi r25, 0 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r12 \n\t"
+        "st z+, r0 \n\t"
+        "mov r22, r1 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r2, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "mul r3, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r2, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r2, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r3, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r4, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r5, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r6, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r7, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r8, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "mul r9, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "st z+, r23 \n\t"
+        "st z+, r24 \n\t"
+
+        "sbiw r30, 26 \n\t"
+        "sbiw r28, 18 \n\t"
+        "ld r12, y+ \n\t"
+        "ld r13, y+ \n\t"
+        "ld r14, y+ \n\t"
+        "ld r15, y+ \n\t"
+        "ld r16, y+ \n\t"
+        "ld r17, y+ \n\t"
+        "ld r18, y+ \n\t"
+        "ld r19, y+ \n\t"
+        "ld r20, y+ \n\t"
+        "ld r21, y+ \n\t"
+        "ld r10, x+ \n\t"
+        "ld r11, x+ \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r12 \n\t"
+        "st z+, r0 \n\t"
+        "mov r22, r1 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r2, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "mul r3, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r2, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r2, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r10, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r2, x+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r3, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r10, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r3, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r4, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r4, x+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r5, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r10, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r5, x+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r6, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r10, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r6, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r7, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r7, x+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r8, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r10, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r8, x+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r9, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r10, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r9, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r10, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r12, y+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r10, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r13, y+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r10, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r14, y+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r10, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r15, y+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r10, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r16, y+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r10, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r17, y+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r10, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r18, y+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r10, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r19, y+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r10, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r11, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r3, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r4, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r5, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r6, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r7, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r8, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "mul r9, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "st z+, r22 \n\t"
+        "st z+, r23 \n\t"
+
+        "sbiw r30, 46 \n\t"
+        "sbiw r28, 28 \n\t"
+        "sbiw r26, 18 \n\t"
+        "ld r2, x+ \n\t"
+        "ld r12, y+ \n\t"
+        "ld r3, x+ \n\t"
+        "ld r13, y+ \n\t"
+        "ld r4, x+ \n\t"
+        "ld r14, y+ \n\t"
+        "ld r5, x+ \n\t"
+        "ld r15, y+ \n\t"
+        "ld r6, x+ \n\t"
+        "ld r16, y+ \n\t"
+        "ld r7, x+ \n\t"
+        "ld r17, y+ \n\t"
+        "ld r8, x+ \n\t"
+        "ld r18, y+ \n\t"
+        "ld r9, x+ \n\t"
+        "ld r19, y+ \n\t"
+        "ld r10, x+ \n\t"
+        "ld r20, y+ \n\t"
+        "ld r11, x+ \n\t"
+        "ld r21, y+ \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r12 \n\t"
+        "st z+, r0 \n\t"
+        "mov r22, r1 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r2, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "mul r3, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r2, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r2, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r10, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r2, x+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r3, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r10, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r3, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r4, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r4, x+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r5, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r10, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r5, x+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r6, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r10, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r6, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r7, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r7, x+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r8, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r10, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r8, x+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r9, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r10, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r9, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r10, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r10, x+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r11, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r10, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r11, x+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r2, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r10, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r2, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r3, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r3, x+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r4, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r10, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r4, x+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r5, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r10, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r5, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r6, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r6, x+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r7, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r10, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r7, x+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r8, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r10, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r8, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r9, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r9, x+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r10, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r12, y+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r10, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r13, y+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r10, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r14, y+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r10, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r15, y+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r10, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r16, y+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r10, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r17, y+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r10, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r18, y+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r10, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r19, y+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r10, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r20, y+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r10, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r21, y+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r10, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r12, y+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r10, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r13, y+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r10, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r14, y+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r10, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r15, y+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r10, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r16, y+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r10, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r17, y+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r10, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r11, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r20 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r25 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r18, y+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r10, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r19, y+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r10, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r11, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r21 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r20 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r25 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r11, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r21 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r3, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r6, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r4, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r6, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r7, r16 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r5, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r6, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r7, r18 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r17 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r9, r16 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r7, r19 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r8, r18 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r9, r17 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r8, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "mul r9, r19 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "st z+, r24 \n\t"
+        "st z+, r22 \n\t"
+        "eor r1, r1 \n\t"
+        : "+x" (left), "+y" (right), "+z" (result)
+        :
+        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
+          "r21", "r22", "r23", "r24", "r25", "cc", "memory"
+    );
+}
+#define asm_mult 1 /* an assembly vli_mult is defined above for this uECC_BYTES */
 #elif (uECC_BYTES == 32)
 __attribute((noinline))
 static void vli_mult(uint8_t *result, const uint8_t *left, const uint8_t *right) {
@@ -9778,6 +13548,7 @@
 #if uECC_SQUARE_FUNC
 
 #if (uECC_BYTES == 20)
+// May be inlined: y (r28:r29), avr-gcc's frame pointer, is left untouched here.
 static void vli_square(uint8_t *result, const uint8_t *left) {
     __asm__ volatile (
         "ld r2, x+ \n\t"
@@ -12601,6 +16372,2221 @@
 }
 #define asm_square 1
 
+#elif (uECC_BYTES == 28)
+
+__attribute((noinline))
+static void vli_square(uint8_t *result, const uint8_t *left) {
+    __asm__ volatile (
+        "ldi r25, 0 \n\t"
+        "movw r28, r26 \n\t"
+        "ld r2, x+ \n\t"
+        "ld r3, x+ \n\t"
+        "ld r4, x+ \n\t"
+        "ld r5, x+ \n\t"
+        "adiw r28, 20 \n\t"
+        "ld r12, y+ \n\t"
+        "ld r13, y+ \n\t"
+        "ld r14, y+ \n\t"
+        "ld r15, y+ \n\t"
+        "adiw r30, 20 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul 2, 12 \n\t"
+        "st z+, r0 \n\t"
+        "mov r22, r1 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r2, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r12, y+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r2, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r13, y+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r2, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r14, y+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r2, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r3, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r12 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r15, y+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r2, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r4, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r5, r12 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r2, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r3, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r3, x+ \n\t"
+        "ldi r23, 0 \n\t"
+        "mul r4, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r5, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r2, r13 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ld r4, x+ \n\t"
+        "ldi r24, 0 \n\t"
+        "mul r5, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r2, r14 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "mul r3, r13 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ld r5, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r2, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r3, r15 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r4, r14 \n\t"
+        "add r24, r0 \n\t"
+        "adc r22, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r24 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r4, r15 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "mul r5, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "st z+, r23 \n\t"
+        "st z+, r24 \n\t"
+
+        "sbiw r26, 8 \n\t"
+        "sbiw r30, 36 \n\t"
+        "ld r2, x+ \n\t"
+        "ld r3, x+ \n\t"
+        "ld r4, x+ \n\t"
+        "ld r5, x+ \n\t"
+        "ld r6, x+ \n\t"
+        "ld r7, x+ \n\t"
+        "ld r8, x+ \n\t"
+        "ld r9, x+ \n\t"
+        "ld r10, x+ \n\t"
+        "ld r11, x+ \n\t"
+        "ld r12, x+ \n\t"
+        "ld r13, x+ \n\t"
+        "ld r14, x+ \n\t"
+        "ld r15, x+ \n\t"
+        "ld r16, x+ \n\t"
+        "ld r17, x+ \n\t"
+        "ld r18, x+ \n\t"
+        "ld r19, x+ \n\t"
+        "ld r20, x+ \n\t"
+        "ld r21, x+ \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r2, r2 \n\t"
+        "st z+, r0 \n\t"
+        "mov r22, r1 \n\t"
+
+        "ldi r24, 0 \n\t"
+        "mul r2, r3 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "add r22, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r24, r25 \n\t"
+        "st z+, r22 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r2, r5 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r3, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r6 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r3, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r4, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r2, r7 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r3, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r4, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r8 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r3, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r5, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r2, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r3, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r4, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r5, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r10 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r3, r9 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r6, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r2, r11 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r3, r10 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r4, r9 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r5, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r6, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r12 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r3, r11 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r10 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r9 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r7, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r2, r13 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r3, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r4, r11 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r5, r10 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r6, r9 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r7, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r14 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r3, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r11 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r10 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r9 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r8, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r2, r15 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r3, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r4, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r5, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r6, r11 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r7, r10 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r8, r9 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r16 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r3, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r11 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r10 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r9, r9 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r2, r17 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r3, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r4, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r5, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r6, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r7, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r8, r11 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r9, r10 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r18 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r3, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r11 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r10, r10 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r2, r19 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r3, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r4, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r5, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r6, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r7, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r8, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r9, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r10, r11 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r2, r20 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r3, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r11, r11 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r2, r21 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r3, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r4, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r5, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r6, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r7, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r8, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r9, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r10, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r11, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r2, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r3, r21 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r4, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r12, r12 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r3, r2 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r4, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r5, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r6, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r7, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r8, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r9, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r10, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r11, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r12, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r25 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r3, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r4, r2 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r5, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r6, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r12, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r13, r13 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r4, r3 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r5, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r6, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r7, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r8, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r9, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r10, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r11, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r12, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r13, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r25 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r4, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r5, r3 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r6, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r7, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r12, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r13, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r14, r14 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r5, r4 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r6, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r7, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r8, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r9, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r10, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r11, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r12, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r13, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r14, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r25 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r5, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r6, r4 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r7, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r8, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r12, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r13, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r14, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r15, r15 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r6, r5 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r7, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r8, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r9, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r10, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r11, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r12, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r13, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r14, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r15, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r25 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r6, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r7, r5 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r8, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r9, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r12, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r13, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r14, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r15, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r16, r16 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r7, r6 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r8, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r9, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r10, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r11, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r12, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r13, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r14, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r15, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r16, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r25 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r7, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r8, r6 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r9, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r10, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r12, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r13, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r14, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r15, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r16, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r17, r17 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r8, r7 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r9, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r10, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r11, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r12, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r13, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r14, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r15, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r16, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r17, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r25 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r8, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r9, r7 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r10, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r11, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r12, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r13, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r14, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r15, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r16, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r17, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r18, r18 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r9, r8 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r10, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r11, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r12, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r13, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r14, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r15, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r16, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r17, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r18, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r25 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ld r9, x+ \n\t"
+        "ldi r22, 0 \n\t"
+        "mul r10, r8 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r11, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r12, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r13, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r14, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r15, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r16, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r17, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r18, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r25 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r19, r19 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r10, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r11, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r12, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r13, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r14, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r15, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r16, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r17, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r18, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r19, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "ld r0, z \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r25 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r11, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r12, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r13, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r14, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r15, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r16, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r17, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r18, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r19, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r20, r20 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r12, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r13, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r14, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r15, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r16, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r17, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r18, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r19, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r20, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r13, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r14, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r15, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r16, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r17, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r18, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r19, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r20, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r21, r21 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r14, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r15, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r16, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r17, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r18, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r19, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r20, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r21, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r15, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r16, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r17, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r18, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r19, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r20, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r21, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r2, r2 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r16, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r17, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r18, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r19, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r20, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r21, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r2, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r17, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r18, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r19, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r20, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r21, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r3, r3 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r18, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r19, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r20, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r21, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r2, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r3, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r19, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r20, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r21, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r2, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r4, r4 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r20, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r21, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r2, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r3, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r4, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r21, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r2, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r3, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r4, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r5, r5 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r2, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r3, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r4, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r5, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r3, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r4, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "mul r5, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r6, r6 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r4, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r5, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "mul r6, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r22, 0 \n\t"
+        "mul r5, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r24, r1 \n\t"
+        "mul r6, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r24 \n\t"
+        "rol r22 \n\t"
+        "mul r7, r7 \n\t"
+        "add r23, r0 \n\t"
+        "adc r24, r1 \n\t"
+        "adc r22, r25 \n\t"
+        "add r23, r28 \n\t"
+        "adc r24, r29 \n\t"
+        "adc r22, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r29, 0 \n\t"
+        "mul r6, r9 \n\t"
+        "mov r23, r0 \n\t"
+        "mov r28, r1 \n\t"
+        "mul r7, r8 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "adc r29, r25 \n\t"
+        "lsl r23 \n\t"
+        "rol r28 \n\t"
+        "rol r29 \n\t"
+        "add r23, r24 \n\t"
+        "adc r28, r22 \n\t"
+        "adc r29, r25 \n\t"
+        "st z+, r23 \n\t"
+
+        "ldi r23, 0 \n\t"
+        "mul r7, r9 \n\t"
+        "add r28, r0 \n\t"
+        "adc r29, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "add r28, r0 \n\t"
+        "adc r29, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "mul r8, r8 \n\t"
+        "add r28, r0 \n\t"
+        "adc r29, r1 \n\t"
+        "adc r23, r25 \n\t"
+        "st z+, r28 \n\t"
+
+        "ldi r28, 0 \n\t"
+        "mul r8, r9 \n\t"
+        "add r29, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r28, r25 \n\t"
+        "add r29, r0 \n\t"
+        "adc r23, r1 \n\t"
+        "adc r28, r25 \n\t"
+        "st z+, r29 \n\t"
+
+        "mul r9, r9 \n\t"
+        "add r23, r0 \n\t"
+        "adc r28, r1 \n\t"
+        "st z+, r23 \n\t"
+        "st z+, r28 \n\t"
+        "eor r1, r1 \n\t"
+        : "+x" (left), "+z" (result)
+        :
+        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20",
+          "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc", "memory"
+    );
+}
+#define asm_square 1
+
 #elif (uECC_BYTES == 32)
 
 __attribute((noinline))
diff --git a/uECC.c b/uECC.c
index 316b191..85d5f33 100644
--- a/uECC.c
+++ b/uECC.c
@@ -1328,6 +1328,7 @@
 /* Computes result = product % curve_p
    from http://www.nsa.gov/ia/_files/nist-routines.pdf */
 #if uECC_WORD_SIZE == 1
+// TODO: it may be faster to use the omega_mult method when fully asm-optimized.
 void vli_mmod_fast(uint8_t *RESTRICT result, uint8_t *RESTRICT product) {
     uint8_t tmp[uECC_WORDS];
     int8_t carry;