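Minor speedups to celt_pitch_xcorr_edsp(). In the unaligned case, this hoists the per-iteration shift of the carried y value out of the 4-sample main loop: y_0 is now kept in the top halfword of r8 (read with SMLABT) and shifted back down only once after the loop exits. The conditional pre-loads of r9 and r7 for the next iteration are replaced by unconditional loads at the top of the loop.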

commit: c16a96b45886283d23f8bfda5bad115397f21689 [log] [tgz]
author: Timothy B. Terriberry <tterribe@xiph.org> Wed Nov 27 11:06:59 2013 -0800
committer: Timothy B. Terriberry <tterribe@xiph.org> Wed Nov 27 11:08:33 2013 -0800
tree: 9cff3b7dbd56522a8781e2019a16463c5234fda5
parent: a1740d9ad176ed3716b5c35cba6c47da733b789c [diff]
diff --git a/celt/arm/celt_pitch_xcorr_arm.s b/celt/arm/celt_pitch_xcorr_arm.s
index 0ad8f5b..09917b1 100644
--- a/celt/arm/celt_pitch_xcorr_arm.s
+++ b/celt/arm/celt_pitch_xcorr_arm.s

@@ -371,20 +371,19 @@
   LDRH         r8, [r5], #2
   BLE celt_pitch_xcorr_edsp_process1u_loop4_done
   LDR          r6, [r4], #4
-  LDR          r9, [r5], #4
-  LDR          r7, [r4], #4
+  MOV          r8, r8, LSL #16
 celt_pitch_xcorr_edsp_process1u_loop4
-  SMLABB       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
-  SUBS         r12, r12, #4         ; j-=4
+  LDR          r9, [r5], #4
+  SMLABT       r14, r6, r8, r14     ; sum = MAC16_16(sum, x_0, y_0)
+  LDR          r7, [r4], #4
   SMLATB       r14, r6, r9, r14     ; sum = MAC16_16(sum, x_1, y_1)
-  LDR          r10, [r5], #4
+  LDR          r8, [r5], #4
   SMLABT       r14, r7, r9, r14     ; sum = MAC16_16(sum, x_2, y_2)
+  SUBS         r12, r12, #4         ; j-=4
+  SMLATB       r14, r7, r8, r14     ; sum = MAC16_16(sum, x_3, y_3)
   LDRGT        r6, [r4], #4
-  SMLATB       r14, r7, r10, r14    ; sum = MAC16_16(sum, x_3, y_3)
-  LDRGT        r9, [r5], #4
-  MOV          r8, r10, LSR #16
-  LDRGT        r7, [r4], #4
   BGT celt_pitch_xcorr_edsp_process1u_loop4
+  MOV          r8, r8, LSR #16
 celt_pitch_xcorr_edsp_process1u_loop4_done
   ADDS         r12, r12, #4
 celt_pitch_xcorr_edsp_process1u_loop1
@@ -530,6 +529,7 @@
   ADDS         r12, r12, #1
   LDRGEH       r6, [r4], #2
   LDRGEH       r8, [r5], #2
+  ; Stall
   SMLABBGE     r14, r6, r8, r14     ; sum = MAC16_16(sum, *x, *y)
   ; maxcorr = max(maxcorr, sum)
   CMP          r0, r14
commit	c16a96b45886283d23f8bfda5bad115397f21689	[log] [tgz]
author	Timothy B. Terriberry <tterribe@xiph.org>	Wed Nov 27 11:06:59 2013 -0800
committer	Timothy B. Terriberry <tterribe@xiph.org>	Wed Nov 27 11:08:33 2013 -0800
tree	9cff3b7dbd56522a8781e2019a16463c5234fda5
parent	a1740d9ad176ed3716b5c35cba6c47da733b789c [diff]