[xiph-commits] r8840 - trunk/speex/libspeex
jm at motherfish-iii.xiph.org
jm at motherfish-iii.xiph.org
Sat Feb 5 15:23:23 PST 2005
Author: jm
Date: 2005-02-05 15:23:22 -0800 (Sat, 05 Feb 2005)
New Revision: 8840
Modified:
trunk/speex/libspeex/ltp.c
Log:
ARM assembly version of inner_prod with 8x unrolling
Modified: trunk/speex/libspeex/ltp.c
===================================================================
--- trunk/speex/libspeex/ltp.c 2005-02-05 18:09:20 UTC (rev 8839)
+++ trunk/speex/libspeex/ltp.c 2005-02-05 23:23:22 UTC (rev 8840)
@@ -46,8 +46,62 @@
#ifdef _USE_SSE
#include "ltp_sse.h"
#else
+
+#if defined(ARM4_ASM) || defined(ARM5E_ASM)
static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
{
+ spx_word32_t sum1=0,sum2=0;
+ spx_word16_t *deadx, *deady;
+ int deadlen, dead1, dead2, dead3, dead4, dead5, dead6;
+ __asm__ __volatile__ (
+ "\tldrsh %5, [%0], #2 \n"
+ "\tldrsh %6, [%1], #2 \n"
+ ".inner_prod_loop:\n"
+ "\tsub %7, %7, %7\n"
+ "\tsub %10, %10, %10\n"
+
+ "\tldrsh %8, [%0], #2 \n"
+ "\tldrsh %9, [%1], #2 \n"
+ "\tmla %7, %5, %6, %7\n"
+ "\tldrsh %5, [%0], #2 \n"
+ "\tldrsh %6, [%1], #2 \n"
+ "\tmla %10, %8, %9, %10\n"
+ "\tldrsh %8, [%0], #2 \n"
+ "\tldrsh %9, [%1], #2 \n"
+ "\tmla %7, %5, %6, %7\n"
+ "\tldrsh %5, [%0], #2 \n"
+ "\tldrsh %6, [%1], #2 \n"
+ "\tmla %10, %8, %9, %10\n"
+
+ "\tldrsh %8, [%0], #2 \n"
+ "\tldrsh %9, [%1], #2 \n"
+ "\tmla %7, %5, %6, %7\n"
+ "\tldrsh %5, [%0], #2 \n"
+ "\tldrsh %6, [%1], #2 \n"
+ "\tmla %10, %8, %9, %10\n"
+ "\tldrsh %8, [%0], #2 \n"
+ "\tldrsh %9, [%1], #2 \n"
+ "\tmla %7, %5, %6, %7\n"
+ "\tldrsh %5, [%0], #2 \n"
+ "\tldrsh %6, [%1], #2 \n"
+ "\tmla %10, %8, %9, %10\n"
+
+ "\tsubs %4, %4, #1\n"
+ "\tadd %2, %2, %7, asr #5\n"
+ "\tadd %3, %3, %10, asr #5\n"
+ "\tbne .inner_prod_loop\n"
+ : "=r" (deadx), "=r" (deady), "=r" (sum1), "=r" (sum2), "=r" (deadlen),
+ "=r" (dead1), "=r" (dead2), "=r" (dead3), "=r" (dead4), "=r" (dead5), "=r"
+ : "0" (x), "1" (y), "2" (sum1), "3" (sum2), "4" (len>>3)
+ : "cc", "memory"
+ );
+ return (sum1+sum2)>>1;
+}
+
+
+#else
+static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
+{
int i;
spx_word32_t sum=0;
for (i=0;i<len;i+=4)
@@ -61,6 +115,7 @@
}
return sum;
}
+#endif
static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
{
More information about the commits mailing list