[xiph-commits] r8840 - trunk/speex/libspeex

jm at motherfish-iii.xiph.org jm at motherfish-iii.xiph.org
Sat Feb 5 15:23:23 PST 2005


Author: jm
Date: 2005-02-05 15:23:22 -0800 (Sat, 05 Feb 2005)
New Revision: 8840

Modified:
   trunk/speex/libspeex/ltp.c
Log:
ARM assembly version of inner_prod with 8x unrolling


Modified: trunk/speex/libspeex/ltp.c
===================================================================
--- trunk/speex/libspeex/ltp.c	2005-02-05 18:09:20 UTC (rev 8839)
+++ trunk/speex/libspeex/ltp.c	2005-02-05 23:23:22 UTC (rev 8840)
@@ -46,8 +46,62 @@
 #ifdef _USE_SSE
 #include "ltp_sse.h"
 #else
+
+#if defined(ARM4_ASM) || defined(ARM5E_ASM)
 static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
 {
+   spx_word32_t sum1=0,sum2=0;
+   spx_word16_t *deadx, *deady;
+   int deadlen, dead1, dead2, dead3, dead4, dead5, dead6;
+   __asm__ __volatile__ (
+         "\tldrsh %5, [%0], #2 \n"
+         "\tldrsh %6, [%1], #2 \n"
+         ".inner_prod_loop:\n"
+         "\tsub %7, %7, %7\n"
+         "\tsub %10, %10, %10\n"
+
+         "\tldrsh %8, [%0], #2 \n"
+         "\tldrsh %9, [%1], #2 \n"
+         "\tmla %7, %5, %6, %7\n"
+         "\tldrsh %5, [%0], #2 \n"
+         "\tldrsh %6, [%1], #2 \n"
+         "\tmla %10, %8, %9, %10\n"
+         "\tldrsh %8, [%0], #2 \n"
+         "\tldrsh %9, [%1], #2 \n"
+         "\tmla %7, %5, %6, %7\n"
+         "\tldrsh %5, [%0], #2 \n"
+         "\tldrsh %6, [%1], #2 \n"
+         "\tmla %10, %8, %9, %10\n"
+
+         "\tldrsh %8, [%0], #2 \n"
+         "\tldrsh %9, [%1], #2 \n"
+         "\tmla %7, %5, %6, %7\n"
+         "\tldrsh %5, [%0], #2 \n"
+         "\tldrsh %6, [%1], #2 \n"
+         "\tmla %10, %8, %9, %10\n"
+         "\tldrsh %8, [%0], #2 \n"
+         "\tldrsh %9, [%1], #2 \n"
+         "\tmla %7, %5, %6, %7\n"
+         "\tldrsh %5, [%0], #2 \n"
+         "\tldrsh %6, [%1], #2 \n"
+         "\tmla %10, %8, %9, %10\n"
+
+         "\tsubs %4, %4, #1\n"
+         "\tadd %2, %2, %7, asr #5\n"
+         "\tadd %3, %3, %10, asr #5\n"
+         "\tbne .inner_prod_loop\n"
+   : "=r" (deadx), "=r" (deady), "=r" (sum1),  "=r" (sum2), "=r" (deadlen),
+   "=r" (dead1), "=r" (dead2), "=r" (dead3), "=r" (dead4), "=r" (dead5), "=r" (dead6)
+   : "0" (x), "1" (y), "2" (sum1), "3" (sum2), "4" (len>>3)
+   : "cc", "memory"
+                        );
+   return (sum1+sum2)>>1;
+}
+
+
+#else
+static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
+{
    int i;
    spx_word32_t sum=0;
    for (i=0;i<len;i+=4)
@@ -61,6 +115,7 @@
    }
    return sum;
 }
+#endif
 
 static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
 {



More information about the commits mailing list