[xiph-commits] r9361 - trunk/speex/libspeex

Sun Jun 5 01:31:52 PDT 2005

Author: jm
Date: 2005-06-05 01:31:50 -0700 (Sun, 05 Jun 2005)
New Revision: 9361

Added:
   trunk/speex/libspeex/lpc_bfin.h
Modified:
   trunk/speex/libspeex/Makefile.am
   trunk/speex/libspeex/lpc.c
Log:
Blackfin assembly for auto-correlation function


Modified: trunk/speex/libspeex/Makefile.am
===================================================================

--- trunk/speex/libspeex/Makefile.am	2005-06-05 07:17:44 UTC (rev 9360)
+++ trunk/speex/libspeex/Makefile.am	2005-06-05 08:31:50 UTC (rev 9361)
@@ -48,6 +48,7 @@
 noinst_HEADERS = lsp.h \
 	nb_celp.h \
 	lpc.h \
+	lpc_bfin.h \
 	ltp.h \
 	quant_lsp.h \
 	cb_search.h \

Modified: trunk/speex/libspeex/lpc.c
===================================================================
--- trunk/speex/libspeex/lpc.c	2005-06-05 07:17:44 UTC (rev 9360)
+++ trunk/speex/libspeex/lpc.c	2005-06-05 08:31:50 UTC (rev 9361)
@@ -53,7 +53,11 @@
 #include "config.h"
 #endif
 
+#include "lpc.h"
 
+#ifdef BFIN_ASM
+#include "lpc_bfin.h"
+#endif
 
 /* LPC analysis
  *
@@ -65,8 +69,6 @@
 /* Invented by N. Levinson in 1947, modified by J. Durbin in 1959.
  */
 
-#include "lpc.h"
-
 /* returns minimum mean square error    */
 spx_word32_t _spx_lpc(
 spx_coef_t       *lpc, /* out: [0...p-1] LPC coefficients      */
@@ -122,6 +124,7 @@
  * for lags between 0 and lag-1, and x == 0 outside 0...n-1
  */
 
+#ifndef OVERRIDE_SPEEX_AUTOCORR
 void _spx_autocorr(
 const spx_word16_t *x,   /*  in: [0...n-1] samples x   */
 spx_word16_t       *ac,  /* out: [0...lag-1] ac values */
@@ -162,6 +165,7 @@
       ac[i] = SHR32(d, ac_shift);
    }
 }
+#endif
 
 
 #else

Added: trunk/speex/libspeex/lpc_bfin.h
===================================================================
--- trunk/speex/libspeex/lpc_bfin.h	2005-06-05 07:17:44 UTC (rev 9360)
+++ trunk/speex/libspeex/lpc_bfin.h	2005-06-05 08:31:50 UTC (rev 9361)
@@ -0,0 +1,129 @@
+/* Copyright (C) 2005 Analog Devices
+   Author: Jean-Marc Valin 
+   File: lpc_bfin.h
+   LPC stuff (Blackfin version)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   
+   - Neither the name of the Xiph.org Foundation nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+   
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* Blackfin version of _spx_autocorr: inline asm fills ac32[] with partial sums, C code then adds the products for j < lag-1 and scales the results into ac[]. */
+#define OVERRIDE_SPEEX_AUTOCORR /* lpc.c wraps its generic _spx_autocorr in #ifndef OVERRIDE_SPEEX_AUTOCORR */
+void _spx_autocorr(
+const spx_word16_t *x,   /*  in: [0...n-1] samples x   */
+spx_word16_t       *ac,  /* out: [0...lag-1] ac values */
+int          lag, /*  in: number of ac values to produce */
+int          n    /*  in: number of samples in x */
+                  )
+{
+   spx_word32_t d;
+   const spx_word16_t *xs; /* x+lag-1, handed to the asm as operand %0 */
+   int i, j;
+   spx_word32_t ac0=1; /* energy estimate, used only to choose shift and ac_shift below */
+   spx_word32_t ac32[11], *ac32top; /* 32-bit results; NOTE(review): fixed 11 entries but read for every i < lag - presumably lag == 11 is assumed, confirm against callers */
+   int shift, ac_shift;
+   ac32top = ac32+10; /* last entry of ac32; the asm stores from here downwards via [P1--] */
+   int lag_1, N_lag;
+   int nshift; /* -shift, loaded into R4 for the ASHIFT instructions */
+   lag_1 = lag-1;
+   N_lag = n-lag_1;
+   for (j=0;j<n;j++)
+      ac0 = ADD32(ac0,SHR32(MULT16_16(x[j],x[j]),8));
+   ac0 = ADD32(ac0,n);
+   shift = 8; /* right shift applied to products/accumulators: starts at 8, reduced while ac0 < 0x40000000 */
+   while (shift && ac0<0x40000000)
+   {
+      shift--;
+      ac0 <<= 1;
+   }
+   ac_shift = 18; /* right shift applied when writing ac[]: starts at 18, reduced the same way */
+   while (ac_shift && ac0<0x40000000)
+   {
+      ac_shift--;
+      ac0 <<= 1;
+   }
+   /* Inline asm operands: %0=xs, %1=x, %2=ac32top, %3=N_lag, %4=lag_1, %5=nshift. */
+   xs = x+lag-1;
+   nshift = -shift;
+   __asm__ __volatile__ 
+   (
+         "P2 = %0;\n\t" /* xs */
+         "I0 = P2;\n\t" /* xs in I0 */
+         "B0 = P2;\n\t" /* xs in B0 */
+         "R0 = %3;\n\t" /* N_lag in R0 */
+         "P3 = %3;\n\t" /* N_lag in P3 (inner loop count is P3 >> 1) */
+         "P4 = %4;\n\t" /* lag_1 in P4 (outer loop count is P4 >> 1) */
+         "R1 = R0 << 1;\n\t" /* 2*N_lag, i.e. bytes in N_lag samples assuming 16-bit spx_word16_t */
+         "L0 = R1;\n\t" /* presumably makes I0 wrap over that many bytes starting at B0 - confirm */
+         "P0 = %1;\n\t" /* x */
+         "P1 = %2;\n\t" /* ac32top */
+         "B1 = P1;\n\t"
+         "R4 = %5;\n\t" /* nshift */
+         "L1 = 0;\n\t" /*Disable looping on I1*/
+         /* Outer loop: lag_1/2 passes, each producing two sums (A0, A1); inner loop: N_lag/2 passes. */
+         "r0 = [I0++];\n\t"
+         "R2 = 0;R3=0;"
+         "LOOP pitch%= LC0 = P4 >> 1;\n\t"
+         "LOOP_BEGIN pitch%=;\n\t"
+            "I1 = P0;\n\t"
+            "A1 = A0 = 0;\n\t" /* clear both accumulators */
+            "R1 = [I1++];\n\t"
+            "LOOP inner_prod%= LC1 = P3 >> 1;\n\t"
+            "LOOP_BEGIN inner_prod%=;\n\t"
+               "A0 += R0.L*R1.L , A1 += R0.L*R1.H (is) || R1.L = W[I1++];\n\t"
+            "LOOP_END inner_prod%=;\n\t"
+               "A0 += R0.H*R1.H , A1 += R0.H*R1.L (is) || R1.H = W[I1++] || R0 = [I0++];\n\t"
+            "A0 = ASHIFT A0 by R4.L;\n\t" /* R4 holds nshift = -shift */
+            "A1 = ASHIFT A1 by R4.L;\n\t"
+            /* Copy both accumulators out and store them, stepping P1 down from ac32top. */
+            "R2 = A0, R3 = A1;\n\t"
+            "[P1--] = R2;\n\t"
+            "[P1--] = R3;\n\t"
+         "LOOP_END pitch%=;\n\t"
+            "P0 += 4;\n\t"
+   : : "m" (xs), "m" (x), "m" (ac32top), "m" (N_lag), "m" (lag_1), "m" (nshift)
+   : "A0", "A1", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "R3", "R4", "I0", "I1", "L0", "L1", "B0", "B1", "memory"
+   );
+   d=0;
+   for (j=0;j<n;j++)
+   {
+      d = ADD32(d,SHR32(MULT16_16(x[j],x[j]), shift));
+   }
+   ac32[0] = d; /* lag 0 is computed entirely in C */
+   /* Add the products for j < lag_1 to lags 1..lag-1, then scale every lag by ac_shift into ac[]. */
+   for (i=0;i<lag;i++)
+   {
+      d=0;
+      for (j=i;j<lag_1;j++)
+      {
+         d = ADD32(d,SHR32(MULT16_16(x[j],x[j-i]), shift));
+      }
+      if (i) /* ac32[0] already holds the full sum over all n samples */
+         ac32[i] += d;
+      ac[i] = SHR32(ac32[i], ac_shift);
+   }
+}
+