[xiph-commits] r9321 - trunk/speex/libspeex

Fri May 27 13:59:53 PDT 2005

Author: jm
Date: 2005-05-27 13:59:46 -0700 (Fri, 27 May 2005)
New Revision: 9321

Added:
   trunk/speex/libspeex/vq_sse.h
Modified:
   trunk/speex/libspeex/Makefile.am
   trunk/speex/libspeex/arch.h
   trunk/speex/libspeex/cb_search.c
   trunk/speex/libspeex/cb_search_arm4.h
   trunk/speex/libspeex/cb_search_sse.h
   trunk/speex/libspeex/filters.c
   trunk/speex/libspeex/filters_arm4.h
   trunk/speex/libspeex/filters_sse.h
   trunk/speex/libspeex/fixed_arm4.h
   trunk/speex/libspeex/fixed_arm5e.h
   trunk/speex/libspeex/ltp.c
   trunk/speex/libspeex/ltp_arm4.h
   trunk/speex/libspeex/ltp_sse.h
   trunk/speex/libspeex/vq.c
   trunk/speex/libspeex/vq_arm4.h
Log:
Cleaned up arch-dependent optimizations


Modified: trunk/speex/libspeex/Makefile.am
===================================================================

--- trunk/speex/libspeex/Makefile.am	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/Makefile.am	2005-05-27 20:59:46 UTC (rev 9321)
@@ -55,6 +55,7 @@
 	stack_alloc.h \
 	vq.h \
 	vq_arm4.h \
+	vq_sse.h \
 	modes.h \
 	sb_celp.h \
 	vbr.h \

Modified: trunk/speex/libspeex/arch.h
===================================================================
--- trunk/speex/libspeex/arch.h	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/arch.h	2005-05-27 20:59:46 UTC (rev 9321)
@@ -66,16 +66,19 @@
 #define VERY_SMALL 0
 
 
+#ifdef FIXED_DEBUG
+#include "fixed_debug.h"
+#else
+
+#include "fixed_generic.h"
+
 #ifdef ARM5E_ASM
 #include "fixed_arm5e.h"
 #elif defined (ARM4_ASM)
 #include "fixed_arm4.h"
-#elif defined (FIXED_DEBUG)
-#include "fixed_debug.h"
-#else
-#include "fixed_generic.h"
 #endif
 
+#endif
 
 
 #else

Modified: trunk/speex/libspeex/cb_search.c
===================================================================
--- trunk/speex/libspeex/cb_search.c	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/cb_search.c	2005-05-27 20:59:46 UTC (rev 9321)
@@ -43,8 +43,9 @@
 #include "cb_search_sse.h"
 #elif defined(ARM4_ASM) || defined(ARM5E_ASM)
 #include "cb_search_arm4.h"
-#else
+#endif
 
+#ifndef OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
 static void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
 {
    int i, j, k;
@@ -79,7 +80,6 @@
    }
 
 }
-
 #endif
 
 

Modified: trunk/speex/libspeex/cb_search_arm4.h
===================================================================
--- trunk/speex/libspeex/cb_search_arm4.h	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/cb_search_arm4.h	2005-05-27 20:59:46 UTC (rev 9321)
@@ -29,6 +29,7 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#define OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
 static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
 {
    int i, j, k;

Modified: trunk/speex/libspeex/cb_search_sse.h
===================================================================
--- trunk/speex/libspeex/cb_search_sse.h	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/cb_search_sse.h	2005-05-27 20:59:46 UTC (rev 9321)
@@ -47,7 +47,7 @@
 
 }
 
-
+#define OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
 static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *_r, float *resp, __m128 *resp2, __m128 *E, int shape_cb_size, int subvect_size, char *stack)
 {
    int i, j, k;

Modified: trunk/speex/libspeex/filters.c
===================================================================
--- trunk/speex/libspeex/filters.c	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/filters.c	2005-05-27 20:59:46 UTC (rev 9321)
@@ -41,6 +41,14 @@
 #include "ltp.h"
 #include <math.h>
 
+#ifdef _USE_SSE
+#include "filters_sse.h"
+#elif defined (ARM4_ASM) || defined(ARM5E_ASM)
+#include "filters_arm4.h"
+#endif
+
+
+
 void bw_lpc(spx_word16_t gamma, const spx_coef_t *lpc_in, spx_coef_t *lpc_out, int order)
 {
    int i;
@@ -153,11 +161,8 @@
    return EXTRACT16(SHR32(SHL32(EXTEND32(spx_sqrt(1+DIV32(sum,len))),(sig_shift+3)),SIG_SHIFT));
 }
 
-#if defined(ARM4_ASM) || defined(ARM5E_ASM)
-#include "filters_arm4.h"
-#else
 
-
+#ifndef OVERRIDE_NORMALIZE16
 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
 {
    int i;
@@ -185,7 +190,25 @@
    
    return sig_shift;
 }
+#endif
 
+#else
+
+spx_word16_t compute_rms(const spx_sig_t *x, int len)
+{  /* Root-mean-square of x[0..len-1], computed in float. */
+   int i;
+   float sum=0;  /* running sum of squared samples */
+   for (i=0;i<len;i++)
+   {
+      sum += x[i]*x[i];
+   }
+   return sqrt(.1+sum/len);  /* the .1 offset keeps the result above zero for all-zero input; len is not checked against 0 */
+}
+#endif
+
+
+
+#ifndef OVERRIDE_FILTER_MEM2
 #ifdef PRECISION16
 void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
 {
@@ -225,7 +248,9 @@
    }
 }
 #endif
+#endif
 
+#ifndef OVERRIDE_IIR_MEM2
 #ifdef PRECISION16
 void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
 {
@@ -264,10 +289,9 @@
    }
 }
 #endif
-
 #endif
 
-
+#ifndef OVERRIDE_FIR_MEM2
 #ifdef PRECISION16
 void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
 {
@@ -305,80 +329,15 @@
    }
 }
 #endif
+#endif
 
-#else
 
 
 
-spx_word16_t compute_rms(const spx_sig_t *x, int len)
-{
-   int i;
-   float sum=0;
-   for (i=0;i<len;i++)
-   {
-      sum += x[i]*x[i];
-   }
-   return sqrt(.1+sum/len);
-}
 
-#ifdef _USE_SSE
-#include "filters_sse.h"
-#else
 
 
-void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord,  spx_mem_t *mem)
-{
-   int i,j;
-   float xi,yi;
-   for (i=0;i<N;i++)
-   {
-      xi=x[i];
-      y[i] = xi + mem[0];
-      yi=y[i];
-      for (j=0;j<ord-1;j++)
-      {
-         mem[j] = mem[j+1] + num[j]*xi - den[j]*yi;
-      }
-      mem[ord-1] = num[ord-1]*xi - den[ord-1]*yi;
-   }
-}
 
-
-void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
-{
-   int i,j;
-   for (i=0;i<N;i++)
-   {
-      y[i] = x[i] + mem[0];
-      for (j=0;j<ord-1;j++)
-      {
-         mem[j] = mem[j+1] - den[j]*y[i];
-      }
-      mem[ord-1] = - den[ord-1]*y[i];
-   }
-}
-
-
-void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
-{
-   int i,j;
-   float xi;
-   for (i=0;i<N;i++)
-   {
-      xi=x[i];
-      y[i] = xi + mem[0];
-      for (j=0;j<ord-1;j++)
-      {
-         mem[j] = mem[j+1] + num[j]*xi;
-      }
-      mem[ord-1] = num[ord-1]*xi;
-   }
-}
-#endif
-
-#endif
-
-
 void syn_percep_zero(const spx_sig_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_sig_t *y, int N, int ord, char *stack)
 {
    int i;

Modified: trunk/speex/libspeex/filters_arm4.h
===================================================================
--- trunk/speex/libspeex/filters_arm4.h	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/filters_arm4.h	2005-05-27 20:59:46 UTC (rev 9321)
@@ -30,6 +30,7 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#define OVERRIDE_NORMALIZE16
 int normalize16(const spx_sig_t *x, spx_word16_t *y, int max_scale, int len)
 {
    int i;
@@ -92,7 +93,7 @@
    return sig_shift;
 }
 
-
+#define OVERRIDE_FILTER_MEM2
 void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
 {
    int i,j;
@@ -251,6 +252,7 @@
    }
 }
 
+#define OVERRIDE_IIR_MEM2
 void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
 {
    int i,j;

Modified: trunk/speex/libspeex/filters_sse.h
===================================================================
--- trunk/speex/libspeex/filters_sse.h	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/filters_sse.h	2005-05-27 20:59:46 UTC (rev 9321)
@@ -128,7 +128,7 @@
 }
 
 
-
+#define OVERRIDE_FILTER_MEM2
 void filter_mem2(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem)
 {
    if(ord==10)
@@ -227,6 +227,7 @@
    _mm_storeu_ps(_mem+4, mem[1]);
 }
 
+#define OVERRIDE_IIR_MEM2
 void iir_mem2(const float *x, const float *_den, float *y, int N, int ord, float *_mem)
 {
    if(ord==10)
@@ -323,7 +324,7 @@
    _mm_storeu_ps(_mem+4, mem[1]);
 }
 
-
+#define OVERRIDE_FIR_MEM2
 void fir_mem2(const float *x, const float *_num, float *y, int N, int ord, float *_mem)
 {
    if(ord==10)

Modified: trunk/speex/libspeex/fixed_arm4.h
===================================================================
--- trunk/speex/libspeex/fixed_arm4.h	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/fixed_arm4.h	2005-05-27 20:59:46 UTC (rev 9321)
@@ -35,43 +35,7 @@
 #ifndef FIXED_ARM4_H
 #define FIXED_ARM4_H
 
-#define NEG16(x) (-(x))
-#define NEG32(x) (-(x))
-#define EXTRACT16(x) ((spx_word16_t)x)
-#define EXTEND32(x) ((spx_word32_t)x)
-#define SHR16(a,shift) ((a) >> (shift))
-#define SHL16(a,shift) ((a) << (shift))
-#define SHR32(a,shift) ((a) >> (shift))
-#define SHL32(a,shift) ((a) << (shift))
-#define PSHR16(a,shift) (SHR16((a)+(1<<((shift)-1)),shift))
-#define PSHR32(a,shift) (SHR32((a)+(1<<((shift)-1)),shift))
-#define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
-#define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
-
-#define SHR(a,shift) ((a) >> (shift))
-#define SHL(a,shift) ((a) << (shift))
-#define SATURATE(x,a) ((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))
-#define PSHR(a,shift) (SHR((a)+(1<<((shift)-1)),shift))
-
-#define ADD16(a,b) ((a)+(b))
-#define SUB16(a,b) ((a)-(b))
-#define ADD32(a,b) ((a)+(b))
-#define SUB32(a,b) ((a)-(b))
-#define ADD64(a,b) ((a)+(b))
-
-
-/* result fits in 16 bits */
-#define MULT16_16_16(a,b)     ((a)*(b))
-
-#define MULT16_16(a,b)     ((a)*(b))
-
-
-
-
-#define MAC16_16(c,a,b)     (ADD32((c),MULT16_16((a),(b))))
-#define MULT16_32_Q12(a,b) ADD32(MULT16_16((a),SHR((b),12)), SHR(MULT16_16((a),((b)&0x00000fff)),12))
-#define MULT16_32_Q13(a,b) ADD32(MULT16_16((a),SHR((b),13)), SHR(MULT16_16((a),((b)&0x00001fff)),13))
-//#define MULT16_32_Q14(a,b) ADD32(MULT16_16((a),SHR((b),14)), SHR(MULT16_16((a),((b)&0x00003fff)),14))
+#undef MULT16_32_Q14
 static inline spx_word32_t MULT16_32_Q14(spx_word16_t x, spx_word32_t y) {
   int res;
   int dummy;
@@ -84,10 +48,7 @@
   return(res);
 }
 
-#define MULT16_32_Q11(a,b) ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11))
-#define MAC16_32_Q11(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11)))
-
-//#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
+#undef MULT16_32_Q15
 static inline spx_word32_t MULT16_32_Q15(spx_word16_t x, spx_word32_t y) {
   int res;
   int dummy;
@@ -100,25 +61,7 @@
   return(res);
 }
 
-#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
-
-
-#define MAC16_16_Q11(c,a,b)     (ADD32((c),SHR(MULT16_16((a),(b)),11)))
-#define MAC16_16_Q13(c,a,b)     (ADD32((c),SHR(MULT16_16((a),(b)),13)))
-
-#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
-#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
-#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
-#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))
-
-#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13))
-#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14))
-#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15))
-
-#define MUL_16_32_R15(a,bh,bl) ADD32(MULT16_16((a),(bh)), SHR(MULT16_16((a),(bl)),15))
-
-
-
+#undef DIV32_16
 static inline short DIV32_16(int a, int b)
 {
    int res=0;
@@ -201,8 +144,5 @@
    return res;
 }
 
-#define DIV32(a,b) (((signed int)(a))/((signed int)(b)))
 
-
-
 #endif

Modified: trunk/speex/libspeex/fixed_arm5e.h
===================================================================
--- trunk/speex/libspeex/fixed_arm5e.h	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/fixed_arm5e.h	2005-05-27 20:59:46 UTC (rev 9321)
@@ -35,35 +35,7 @@
 #ifndef FIXED_ARM5E_H
 #define FIXED_ARM5E_H
 
-#define NEG16(x) (-(x))
-#define NEG32(x) (-(x))
-#define EXTRACT16(x) ((spx_word16_t)x)
-#define EXTEND32(x) ((spx_word32_t)x)
-#define SHR16(a,shift) ((a) >> (shift))
-#define SHL16(a,shift) ((a) << (shift))
-#define SHR32(a,shift) ((a) >> (shift))
-#define SHL32(a,shift) ((a) << (shift))
-#define PSHR16(a,shift) (SHR16((a)+(1<<((shift)-1)),shift))
-#define PSHR32(a,shift) (SHR32((a)+(1<<((shift)-1)),shift))
-#define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
-#define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
-
-#define SHR(a,shift) ((a) >> (shift))
-#define SHL(a,shift) ((a) << (shift))
-#define SATURATE(x,a) ((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))
-#define PSHR(a,shift) (SHR((a)+(1<<((shift)-1)),shift))
-
-
-#define ADD16(a,b) ((short)((short)(a)+(short)(b)))
-#define SUB16(a,b) ((a)-(b))
-#define ADD32(a,b) ((a)+(b))
-#define SUB32(a,b) ((a)-(b))
-#define ADD64(a,b) ((a)+(b))
-
-
-/* result fits in 16 bits */
-#define MULT16_16_16(a,b)     (((short)(a))*((short)(b)))
-
+#undef MULT16_16
 static inline spx_word32_t MULT16_16(spx_word16_t x, spx_word16_t y) {
   int res;
   asm ("smulbb  %0,%1,%2;\n"
@@ -72,6 +44,7 @@
   return(res);
 }
 
+#undef MAC16_16
 static inline spx_word32_t MAC16_16(spx_word32_t a, spx_word16_t x, spx_word32_t y) {
   int res;
   asm ("smlabb  %0,%1,%2,%3;\n"
@@ -80,10 +53,7 @@
   return(res);
 }
 
-#define MULT16_32_Q12(a,b) ADD32(MULT16_16((a),SHR((b),12)), SHR(MULT16_16((a),((b)&0x00000fff)),12))
-#define MULT16_32_Q13(a,b) ADD32(MULT16_16((a),SHR((b),13)), SHR(MULT16_16((a),((b)&0x00001fff)),13))
-#define MULT16_32_Q14(a,b) ADD32(MULT16_16((a),SHR((b),14)), SHR(MULT16_16((a),((b)&0x00003fff)),14))
-
+#undef MULT16_32_Q15
 static inline spx_word32_t MULT16_32_Q15(spx_word16_t x, spx_word32_t y) {
   int res;
   asm ("smulwb  %0,%1,%2;\n"
@@ -91,6 +61,8 @@
                : "%r"(y<<1),"r"(x));
   return(res);
 }
+
+#undef MAC16_32_Q15
 static inline spx_word32_t MAC16_32_Q15(spx_word32_t a, spx_word16_t x, spx_word32_t y) {
   int res;
   asm ("smlawb  %0,%1,%2,%3;\n"
@@ -98,6 +70,8 @@
                : "%r"(y<<1),"r"(x),"r"(a));
   return(res);
 }
+
+#undef MULT16_32_Q11
 static inline spx_word32_t MULT16_32_Q11(spx_word16_t x, spx_word32_t y) {
   int res;
   asm ("smulwb  %0,%1,%2;\n"
@@ -105,6 +79,8 @@
                : "%r"(y<<5),"r"(x));
   return(res);
 }
+
+#undef MAC16_32_Q11
 static inline spx_word32_t MAC16_32_Q11(spx_word32_t a, spx_word16_t x, spx_word32_t y) {
   int res;
   asm ("smlawb  %0,%1,%2,%3;\n"
@@ -113,25 +89,8 @@
   return(res);
 }
 
-#define MAC16_16_Q11(c,a,b)     (ADD32((c),SHR(MULT16_16((a),(b)),11)))
-#define MAC16_16_Q13(c,a,b)     (ADD32((c),SHR(MULT16_16((a),(b)),13)))
-
-#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
-#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
-#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
-#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))
-
-#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13))
-#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14))
-#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15))
-
-#define MUL_16_32_R15(a,bh,bl) ADD32(MULT16_16((a),(bh)), SHR(MULT16_16((a),(bl)),15))
-
-
-/*
-#define DIV32_16(a,b) ((short)(((signed int)(a))/((short)(b))))
-*/
-static inline short DIV3216(int a, int b)
+#undef DIV32_16
+static inline short DIV32_16(int a, int b)
 {
    int res=0;
    int dead1, dead2, dead3, dead4, dead5;
@@ -214,8 +173,6 @@
 }
 
 
-#define DIV32(a,b) (((signed int)(a))/((signed int)(b)))
 
 
-
 #endif

Modified: trunk/speex/libspeex/ltp.c
===================================================================
--- trunk/speex/libspeex/ltp.c	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/ltp.c	2005-05-27 20:59:46 UTC (rev 9321)
@@ -50,8 +50,9 @@
 #include "ltp_sse.h"
 #elif defined (ARM4_ASM) || defined(ARM5E_ASM)
 #include "ltp_arm4.h"
-#else
+#endif
 
+#ifndef OVERRIDE_INNER_PROD
 static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
 {
    spx_word32_t sum=0;
@@ -68,7 +69,9 @@
    }
    return sum;
 }
+#endif
 
+#ifndef OVERRIDE_PITCH_XCORR
 #if 0 /* HINT: Enable this for machines with enough registers (i.e. not x86) */
 static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
 {
@@ -144,11 +147,9 @@
 
 }
 #endif
+#endif
 
 
-
-#endif
-
 void open_loop_nbest_pitch(spx_sig_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
 {
    int i,j,k;

Modified: trunk/speex/libspeex/ltp_arm4.h
===================================================================
--- trunk/speex/libspeex/ltp_arm4.h	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/ltp_arm4.h	2005-05-27 20:59:46 UTC (rev 9321)
@@ -29,6 +29,8 @@
    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
+
+#define OVERRIDE_INNER_PROD
 static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
 {
    spx_word32_t sum1=0,sum2=0;
@@ -78,7 +80,8 @@
                         );
    return (sum1+sum2)>>1;
 }
-         
+
+#define OVERRIDE_PITCH_XCORR
 static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
 {
    int i,j;

Modified: trunk/speex/libspeex/ltp_sse.h
===================================================================
--- trunk/speex/libspeex/ltp_sse.h	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/ltp_sse.h	2005-05-27 20:59:46 UTC (rev 9321)
@@ -32,6 +32,7 @@
 
 #include <xmmintrin.h>
 
+#define OVERRIDE_INNER_PROD
 static float inner_prod(const float *a, const float *b, int len)
 {
    int i;
@@ -50,6 +51,7 @@
    return ret;
 }
 
+#define OVERRIDE_PITCH_XCORR
 static void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack)
 {
    int i, offset;

Modified: trunk/speex/libspeex/vq.c
===================================================================
--- trunk/speex/libspeex/vq.c	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/vq.c	2005-05-27 20:59:46 UTC (rev 9321)
@@ -36,7 +36,15 @@
 
 #include "vq.h"
 #include "stack_alloc.h"
+#include "misc.h"
 
+#ifdef _USE_SSE
+#include <xmmintrin.h>
+#elif defined(SHORTCUTS) && (defined(ARM4_ASM) || defined(ARM5E_ASM))
+#include "vq_arm4.h"
+#endif
+
+
 int scal_quant(spx_word16_t in, const spx_word16_t *boundary, int entries)
 {
    int i=0;
@@ -82,51 +90,8 @@
    return best_index;
 }
 
-#ifdef _USE_SSE
-#include <xmmintrin.h>
-#include "misc.h"
-void vq_nbest(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
-{
-   int i,j,k,used;
-   VARDECL(float *dist);
-   VARDECL(__m128 *in);
-   __m128 half;
-   used = 0;
-   ALLOC(dist, entries, float);
-   half = _mm_set_ps1(.5f);
-   ALLOC(in, len, __m128);
-   for (i=0;i<len;i++)
-      in[i] = _mm_set_ps1(_in[i]);
-   for (i=0;i<entries>>2;i++)
-   {
-      __m128 d = _mm_mul_ps(E[i], half);
-      for (j=0;j<len;j++)
-         d = _mm_sub_ps(d, _mm_mul_ps(in[j], *codebook++));
-      _mm_storeu_ps(dist+4*i, d);
-   }
-   for (i=0;i<entries;i++)
-   {
-      if (i<N || dist[i]<best_dist[N-1])
-      {
-         for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--)
-         {
-            best_dist[k]=best_dist[k-1];
-            nbest[k] = nbest[k-1];
-         }
-         best_dist[k]=dist[i];
-         nbest[k]=i;
-         used++;
-      }
-   }
-}
 
-
-#else
-
-#if defined(SHORTCUTS) && (defined(ARM4_ASM) || defined(ARM5E_ASM))
-#include "vq_arm4.h"
-#else
-
+#ifndef OVERRIDE_VQ_NBEST
 /*Finds the indices of the n-best entries in a codebook*/
 void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
 {
@@ -157,61 +122,10 @@
 }
 #endif
 
-#endif
 
 
 
-#ifdef _USE_SSE
-
-void vq_nbest_sign(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
-{
-   int i,j,k,used;
-   VARDECL(float *dist);
-   VARDECL(__m128 *in);
-   __m128 half;
-   used = 0;
-   ALLOC(dist, entries, float);
-   half = _mm_set_ps1(.5f);
-   ALLOC(in, len, __m128);
-   for (i=0;i<len;i++)
-      in[i] = _mm_set_ps1(_in[i]);
-   for (i=0;i<entries>>2;i++)
-   {
-      __m128 d = _mm_setzero_ps();
-      for (j=0;j<len;j++)
-         d = _mm_add_ps(d, _mm_mul_ps(in[j], *codebook++));
-      _mm_storeu_ps(dist+4*i, d);
-   }
-   for (i=0;i<entries;i++)
-   {
-      int sign;
-      if (dist[i]>0)
-      {
-         sign=0;
-         dist[i]=-dist[i];
-      } else
-      {
-         sign=1;
-      }
-      dist[i] += .5f*((float*)E)[i];
-      if (i<N || dist[i]<best_dist[N-1])
-      {
-         for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--)
-         {
-            best_dist[k]=best_dist[k-1];
-            nbest[k] = nbest[k-1];
-         }
-         best_dist[k]=dist[i];
-         nbest[k]=i;
-         used++;
-         if (sign)
-            nbest[k]+=entries;
-      }
-   }
-}
-
-#else
-
+#ifndef OVERRIDE_VQ_NBEST_SIGN
 /*Finds the indices of the n-best entries in a codebook with sign*/
 void vq_nbest_sign(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
 {

Modified: trunk/speex/libspeex/vq_arm4.h
===================================================================
--- trunk/speex/libspeex/vq_arm4.h	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/vq_arm4.h	2005-05-27 20:59:46 UTC (rev 9321)
@@ -30,6 +30,7 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#define OVERRIDE_VQ_NBEST
 void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
 {
    int i,j;

Added: trunk/speex/libspeex/vq_sse.h
===================================================================
--- trunk/speex/libspeex/vq_sse.h	2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/vq_sse.h	2005-05-27 20:59:46 UTC (rev 9321)
@@ -0,0 +1,118 @@
+/* Copyright (C) 2004 Jean-Marc Valin 
+   File: vq_sse.h
+   SSE-optimized vq routine
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+   
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+   
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+   
+   - Neither the name of the Xiph.org Foundation nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+   
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#define OVERRIDE_VQ_NBEST /* vq.c compiles its generic vq_nbest only when this is not defined. NOTE(review): vq.c must include this header under _USE_SSE for that to happen -- confirm */
+void vq_nbest(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
+{  /* SSE N-best search: score = E/2 - dot(_in, codeword); the N lowest scores go to best_dist and their indices to nbest, in increasing score order. */
+   int i,j,k,used;
+   VARDECL(float *dist);   /* one score per codebook entry */
+   VARDECL(__m128 *in);    /* each input sample replicated across the four lanes */
+   __m128 half;
+   used = 0;   /* number of entries inserted into nbest so far */
+   ALLOC(dist, entries, float);
+   half = _mm_set_ps1(.5f);
+   ALLOC(in, len, __m128);
+   for (i=0;i<len;i++)
+      in[i] = _mm_set_ps1(_in[i]);
+   for (i=0;i<entries>>2;i++)   /* four entries per iteration; only (entries>>2)*4 scores get written -- presumably entries is a multiple of 4, confirm */
+   {
+      __m128 d = _mm_mul_ps(E[i], half);   /* start from E/2 for this group of four */
+      for (j=0;j<len;j++)
+         d = _mm_sub_ps(d, _mm_mul_ps(in[j], *codebook++));   /* codebook is consumed sequentially, one __m128 per (group, j); presumably lane-interleaved by the caller -- confirm */
+      _mm_storeu_ps(dist+4*i, d);   /* unaligned store of the four scores */
+   }
+   for (i=0;i<entries;i++)
+   {
+      if (i<N || dist[i]<best_dist[N-1])   /* the first N entries are always taken; later ones only if they beat the current worst */
+      {
+         for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--)   /* walk k down past slots not yet filled (k > used) or holding a larger score, moving each one step back */
+         {
+            best_dist[k]=best_dist[k-1];
+            nbest[k] = nbest[k-1];
+         }
+         best_dist[k]=dist[i];
+         nbest[k]=i;
+         used++;
+      }
+   }
+}
+
+
+
+
+#define OVERRIDE_VQ_NBEST_SIGN /* vq.c compiles its generic vq_nbest_sign only when this is not defined */
+void vq_nbest_sign(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
+{  /* SSE N-best search with sign: score = E/2 - |dot(_in, codeword)|; entries whose dot product is not positive are reported as index + entries. */
+   int i,j,k,used;
+   VARDECL(float *dist);   /* dot products first, then final scores, one per entry */
+   VARDECL(__m128 *in);    /* each input sample replicated across the four lanes */
+   __m128 half;   /* assigned below but never read in this function */
+   used = 0;   /* number of entries inserted into nbest so far */
+   ALLOC(dist, entries, float);
+   half = _mm_set_ps1(.5f);
+   ALLOC(in, len, __m128);
+   for (i=0;i<len;i++)
+      in[i] = _mm_set_ps1(_in[i]);
+   for (i=0;i<entries>>2;i++)   /* four entries per iteration; only (entries>>2)*4 values get written -- presumably entries is a multiple of 4, confirm */
+   {
+      __m128 d = _mm_setzero_ps();
+      for (j=0;j<len;j++)
+         d = _mm_add_ps(d, _mm_mul_ps(in[j], *codebook++));   /* accumulate in[j]*codebook; codebook is consumed sequentially, one __m128 per (group, j) */
+      _mm_storeu_ps(dist+4*i, d);   /* unaligned store of the four dot products */
+   }
+   for (i=0;i<entries;i++)
+   {
+      int sign;
+      if (dist[i]>0)
+      {
+         sign=0;
+         dist[i]=-dist[i];   /* after this branch dist[i] holds -|dot| in both cases */
+      } else
+      {
+         sign=1;
+      }
+      dist[i] += .5f*((float*)E)[i];   /* E is read as a flat float array here: add E/2 for entry i */
+      if (i<N || dist[i]<best_dist[N-1])   /* the first N entries are always taken; later ones only if they beat the current worst */
+      {
+         for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--)   /* walk k down past slots not yet filled (k > used) or holding a larger score, moving each one step back */
+         {
+            best_dist[k]=best_dist[k-1];
+            nbest[k] = nbest[k-1];
+         }
+         best_dist[k]=dist[i];
+         nbest[k]=i;
+         used++;
+         if (sign)
+            nbest[k]+=entries;   /* mark the non-positive-correlation case by offsetting the index by entries */
+      }
+   }
+}