[xiph-commits] r9321 - trunk/speex/libspeex
jm at motherfish-iii.xiph.org
jm at motherfish-iii.xiph.org
Fri May 27 13:59:53 PDT 2005
Author: jm
Date: 2005-05-27 13:59:46 -0700 (Fri, 27 May 2005)
New Revision: 9321
Added:
trunk/speex/libspeex/vq_sse.h
Modified:
trunk/speex/libspeex/Makefile.am
trunk/speex/libspeex/arch.h
trunk/speex/libspeex/cb_search.c
trunk/speex/libspeex/cb_search_arm4.h
trunk/speex/libspeex/cb_search_sse.h
trunk/speex/libspeex/filters.c
trunk/speex/libspeex/filters_arm4.h
trunk/speex/libspeex/filters_sse.h
trunk/speex/libspeex/fixed_arm4.h
trunk/speex/libspeex/fixed_arm5e.h
trunk/speex/libspeex/ltp.c
trunk/speex/libspeex/ltp_arm4.h
trunk/speex/libspeex/ltp_sse.h
trunk/speex/libspeex/vq.c
trunk/speex/libspeex/vq_arm4.h
Log:
Cleaned up arch-dependent optimizations
Modified: trunk/speex/libspeex/Makefile.am
===================================================================
--- trunk/speex/libspeex/Makefile.am 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/Makefile.am 2005-05-27 20:59:46 UTC (rev 9321)
@@ -55,6 +55,7 @@
stack_alloc.h \
vq.h \
vq_arm4.h \
+ vq_sse.h \
modes.h \
sb_celp.h \
vbr.h \
Modified: trunk/speex/libspeex/arch.h
===================================================================
--- trunk/speex/libspeex/arch.h 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/arch.h 2005-05-27 20:59:46 UTC (rev 9321)
@@ -66,16 +66,19 @@
#define VERY_SMALL 0
+#ifdef FIXED_DEBUG
+#include "fixed_debug.h"
+#else
+
+#include "fixed_generic.h"
+
#ifdef ARM5E_ASM
#include "fixed_arm5e.h"
#elif defined (ARM4_ASM)
#include "fixed_arm4.h"
-#elif defined (FIXED_DEBUG)
-#include "fixed_debug.h"
-#else
-#include "fixed_generic.h"
#endif
+#endif
#else
Modified: trunk/speex/libspeex/cb_search.c
===================================================================
--- trunk/speex/libspeex/cb_search.c 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/cb_search.c 2005-05-27 20:59:46 UTC (rev 9321)
@@ -43,8 +43,9 @@
#include "cb_search_sse.h"
#elif defined(ARM4_ASM) || defined(ARM5E_ASM)
#include "cb_search_arm4.h"
-#else
+#endif
+#ifndef OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
static void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
{
int i, j, k;
@@ -79,7 +80,6 @@
}
}
-
#endif
Modified: trunk/speex/libspeex/cb_search_arm4.h
===================================================================
--- trunk/speex/libspeex/cb_search_arm4.h 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/cb_search_arm4.h 2005-05-27 20:59:46 UTC (rev 9321)
@@ -29,6 +29,7 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#define OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
{
int i, j, k;
Modified: trunk/speex/libspeex/cb_search_sse.h
===================================================================
--- trunk/speex/libspeex/cb_search_sse.h 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/cb_search_sse.h 2005-05-27 20:59:46 UTC (rev 9321)
@@ -47,7 +47,7 @@
}
-
+#define OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *_r, float *resp, __m128 *resp2, __m128 *E, int shape_cb_size, int subvect_size, char *stack)
{
int i, j, k;
Modified: trunk/speex/libspeex/filters.c
===================================================================
--- trunk/speex/libspeex/filters.c 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/filters.c 2005-05-27 20:59:46 UTC (rev 9321)
@@ -41,6 +41,14 @@
#include "ltp.h"
#include <math.h>
+#ifdef _USE_SSE
+#include "filters_sse.h"
+#elif defined (ARM4_ASM) || defined(ARM5E_ASM)
+#include "filters_arm4.h"
+#endif
+
+
+
void bw_lpc(spx_word16_t gamma, const spx_coef_t *lpc_in, spx_coef_t *lpc_out, int order)
{
int i;
@@ -153,11 +161,8 @@
return EXTRACT16(SHR32(SHL32(EXTEND32(spx_sqrt(1+DIV32(sum,len))),(sig_shift+3)),SIG_SHIFT));
}
-#if defined(ARM4_ASM) || defined(ARM5E_ASM)
-#include "filters_arm4.h"
-#else
-
+#ifndef OVERRIDE_NORMALIZE16
int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
{
int i;
@@ -185,7 +190,25 @@
return sig_shift;
}
+#endif
+#else
+/* Root-mean-square of x[0..len-1], computed as sqrt(.1 + sum(x[i]^2)/len); lives in the #else branch (presumably the non-fixed-point build -- confirm). */
+spx_word16_t compute_rms(const spx_sig_t *x, int len)
+{
+ int i;
+ float sum=0; /* running sum of squares, accumulated in float */
+ for (i=0;i<len;i++)
+ {
+ sum += x[i]*x[i];
+ }
+ return sqrt(.1+sum/len); /* the .1 term keeps the sqrt argument above zero; len == 0 would divide 0 by 0 */
+}
+#endif
+
+
+
+#ifndef OVERRIDE_FILTER_MEM2
#ifdef PRECISION16
void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
{
@@ -225,7 +248,9 @@
}
}
#endif
+#endif
+#ifndef OVERRIDE_IIR_MEM2
#ifdef PRECISION16
void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
{
@@ -264,10 +289,9 @@
}
}
#endif
-
#endif
-
+#ifndef OVERRIDE_FIR_MEM2
#ifdef PRECISION16
void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
{
@@ -305,80 +329,15 @@
}
}
#endif
+#endif
-#else
-spx_word16_t compute_rms(const spx_sig_t *x, int len)
-{
- int i;
- float sum=0;
- for (i=0;i<len;i++)
- {
- sum += x[i]*x[i];
- }
- return sqrt(.1+sum/len);
-}
-#ifdef _USE_SSE
-#include "filters_sse.h"
-#else
-void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
-{
- int i,j;
- float xi,yi;
- for (i=0;i<N;i++)
- {
- xi=x[i];
- y[i] = xi + mem[0];
- yi=y[i];
- for (j=0;j<ord-1;j++)
- {
- mem[j] = mem[j+1] + num[j]*xi - den[j]*yi;
- }
- mem[ord-1] = num[ord-1]*xi - den[ord-1]*yi;
- }
-}
-
-void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
-{
- int i,j;
- for (i=0;i<N;i++)
- {
- y[i] = x[i] + mem[0];
- for (j=0;j<ord-1;j++)
- {
- mem[j] = mem[j+1] - den[j]*y[i];
- }
- mem[ord-1] = - den[ord-1]*y[i];
- }
-}
-
-
-void fir_mem2(const spx_sig_t *x, const spx_coef_t *num, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
-{
- int i,j;
- float xi;
- for (i=0;i<N;i++)
- {
- xi=x[i];
- y[i] = xi + mem[0];
- for (j=0;j<ord-1;j++)
- {
- mem[j] = mem[j+1] + num[j]*xi;
- }
- mem[ord-1] = num[ord-1]*xi;
- }
-}
-#endif
-
-#endif
-
-
void syn_percep_zero(const spx_sig_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_sig_t *y, int N, int ord, char *stack)
{
int i;
Modified: trunk/speex/libspeex/filters_arm4.h
===================================================================
--- trunk/speex/libspeex/filters_arm4.h 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/filters_arm4.h 2005-05-27 20:59:46 UTC (rev 9321)
@@ -30,6 +30,7 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#define OVERRIDE_NORMALIZE16
int normalize16(const spx_sig_t *x, spx_word16_t *y, int max_scale, int len)
{
int i;
@@ -92,7 +93,7 @@
return sig_shift;
}
-
+#define OVERRIDE_FILTER_MEM2
void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
{
int i,j;
@@ -251,6 +252,7 @@
}
}
+#define OVERRIDE_IIR_MEM2
void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
{
int i,j;
Modified: trunk/speex/libspeex/filters_sse.h
===================================================================
--- trunk/speex/libspeex/filters_sse.h 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/filters_sse.h 2005-05-27 20:59:46 UTC (rev 9321)
@@ -128,7 +128,7 @@
}
-
+#define OVERRIDE_FILTER_MEM2
void filter_mem2(const float *x, const float *_num, const float *_den, float *y, int N, int ord, float *_mem)
{
if(ord==10)
@@ -227,6 +227,7 @@
_mm_storeu_ps(_mem+4, mem[1]);
}
+#define OVERRIDE_IIR_MEM2
void iir_mem2(const float *x, const float *_den, float *y, int N, int ord, float *_mem)
{
if(ord==10)
@@ -323,7 +324,7 @@
_mm_storeu_ps(_mem+4, mem[1]);
}
-
+#define OVERRIDE_FIR_MEM2
void fir_mem2(const float *x, const float *_num, float *y, int N, int ord, float *_mem)
{
if(ord==10)
Modified: trunk/speex/libspeex/fixed_arm4.h
===================================================================
--- trunk/speex/libspeex/fixed_arm4.h 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/fixed_arm4.h 2005-05-27 20:59:46 UTC (rev 9321)
@@ -35,43 +35,7 @@
#ifndef FIXED_ARM4_H
#define FIXED_ARM4_H
-#define NEG16(x) (-(x))
-#define NEG32(x) (-(x))
-#define EXTRACT16(x) ((spx_word16_t)x)
-#define EXTEND32(x) ((spx_word32_t)x)
-#define SHR16(a,shift) ((a) >> (shift))
-#define SHL16(a,shift) ((a) << (shift))
-#define SHR32(a,shift) ((a) >> (shift))
-#define SHL32(a,shift) ((a) << (shift))
-#define PSHR16(a,shift) (SHR16((a)+(1<<((shift)-1)),shift))
-#define PSHR32(a,shift) (SHR32((a)+(1<<((shift)-1)),shift))
-#define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
-#define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
-
-#define SHR(a,shift) ((a) >> (shift))
-#define SHL(a,shift) ((a) << (shift))
-#define SATURATE(x,a) ((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))
-#define PSHR(a,shift) (SHR((a)+(1<<((shift)-1)),shift))
-
-#define ADD16(a,b) ((a)+(b))
-#define SUB16(a,b) ((a)-(b))
-#define ADD32(a,b) ((a)+(b))
-#define SUB32(a,b) ((a)-(b))
-#define ADD64(a,b) ((a)+(b))
-
-
-/* result fits in 16 bits */
-#define MULT16_16_16(a,b) ((a)*(b))
-
-#define MULT16_16(a,b) ((a)*(b))
-
-
-
-
-#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
-#define MULT16_32_Q12(a,b) ADD32(MULT16_16((a),SHR((b),12)), SHR(MULT16_16((a),((b)&0x00000fff)),12))
-#define MULT16_32_Q13(a,b) ADD32(MULT16_16((a),SHR((b),13)), SHR(MULT16_16((a),((b)&0x00001fff)),13))
-//#define MULT16_32_Q14(a,b) ADD32(MULT16_16((a),SHR((b),14)), SHR(MULT16_16((a),((b)&0x00003fff)),14))
+#undef MULT16_32_Q14
static inline spx_word32_t MULT16_32_Q14(spx_word16_t x, spx_word32_t y) {
int res;
int dummy;
@@ -84,10 +48,7 @@
return(res);
}
-#define MULT16_32_Q11(a,b) ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11))
-#define MAC16_32_Q11(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11)))
-
-//#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
+#undef MULT16_32_Q15
static inline spx_word32_t MULT16_32_Q15(spx_word16_t x, spx_word32_t y) {
int res;
int dummy;
@@ -100,25 +61,7 @@
return(res);
}
-#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
-
-
-#define MAC16_16_Q11(c,a,b) (ADD32((c),SHR(MULT16_16((a),(b)),11)))
-#define MAC16_16_Q13(c,a,b) (ADD32((c),SHR(MULT16_16((a),(b)),13)))
-
-#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
-#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
-#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
-#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))
-
-#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13))
-#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14))
-#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15))
-
-#define MUL_16_32_R15(a,bh,bl) ADD32(MULT16_16((a),(bh)), SHR(MULT16_16((a),(bl)),15))
-
-
-
+#undef DIV32_16
static inline short DIV32_16(int a, int b)
{
int res=0;
@@ -201,8 +144,5 @@
return res;
}
-#define DIV32(a,b) (((signed int)(a))/((signed int)(b)))
-
-
#endif
Modified: trunk/speex/libspeex/fixed_arm5e.h
===================================================================
--- trunk/speex/libspeex/fixed_arm5e.h 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/fixed_arm5e.h 2005-05-27 20:59:46 UTC (rev 9321)
@@ -35,35 +35,7 @@
#ifndef FIXED_ARM5E_H
#define FIXED_ARM5E_H
-#define NEG16(x) (-(x))
-#define NEG32(x) (-(x))
-#define EXTRACT16(x) ((spx_word16_t)x)
-#define EXTEND32(x) ((spx_word32_t)x)
-#define SHR16(a,shift) ((a) >> (shift))
-#define SHL16(a,shift) ((a) << (shift))
-#define SHR32(a,shift) ((a) >> (shift))
-#define SHL32(a,shift) ((a) << (shift))
-#define PSHR16(a,shift) (SHR16((a)+(1<<((shift)-1)),shift))
-#define PSHR32(a,shift) (SHR32((a)+(1<<((shift)-1)),shift))
-#define SATURATE16(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
-#define SATURATE32(x,a) (((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x)))
-
-#define SHR(a,shift) ((a) >> (shift))
-#define SHL(a,shift) ((a) << (shift))
-#define SATURATE(x,a) ((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))
-#define PSHR(a,shift) (SHR((a)+(1<<((shift)-1)),shift))
-
-
-#define ADD16(a,b) ((short)((short)(a)+(short)(b)))
-#define SUB16(a,b) ((a)-(b))
-#define ADD32(a,b) ((a)+(b))
-#define SUB32(a,b) ((a)-(b))
-#define ADD64(a,b) ((a)+(b))
-
-
-/* result fits in 16 bits */
-#define MULT16_16_16(a,b) (((short)(a))*((short)(b)))
-
+#undef MULT16_16
static inline spx_word32_t MULT16_16(spx_word16_t x, spx_word16_t y) {
int res;
asm ("smulbb %0,%1,%2;\n"
@@ -72,6 +44,7 @@
return(res);
}
+#undef MAC16_16
static inline spx_word32_t MAC16_16(spx_word32_t a, spx_word16_t x, spx_word32_t y) {
int res;
asm ("smlabb %0,%1,%2,%3;\n"
@@ -80,10 +53,7 @@
return(res);
}
-#define MULT16_32_Q12(a,b) ADD32(MULT16_16((a),SHR((b),12)), SHR(MULT16_16((a),((b)&0x00000fff)),12))
-#define MULT16_32_Q13(a,b) ADD32(MULT16_16((a),SHR((b),13)), SHR(MULT16_16((a),((b)&0x00001fff)),13))
-#define MULT16_32_Q14(a,b) ADD32(MULT16_16((a),SHR((b),14)), SHR(MULT16_16((a),((b)&0x00003fff)),14))
-
+#undef MULT16_32_Q15
static inline spx_word32_t MULT16_32_Q15(spx_word16_t x, spx_word32_t y) {
int res;
asm ("smulwb %0,%1,%2;\n"
@@ -91,6 +61,8 @@
: "%r"(y<<1),"r"(x));
return(res);
}
+
+#undef MAC16_32_Q15
static inline spx_word32_t MAC16_32_Q15(spx_word32_t a, spx_word16_t x, spx_word32_t y) {
int res;
asm ("smlawb %0,%1,%2,%3;\n"
@@ -98,6 +70,8 @@
: "%r"(y<<1),"r"(x),"r"(a));
return(res);
}
+
+#undef MULT16_32_Q11
static inline spx_word32_t MULT16_32_Q11(spx_word16_t x, spx_word32_t y) {
int res;
asm ("smulwb %0,%1,%2;\n"
@@ -105,6 +79,8 @@
: "%r"(y<<5),"r"(x));
return(res);
}
+
+#undef MAC16_32_Q11
static inline spx_word32_t MAC16_32_Q11(spx_word32_t a, spx_word16_t x, spx_word32_t y) {
int res;
asm ("smlawb %0,%1,%2,%3;\n"
@@ -113,25 +89,8 @@
return(res);
}
-#define MAC16_16_Q11(c,a,b) (ADD32((c),SHR(MULT16_16((a),(b)),11)))
-#define MAC16_16_Q13(c,a,b) (ADD32((c),SHR(MULT16_16((a),(b)),13)))
-
-#define MULT16_16_Q11_32(a,b) (SHR(MULT16_16((a),(b)),11))
-#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
-#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
-#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))
-
-#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13))
-#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14))
-#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15))
-
-#define MUL_16_32_R15(a,bh,bl) ADD32(MULT16_16((a),(bh)), SHR(MULT16_16((a),(bl)),15))
-
-
-/*
-#define DIV32_16(a,b) ((short)(((signed int)(a))/((short)(b))))
-*/
-static inline short DIV3216(int a, int b)
+#undef DIV32_16
+static inline short DIV32_16(int a, int b)
{
int res=0;
int dead1, dead2, dead3, dead4, dead5;
@@ -214,8 +173,6 @@
}
-#define DIV32(a,b) (((signed int)(a))/((signed int)(b)))
-
#endif
Modified: trunk/speex/libspeex/ltp.c
===================================================================
--- trunk/speex/libspeex/ltp.c 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/ltp.c 2005-05-27 20:59:46 UTC (rev 9321)
@@ -50,8 +50,9 @@
#include "ltp_sse.h"
#elif defined (ARM4_ASM) || defined(ARM5E_ASM)
#include "ltp_arm4.h"
-#else
+#endif
+#ifndef OVERRIDE_INNER_PROD
static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
{
spx_word32_t sum=0;
@@ -68,7 +69,9 @@
}
return sum;
}
+#endif
+#ifndef OVERRIDE_PITCH_XCORR
#if 0 /* HINT: Enable this for machines with enough registers (i.e. not x86) */
static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
{
@@ -144,11 +147,9 @@
}
#endif
+#endif
-
-#endif
-
void open_loop_nbest_pitch(spx_sig_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
{
int i,j,k;
Modified: trunk/speex/libspeex/ltp_arm4.h
===================================================================
--- trunk/speex/libspeex/ltp_arm4.h 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/ltp_arm4.h 2005-05-27 20:59:46 UTC (rev 9321)
@@ -29,6 +29,8 @@
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+
+#define OVERRIDE_INNER_PROD
static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
{
spx_word32_t sum1=0,sum2=0;
@@ -78,7 +80,8 @@
);
return (sum1+sum2)>>1;
}
-
+
+#define OVERRIDE_PITCH_XCORR
static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
{
int i,j;
Modified: trunk/speex/libspeex/ltp_sse.h
===================================================================
--- trunk/speex/libspeex/ltp_sse.h 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/ltp_sse.h 2005-05-27 20:59:46 UTC (rev 9321)
@@ -32,6 +32,7 @@
#include <xmmintrin.h>
+#define OVERRIDE_INNER_PROD
static float inner_prod(const float *a, const float *b, int len)
{
int i;
@@ -50,6 +51,7 @@
return ret;
}
+#define OVERRIDE_PITCH_XCORR
static void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack)
{
int i, offset;
Modified: trunk/speex/libspeex/vq.c
===================================================================
--- trunk/speex/libspeex/vq.c 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/vq.c 2005-05-27 20:59:46 UTC (rev 9321)
@@ -36,7 +36,15 @@
#include "vq.h"
#include "stack_alloc.h"
+#include "misc.h"
+#ifdef _USE_SSE
+#include <xmmintrin.h>
+#elif defined(SHORTCUTS) && (defined(ARM4_ASM) || defined(ARM5E_ASM))
+#include "vq_arm4.h"
+#endif
+
+
int scal_quant(spx_word16_t in, const spx_word16_t *boundary, int entries)
{
int i=0;
@@ -82,51 +90,8 @@
return best_index;
}
-#ifdef _USE_SSE
-#include <xmmintrin.h>
-#include "misc.h"
-void vq_nbest(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
-{
- int i,j,k,used;
- VARDECL(float *dist);
- VARDECL(__m128 *in);
- __m128 half;
- used = 0;
- ALLOC(dist, entries, float);
- half = _mm_set_ps1(.5f);
- ALLOC(in, len, __m128);
- for (i=0;i<len;i++)
- in[i] = _mm_set_ps1(_in[i]);
- for (i=0;i<entries>>2;i++)
- {
- __m128 d = _mm_mul_ps(E[i], half);
- for (j=0;j<len;j++)
- d = _mm_sub_ps(d, _mm_mul_ps(in[j], *codebook++));
- _mm_storeu_ps(dist+4*i, d);
- }
- for (i=0;i<entries;i++)
- {
- if (i<N || dist[i]<best_dist[N-1])
- {
- for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--)
- {
- best_dist[k]=best_dist[k-1];
- nbest[k] = nbest[k-1];
- }
- best_dist[k]=dist[i];
- nbest[k]=i;
- used++;
- }
- }
-}
-
-#else
-
-#if defined(SHORTCUTS) && (defined(ARM4_ASM) || defined(ARM5E_ASM))
-#include "vq_arm4.h"
-#else
-
+#ifndef OVERRIDE_VQ_NBEST
/*Finds the indices of the n-best entries in a codebook*/
void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
{
@@ -157,61 +122,10 @@
}
#endif
-#endif
-#ifdef _USE_SSE
-
-void vq_nbest_sign(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
-{
- int i,j,k,used;
- VARDECL(float *dist);
- VARDECL(__m128 *in);
- __m128 half;
- used = 0;
- ALLOC(dist, entries, float);
- half = _mm_set_ps1(.5f);
- ALLOC(in, len, __m128);
- for (i=0;i<len;i++)
- in[i] = _mm_set_ps1(_in[i]);
- for (i=0;i<entries>>2;i++)
- {
- __m128 d = _mm_setzero_ps();
- for (j=0;j<len;j++)
- d = _mm_add_ps(d, _mm_mul_ps(in[j], *codebook++));
- _mm_storeu_ps(dist+4*i, d);
- }
- for (i=0;i<entries;i++)
- {
- int sign;
- if (dist[i]>0)
- {
- sign=0;
- dist[i]=-dist[i];
- } else
- {
- sign=1;
- }
- dist[i] += .5f*((float*)E)[i];
- if (i<N || dist[i]<best_dist[N-1])
- {
- for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--)
- {
- best_dist[k]=best_dist[k-1];
- nbest[k] = nbest[k-1];
- }
- best_dist[k]=dist[i];
- nbest[k]=i;
- used++;
- if (sign)
- nbest[k]+=entries;
- }
- }
-}
-
-#else
-
+#ifndef OVERRIDE_VQ_NBEST_SIGN
/*Finds the indices of the n-best entries in a codebook with sign*/
void vq_nbest_sign(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
{
Modified: trunk/speex/libspeex/vq_arm4.h
===================================================================
--- trunk/speex/libspeex/vq_arm4.h 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/vq_arm4.h 2005-05-27 20:59:46 UTC (rev 9321)
@@ -30,6 +30,7 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#define OVERRIDE_VQ_NBEST
void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
{
int i,j;
Added: trunk/speex/libspeex/vq_sse.h
===================================================================
--- trunk/speex/libspeex/vq_sse.h 2005-05-27 18:05:05 UTC (rev 9320)
+++ trunk/speex/libspeex/vq_sse.h 2005-05-27 20:59:46 UTC (rev 9321)
@@ -0,0 +1,118 @@
+/* Copyright (C) 2004 Jean-Marc Valin
+ File: vq_sse.h
+ SSE-optimized vq routine
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* SSE vq_nbest: scores four codebook entries per __m128 and keeps the N lowest scores in best_dist (ascending) with their entry indices in nbest. */
+#define OVERRIDE_VQ_NBEST
+void vq_nbest(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
+{
+ int i,j,k,used;
+ VARDECL(float *dist);
+ VARDECL(__m128 *in);
+ __m128 half;
+ used = 0; /* counts insertions made into the N-best list so far */
+ ALLOC(dist, entries, float); /* one score per entry; NOTE(review): ALLOC presumably draws from `stack` -- confirm in stack_alloc.h */
+ half = _mm_set_ps1(.5f);
+ ALLOC(in, len, __m128);
+ for (i=0;i<len;i++)
+ in[i] = _mm_set_ps1(_in[i]); /* broadcast each input sample to all four lanes */
+ for (i=0;i<entries>>2;i++) /* one pass per group of four entries; NOTE(review): dist[] from 4*(entries>>2) upward is never written, so entries is presumably a multiple of 4 -- confirm */
+ {
+ __m128 d = _mm_mul_ps(E[i], half); /* start from .5*E for the four entries of this group */
+ for (j=0;j<len;j++)
+ d = _mm_sub_ps(d, _mm_mul_ps(in[j], *codebook++)); /* d = .5*E - sum_j in[j]*codebook; codebook is read sequentially, len vectors per group */
+ _mm_storeu_ps(dist+4*i, d); /* unaligned store of the four scores */
+ }
+ for (i=0;i<entries;i++)
+ {
+ if (i<N || dist[i]<best_dist[N-1]) /* the first N entries always enter; later ones only if they beat the current worst */
+ {
+ for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--) /* move unused slots and larger scores one position toward the end */
+ {
+ best_dist[k]=best_dist[k-1];
+ nbest[k] = nbest[k-1];
+ }
+ best_dist[k]=dist[i];
+ nbest[k]=i;
+ used++;
+ }
+ }
+}
+
+/* SSE vq_nbest_sign: N-best codebook search where each entry is scored as .5*E[i] - |<in, entry>|;
+ nbest[k] is offset by `entries` when the inner product was not positive.
+ NOTE(review): `half` is assigned below but never read in this function. */
+#define OVERRIDE_VQ_NBEST_SIGN
+void vq_nbest_sign(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
+{
+ int i,j,k,used;
+ VARDECL(float *dist);
+ VARDECL(__m128 *in);
+ __m128 half;
+ used = 0; /* counts insertions made into the N-best list so far */
+ ALLOC(dist, entries, float);
+ half = _mm_set_ps1(.5f);
+ ALLOC(in, len, __m128);
+ for (i=0;i<len;i++)
+ in[i] = _mm_set_ps1(_in[i]); /* broadcast each input sample to all four lanes */
+ for (i=0;i<entries>>2;i++) /* four entries per iteration; NOTE(review): entries is presumably a multiple of 4 -- confirm */
+ {
+ __m128 d = _mm_setzero_ps();
+ for (j=0;j<len;j++)
+ d = _mm_add_ps(d, _mm_mul_ps(in[j], *codebook++)); /* accumulate the inner product of in with four entries at once */
+ _mm_storeu_ps(dist+4*i, d);
+ }
+ for (i=0;i<entries;i++)
+ {
+ int sign;
+ if (dist[i]>0)
+ {
+ sign=0;
+ dist[i]=-dist[i]; /* positive product: keep it as -|product| */
+ } else
+ {
+ sign=1; /* non-positive product: dist[i] already equals -|product| */
+ }
+ dist[i] += .5f*((float*)E)[i]; /* E is viewed as a flat float array, one value per entry */
+ if (i<N || dist[i]<best_dist[N-1]) /* the first N entries always enter; later ones only if they beat the current worst */
+ {
+ for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--) /* move unused slots and larger scores one position toward the end */
+ {
+ best_dist[k]=best_dist[k-1];
+ nbest[k] = nbest[k-1];
+ }
+ best_dist[k]=dist[i];
+ nbest[k]=i;
+ used++;
+ if (sign)
+ nbest[k]+=entries; /* indices >= entries mark the sign==1 case */
+ }
+ }
+}
More information about the commits
mailing list