[xiph-cvs] cvs commit: speex/libspeex cb_search.c cb_search_sse.h vq.c vq.h
Jean-Marc Valin
jm at xiph.org
Mon Jan 19 00:09:20 PST 2004
jm 04/01/19 03:09:20
Modified: libspeex cb_search.c cb_search_sse.h vq.c vq.h
Log:
VQ search has been SSE-ized. Not really clean yet, though.
Revision Changes Path
1.106 +18 -7 speex/libspeex/cb_search.c
Index: cb_search.c
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/cb_search.c,v
retrieving revision 1.105
retrieving revision 1.106
diff -u -r1.105 -r1.106
--- cb_search.c 18 Jan 2004 08:13:31 -0000 1.105
+++ cb_search.c 19 Jan 2004 08:09:19 -0000 1.106
@@ -41,7 +41,7 @@
#include "cb_search_sse.h"
#else
-static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *r, spx_word16_t *resp, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
+static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *r, spx_word16_t *resp, float *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
{
int i, j, k;
for (i=0;i<shape_cb_size;i++)
@@ -93,9 +93,15 @@
{
int i,j,k,m,n,q;
spx_word16_t *resp;
+#ifdef _USE_SSE
+ __m128 *resp2;
+ __m128 *E;
+#else
+ spx_word16_t *resp2;
+ spx_word32_t *E;
+#endif
spx_word16_t *t;
spx_sig_t *e, *r2;
- spx_word32_t *E;
spx_word16_t *tmp;
spx_word32_t *ndist, *odist;
int *itmp;
@@ -109,7 +115,6 @@
int *best_index;
spx_word32_t *best_dist;
int have_sign;
-
N=complexity;
if (N>10)
N=10;
@@ -126,10 +131,16 @@
shape_cb = params->shape_cb;
have_sign = params->have_sign;
resp = PUSH(stack, shape_cb_size*subvect_size, spx_word16_t);
+#ifdef _USE_SSE
+ resp2 = PUSH(stack, (shape_cb_size*subvect_size)>>2, __m128);
+ E = PUSH(stack, shape_cb_size>>2, __m128);
+#else
+ resp2 = resp;
+ E = PUSH(stack, shape_cb_size, spx_word32_t);
+#endif
t = PUSH(stack, nsf, spx_word16_t);
e = PUSH(stack, nsf, spx_sig_t);
r2 = PUSH(stack, nsf, spx_sig_t);
- E = PUSH(stack, shape_cb_size, spx_word32_t);
ind = PUSH(stack, nb_subvect, int);
tmp = PUSH(stack, 2*N*nsf, spx_word16_t);
@@ -168,7 +179,7 @@
printf ("%d\n", (int)t[i]);*/
/* Pre-compute codewords response and energy */
- compute_weighted_codebook(shape_cb, r, resp, E, shape_cb_size, subvect_size, stack);
+ compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
for (j=0;j<N;j++)
odist[j]=0;
@@ -185,9 +196,9 @@
spx_word16_t *x=ot[j]+subvect_size*i;
/*Find new n-best based on previous n-best j*/
if (have_sign)
- vq_nbest_sign(x, resp, subvect_size, shape_cb_size, E, N, best_index, best_dist);
+ vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
else
- vq_nbest(x, resp, subvect_size, shape_cb_size, E, N, best_index, best_dist);
+ vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
/*For all new n-bests*/
for (k=0;k<N;k++)
1.2 +3 -2 speex/libspeex/cb_search_sse.h
Index: cb_search_sse.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/cb_search_sse.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- cb_search_sse.h 18 Jan 2004 08:13:31 -0000 1.1
+++ cb_search_sse.h 19 Jan 2004 08:09:19 -0000 1.2
@@ -48,7 +48,7 @@
}
-static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *_r, spx_word16_t *resp, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
+static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *_r, float *resp, __m128 *resp2, __m128 *E, int shape_cb_size, int subvect_size, char *stack)
{
int i, j, k;
__m128 resj, EE;
@@ -72,8 +72,9 @@
for (k=0;k<=j;k++)
resj = _mm_add_ps(resj, _mm_mul_ps(shape[k],r[j-k]));
_spx_mm_getr_ps(resj, _res+j, _res+subvect_size+j, _res+2*subvect_size+j, _res+3*subvect_size+j);
+ *resp2++ = resj;
EE = _mm_add_ps(EE, _mm_mul_ps(resj, resj));
- _mm_storeu_ps(E+i, EE);
}
+ E[i>>2] = EE;
}
}
1.17 +99 -2 speex/libspeex/vq.c
Index: vq.c
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/vq.c,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -r1.16 -r1.17
--- vq.c 4 Dec 2003 21:29:18 -0000 1.16
+++ vq.c 19 Jan 2004 08:09:19 -0000 1.17
@@ -31,6 +31,7 @@
*/
#include "vq.h"
+#include "stack_alloc.h"
int scal_quant(spx_word16_t in, const spx_word16_t *boundary, int entries)
{
@@ -77,9 +78,49 @@
return best_index;
}
+#ifdef _USE_SSE
+#include <xmmintrin.h>
+#include "misc.h"
+void vq_nbest(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
+{
+ int i,j,k,used;
+ float *dist;
+ __m128 *in;
+ __m128 half;
+ used = 0;
+ dist = PUSH(stack, entries, float);
+ half = _mm_set_ps1(.5);
+ in = PUSH(stack, len, __m128);
+ for (i=0;i<len;i++)
+ in[i] = _mm_set_ps1(_in[i]);
+ for (i=0;i<entries>>2;i++)
+ {
+ __m128 d = _mm_mul_ps(E[i], half);
+ for (j=0;j<len;j++)
+ d = _mm_sub_ps(d, _mm_mul_ps(in[j], *codebook++));
+ _mm_storeu_ps(dist+4*i, d);
+ }
+ for (i=0;i<entries;i++)
+ {
+ if (i<N || dist[i]<best_dist[N-1])
+ {
+ for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--)
+ {
+ best_dist[k]=best_dist[k-1];
+ nbest[k] = nbest[k-1];
+ }
+ best_dist[k]=dist[i];
+ nbest[k]=i;
+ used++;
+ }
+ }
+}
+
+
+#else
/*Finds the indices of the n-best entries in a codebook*/
-void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist)
+void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
{
int i,j,k,used;
used = 0;
@@ -107,8 +148,63 @@
}
}
+#endif
+
+
+
+#ifdef _USE_SSE
+
+void vq_nbest_sign(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
+{
+ int i,j,k,used;
+ float *dist;
+ __m128 *in;
+ __m128 half;
+ used = 0;
+ dist = PUSH(stack, entries, float);
+ half = _mm_set_ps1(.5);
+ in = PUSH(stack, len, __m128);
+ for (i=0;i<len;i++)
+ in[i] = _mm_set_ps1(_in[i]);
+ for (i=0;i<entries>>2;i++)
+ {
+ __m128 d = _mm_setzero_ps();
+ for (j=0;j<len;j++)
+ d = _mm_add_ps(d, _mm_mul_ps(in[j], *codebook++));
+ _mm_storeu_ps(dist+4*i, d);
+ }
+ for (i=0;i<entries;i++)
+ {
+ int sign;
+ if (dist[i]>0)
+ {
+ sign=0;
+ dist[i]=-dist[i];
+ } else
+ {
+ sign=1;
+ }
+ dist[i] += .5*((float*)E)[i];
+ if (i<N || dist[i]<best_dist[N-1])
+ {
+ for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--)
+ {
+ best_dist[k]=best_dist[k-1];
+ nbest[k] = nbest[k-1];
+ }
+ best_dist[k]=dist[i];
+ nbest[k]=i;
+ used++;
+ if (sign)
+ nbest[k]+=entries;
+ }
+ }
+}
+
+#else
+
/*Finds the indices of the n-best entries in a codebook with sign*/
-void vq_nbest_sign(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist)
+void vq_nbest_sign(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
{
int i,j,k, sign, used;
used=0;
@@ -145,3 +241,4 @@
}
}
}
+#endif
1.14 +8 -2 speex/libspeex/vq.h
Index: vq.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/vq.h,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- vq.h 4 Dec 2003 21:29:18 -0000 1.13
+++ vq.h 19 Jan 2004 08:09:19 -0000 1.14
@@ -39,9 +39,15 @@
int scal_quant32(spx_word32_t in, const spx_word32_t *boundary, int entries);
int vq_index(float *in, const float *codebook, int len, int entries);
+#ifdef _USE_SSE
+#include <xmmintrin.h>
+void vq_nbest(spx_word16_t *in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack);
-void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist);
+void vq_nbest_sign(spx_word16_t *in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack);
+#else
+void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack);
-void vq_nbest_sign(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist);
+void vq_nbest_sign(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack);
+#endif
#endif
--- >8 ----
List archives: http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body. No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.
More information about the commits mailing list