[xiph-cvs] cvs commit: speex/libspeex cb_search.c cb_search_sse.h vq.c vq.h

Jean-Marc Valin jm at xiph.org
Mon Jan 19 00:09:20 PST 2004



jm          04/01/19 03:09:20

  Modified:    libspeex cb_search.c cb_search_sse.h vq.c vq.h
  Log:
  VQ search has been SSE-ized. Not really clean yet, though.

Revision  Changes    Path
1.106     +18 -7     speex/libspeex/cb_search.c

Index: cb_search.c
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/cb_search.c,v
retrieving revision 1.105
retrieving revision 1.106
diff -u -r1.105 -r1.106
--- cb_search.c	18 Jan 2004 08:13:31 -0000	1.105
+++ cb_search.c	19 Jan 2004 08:09:19 -0000	1.106
@@ -41,7 +41,7 @@
 #include "cb_search_sse.h"
 #else
 
-static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *r, spx_word16_t *resp, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
+static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *r, spx_word16_t *resp, float *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
 {
    int i, j, k;
    for (i=0;i<shape_cb_size;i++)
@@ -93,9 +93,15 @@
 {
    int i,j,k,m,n,q;
    spx_word16_t *resp;
+#ifdef _USE_SSE
+   __m128 *resp2;
+   __m128 *E;
+#else
+   spx_word16_t *resp2;
+   spx_word32_t *E;
+#endif
    spx_word16_t *t;
    spx_sig_t *e, *r2;
-   spx_word32_t *E;
    spx_word16_t *tmp;
    spx_word32_t *ndist, *odist;
    int *itmp;
@@ -109,7 +115,6 @@
    int *best_index;
    spx_word32_t *best_dist;
    int have_sign;
-
    N=complexity;
    if (N>10)
       N=10;
@@ -126,10 +131,16 @@
    shape_cb = params->shape_cb;
    have_sign = params->have_sign;
    resp = PUSH(stack, shape_cb_size*subvect_size, spx_word16_t);
+#ifdef _USE_SSE
+   resp2 = PUSH(stack, (shape_cb_size*subvect_size)>>2, __m128);
+   E = PUSH(stack, shape_cb_size>>2, __m128);
+#else
+   resp2 = resp;
+   E = PUSH(stack, shape_cb_size, spx_word32_t);
+#endif
    t = PUSH(stack, nsf, spx_word16_t);
    e = PUSH(stack, nsf, spx_sig_t);
    r2 = PUSH(stack, nsf, spx_sig_t);
-   E = PUSH(stack, shape_cb_size, spx_word32_t);
    ind = PUSH(stack, nb_subvect, int);
 
    tmp = PUSH(stack, 2*N*nsf, spx_word16_t);
@@ -168,7 +179,7 @@
      printf ("%d\n", (int)t[i]);*/
 
    /* Pre-compute codewords response and energy */
-   compute_weighted_codebook(shape_cb, r, resp, E, shape_cb_size, subvect_size, stack);
+   compute_weighted_codebook(shape_cb, r, resp, resp2, E, shape_cb_size, subvect_size, stack);
 
    for (j=0;j<N;j++)
       odist[j]=0;
@@ -185,9 +196,9 @@
          spx_word16_t *x=ot[j]+subvect_size*i;
          /*Find new n-best based on previous n-best j*/
          if (have_sign)
-            vq_nbest_sign(x, resp, subvect_size, shape_cb_size, E, N, best_index, best_dist);
+            vq_nbest_sign(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
          else
-            vq_nbest(x, resp, subvect_size, shape_cb_size, E, N, best_index, best_dist);
+            vq_nbest(x, resp2, subvect_size, shape_cb_size, E, N, best_index, best_dist, stack);
 
          /*For all new n-bests*/
          for (k=0;k<N;k++)

1.2       +3 -2      speex/libspeex/cb_search_sse.h

Index: cb_search_sse.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/cb_search_sse.h,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- cb_search_sse.h	18 Jan 2004 08:13:31 -0000	1.1
+++ cb_search_sse.h	19 Jan 2004 08:09:19 -0000	1.2
@@ -48,7 +48,7 @@
 }
 
 
-static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *_r, spx_word16_t *resp, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
+static void compute_weighted_codebook(const signed char *shape_cb, const spx_sig_t *_r, float *resp, __m128 *resp2, __m128 *E, int shape_cb_size, int subvect_size, char *stack)
 {
    int i, j, k;
    __m128 resj, EE;
@@ -72,8 +72,9 @@
          for (k=0;k<=j;k++)
             resj = _mm_add_ps(resj, _mm_mul_ps(shape[k],r[j-k]));
          _spx_mm_getr_ps(resj, _res+j, _res+subvect_size+j, _res+2*subvect_size+j, _res+3*subvect_size+j);
+         *resp2++ = resj;
          EE = _mm_add_ps(EE, _mm_mul_ps(resj, resj));
-         _mm_storeu_ps(E+i, EE);
       }
+      E[i>>2] = EE;
    }
 }

1.17      +99 -2     speex/libspeex/vq.c

Index: vq.c
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/vq.c,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -r1.16 -r1.17
--- vq.c	4 Dec 2003 21:29:18 -0000	1.16
+++ vq.c	19 Jan 2004 08:09:19 -0000	1.17
@@ -31,6 +31,7 @@
 */
 
 #include "vq.h"
+#include "stack_alloc.h"
 
 int scal_quant(spx_word16_t in, const spx_word16_t *boundary, int entries)
 {
@@ -77,9 +78,49 @@
    return best_index;
 }
 
+#ifdef _USE_SSE
+#include <xmmintrin.h>
+#include "misc.h"
+void vq_nbest(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
+{
+   int i,j,k,used;
+   float *dist;
+   __m128 *in;
+   __m128 half;
+   used = 0;
+   dist = PUSH(stack, entries, float);
+   half = _mm_set_ps1(.5);
+   in = PUSH(stack, len, __m128);
+   for (i=0;i<len;i++)
+      in[i] = _mm_set_ps1(_in[i]);
+   for (i=0;i<entries>>2;i++)
+   {
+      __m128 d = _mm_mul_ps(E[i], half);
+      for (j=0;j<len;j++)
+         d = _mm_sub_ps(d, _mm_mul_ps(in[j], *codebook++));
+      _mm_storeu_ps(dist+4*i, d);
+   }
+   for (i=0;i<entries;i++)
+   {
+      if (i<N || dist[i]<best_dist[N-1])
+      {
+         for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--)
+         {
+            best_dist[k]=best_dist[k-1];
+            nbest[k] = nbest[k-1];
+         }
+         best_dist[k]=dist[i];
+         nbest[k]=i;
+         used++;
+      }
+   }
+}
+
+
+#else
 
 /*Finds the indices of the n-best entries in a codebook*/
-void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist)
+void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
 {
    int i,j,k,used;
    used = 0;
@@ -107,8 +148,63 @@
    }
 }
 
+#endif
+
+
+
+#ifdef _USE_SSE
+
+void vq_nbest_sign(spx_word16_t *_in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
+{
+   int i,j,k,used;
+   float *dist;
+   __m128 *in;
+   __m128 half;
+   used = 0;
+   dist = PUSH(stack, entries, float);
+   half = _mm_set_ps1(.5);
+   in = PUSH(stack, len, __m128);
+   for (i=0;i<len;i++)
+      in[i] = _mm_set_ps1(_in[i]);
+   for (i=0;i<entries>>2;i++)
+   {
+      __m128 d = _mm_setzero_ps();
+      for (j=0;j<len;j++)
+         d = _mm_add_ps(d, _mm_mul_ps(in[j], *codebook++));
+      _mm_storeu_ps(dist+4*i, d);
+   }
+   for (i=0;i<entries;i++)
+   {
+      int sign;
+      if (dist[i]>0)
+      {
+         sign=0;
+         dist[i]=-dist[i];
+      } else
+      {
+         sign=1;
+      }
+      dist[i] += .5*((float*)E)[i];
+      if (i<N || dist[i]<best_dist[N-1])
+      {
+         for (k=N-1; (k >= 1) && (k > used || dist[i] < best_dist[k-1]); k--)
+         {
+            best_dist[k]=best_dist[k-1];
+            nbest[k] = nbest[k-1];
+         }
+         best_dist[k]=dist[i];
+         nbest[k]=i;
+         used++;
+         if (sign)
+            nbest[k]+=entries;
+      }
+   }
+}
+
+#else
+
 /*Finds the indices of the n-best entries in a codebook with sign*/
-void vq_nbest_sign(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist)
+void vq_nbest_sign(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
 {
    int i,j,k, sign, used;
    used=0;
@@ -145,3 +241,4 @@
       }
    }
 }
+#endif

1.14      +8 -2      speex/libspeex/vq.h

Index: vq.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/vq.h,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- vq.h	4 Dec 2003 21:29:18 -0000	1.13
+++ vq.h	19 Jan 2004 08:09:19 -0000	1.14
@@ -39,9 +39,15 @@
 int scal_quant32(spx_word32_t in, const spx_word32_t *boundary, int entries);
 
 int vq_index(float *in, const float *codebook, int len, int entries);
+#ifdef _USE_SSE
+#include <xmmintrin.h>
+void vq_nbest(spx_word16_t *in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack);
 
-void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist);
+void vq_nbest_sign(spx_word16_t *in, const __m128 *codebook, int len, int entries, __m128 *E, int N, int *nbest, spx_word32_t *best_dist, char *stack);
+#else
+void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack);
 
-void vq_nbest_sign(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist);
+void vq_nbest_sign(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack);
+#endif
 
 #endif

--- >8 ----
List archives:  http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body.  No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.



More information about the commits mailing list