[xiph-cvs] cvs commit: speex/libspeex ltp.c ltp_sse.h

Jean-Marc Valin jm at xiph.org
Sat Jan 17 12:52:53 PST 2004



jm          04/01/17 15:52:53

  Modified:    libspeex ltp.c ltp_sse.h
  Log:
  faster SSE implementation (reduced unaligned loads)

Revision  Changes    Path
1.108     +14 -6     speex/libspeex/ltp.c

Index: ltp.c
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/ltp.c,v
retrieving revision 1.107
retrieving revision 1.108
diff -u -r1.107 -r1.108
--- ltp.c	4 Dec 2003 21:29:17 -0000	1.107
+++ ltp.c	17 Jan 2004 20:52:52 -0000	1.108
@@ -42,7 +42,7 @@
 #ifdef _USE_SSE
 #include "ltp_sse.h"
 #else
-static spx_word32_t inner_prod(spx_word16_t *x, spx_word16_t *y, int len)
+static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
 {
    int i;
    spx_word32_t sum=0;
@@ -57,6 +57,18 @@
    }
    return sum;
 }
+
+static void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack)
+{
+   int i;
+   for (i=0;i<nb_pitch;i++)
+   {
+      /* Compute correlation*/
+      corr[nb_pitch-1-i]=inner_prod(_x, _y+i, len);
+   }
+
+}
+
 #endif
 
 void open_loop_nbest_pitch(spx_sig_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
@@ -102,11 +114,7 @@
       score[i-start]=0;
    }
 
-   for (i=start;i<=end;i++)
-   {
-      /* Compute correlation*/
-      corr[i-start]=inner_prod(swn, swn-i, len);
-   }
+   pitch_xcorr(swn, swn-end, corr, len, end-start+1, stack);
 
 #ifdef FIXED_POINT
    {

1.4       +47 -1     speex/libspeex/ltp_sse.h

Index: ltp_sse.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/ltp_sse.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- ltp_sse.h	2 May 2003 02:08:25 -0000	1.3
+++ ltp_sse.h	17 Jan 2004 20:52:52 -0000	1.4
@@ -30,8 +30,9 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#include <xmmintrin.h>
 
-static float inner_prod(float *a, float *b, int len)
+static float inner_prod(const float *a, const float *b, int len)
 {
   float sum;
   __asm__ __volatile__ (
@@ -93,3 +94,48 @@
   : : "a" (a), "D" (b), "c" (len), "d" (&sum) : "memory");
   return sum;
 }
+
+
+static void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack)
+{
+   int i, offset;
+   __m128 *x, *y;
+   int N, L;
+   N = len>>2;
+   L = nb_pitch>>2;
+   x = PUSH(stack, N, __m128);
+   y = PUSH(stack, N+L, __m128);
+   for (i=0;i<N;i++)
+      x[i] = _mm_loadu_ps(_x+(i<<2));
+   for (offset=0;offset<4;offset++)
+   {
+      for (i=0;i<N+L;i++)
+         y[i] = _mm_loadu_ps(_y+(i<<2)+offset);
+      for (i=0;i<L;i++)
+      {
+         int j;
+         __m128 sum, *xx, *yy;
+         sum = _mm_setzero_ps();
+         yy = y+i;
+         xx = x;
+         for (j=0;j<N;j+=10)
+         {
+            sum = _mm_add_ps(sum, _mm_mul_ps(xx[0], yy[0]));
+            sum = _mm_add_ps(sum, _mm_mul_ps(xx[1], yy[1]));
+            sum = _mm_add_ps(sum, _mm_mul_ps(xx[2], yy[2]));
+            sum = _mm_add_ps(sum, _mm_mul_ps(xx[3], yy[3]));
+            sum = _mm_add_ps(sum, _mm_mul_ps(xx[4], yy[4]));
+            sum = _mm_add_ps(sum, _mm_mul_ps(xx[5], yy[5]));
+            sum = _mm_add_ps(sum, _mm_mul_ps(xx[6], yy[6]));
+            sum = _mm_add_ps(sum, _mm_mul_ps(xx[7], yy[7]));
+            sum = _mm_add_ps(sum, _mm_mul_ps(xx[8], yy[8]));
+            sum = _mm_add_ps(sum, _mm_mul_ps(xx[9], yy[9]));
+            xx += 10;
+            yy += 10;
+         }
+         sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
+         sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
+         _mm_store_ss(corr+nb_pitch-1-(i<<2)-offset, sum);
+      }
+   }
+}

--- >8 ----
List archives:  http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body.  No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.



More information about the commits mailing list