[xiph-cvs] cvs commit: speex/libspeex ltp_sse.h

Jean-Marc Valin jm at xiph.org
Sat Jan 17 22:47:48 PST 2004



jm          04/01/18 01:47:48

  Modified:    libspeex ltp_sse.h
  Log:
  converted the inner product function to SSE intrinsics too

Revision  Changes    Path
1.5       +23 -61    speex/libspeex/ltp_sse.h

Index: ltp_sse.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/ltp_sse.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- ltp_sse.h	17 Jan 2004 20:52:52 -0000	1.4
+++ ltp_sse.h	18 Jan 2004 06:47:47 -0000	1.5
@@ -1,6 +1,6 @@
 /* Copyright (C) 2002 Jean-Marc Valin 
    File: ltp.c
-   Lont-Term Prediction functions
+   Lont-Term Prediction functions (SSE version)
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
@@ -34,68 +34,30 @@
 
 static float inner_prod(const float *a, const float *b, int len)
 {
-  float sum;
-  __asm__ __volatile__ (
-  "\tpush %%eax\n"
-  "\tpush %%edi\n"
-  "\tpush %%ecx\n"
-  "\txorps %%xmm3, %%xmm3\n"
-  "\txorps %%xmm4, %%xmm4\n"
-
-  "\tsub $20, %%ecx\n"
-
-".mul20_loop%=:\n"
-
-  "\tmovups (%%eax), %%xmm0\n"
-  "\tmovups (%%edi), %%xmm1\n"
-  "\tmulps %%xmm0, %%xmm1\n"
-
-  "\tmovups 16(%%eax), %%xmm5\n"
-  "\tmovups 16(%%edi), %%xmm6\n"
-  "\tmulps %%xmm5, %%xmm6\n"
-  "\taddps %%xmm1, %%xmm3\n"
-
-  "\tmovups 32(%%eax), %%xmm0\n"
-  "\tmovups 32(%%edi), %%xmm1\n"
-  "\tmulps %%xmm0, %%xmm1\n"
-  "\taddps %%xmm6, %%xmm4\n"
-
-  "\tmovups 48(%%eax), %%xmm5\n"
-  "\tmovups 48(%%edi), %%xmm6\n"
-  "\tmulps %%xmm5, %%xmm6\n"
-  "\taddps %%xmm1, %%xmm3\n"
-
-  "\tmovups 64(%%eax), %%xmm0\n"
-  "\tmovups 64(%%edi), %%xmm1\n"
-  "\tmulps %%xmm0, %%xmm1\n"
-  "\taddps %%xmm6, %%xmm4\n"
-  "\taddps %%xmm1, %%xmm3\n"
-
-
-  "\tadd $80, %%eax\n"
-  "\tadd $80, %%edi\n"
-
-  "\tsub $20,  %%ecx\n"
-
-  "\tjae .mul20_loop%=\n"
-
-  "\taddps %%xmm4, %%xmm3\n"
-
-  "\tmovhlps %%xmm3, %%xmm4\n"
-  "\taddps %%xmm4, %%xmm3\n"
-  "\tmovaps %%xmm3, %%xmm4\n"
-  "\tshufps $0x55, %%xmm4, %%xmm4\n"
-  "\taddss %%xmm4, %%xmm3\n"
-  "\tmovss %%xmm3, (%%edx)\n"
-  
-  "\tpop %%ecx\n"
-  "\tpop %%edi\n"
-  "\tpop %%eax\n"
-  : : "a" (a), "D" (b), "c" (len), "d" (&sum) : "memory");
-  return sum;
+   int i;
+   float ret;
+   __m128 sum = _mm_setzero_ps();
+   for (i=0;i<(len>>2);i+=10)
+   {
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+0), _mm_loadu_ps(b+0)));
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+4), _mm_loadu_ps(b+4)));
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+8), _mm_loadu_ps(b+8)));
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+12), _mm_loadu_ps(b+12)));
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+16), _mm_loadu_ps(b+16)));
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+20), _mm_loadu_ps(b+20)));
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+24), _mm_loadu_ps(b+24)));
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+28), _mm_loadu_ps(b+28)));
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+32), _mm_loadu_ps(b+32)));
+      sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+36), _mm_loadu_ps(b+36)));
+      a += 40;
+      b += 40;
+   }
+   sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
+   sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
+   _mm_store_ss(&ret, sum);
+   return ret;
 }
 
-
 static void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack)
 {
    int i, offset;

--- >8 ----
List archives:  http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body.  No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.



More information about the commits mailing list