[xiph-cvs] cvs commit: speex/libspeex filters_sse.h ltp_sse.h

Jean-Marc Valin jm at xiph.org
Thu May 1 19:08:25 PDT 2003



jm          03/05/01 22:08:25

  Modified:    libspeex filters_sse.h ltp_sse.h
  Log:
  removed multi-line strings

Revision  Changes    Path
1.4       +179 -181  speex/libspeex/filters_sse.h

Index: filters_sse.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/filters_sse.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- filters_sse.h	19 Mar 2003 01:07:34 -0000	1.3
+++ filters_sse.h	2 May 2003 02:08:25 -0000	1.4
@@ -55,158 +55,157 @@
    {
 
       __asm__ __volatile__ 
-      ("
-       movss (%1), %%xmm0
-       movss (%0), %%xmm1
-       addss %%xmm0, %%xmm1
-       movss %%xmm1, (%2)
-       shufps $0x00, %%xmm0, %%xmm0
-       shufps $0x00, %%xmm1, %%xmm1
-
-       movaps 4(%3),  %%xmm2
-       movaps 4(%4),  %%xmm3
-       mulps  %%xmm0, %%xmm2
-       mulps  %%xmm1, %%xmm3
-       movaps 20(%3), %%xmm4
-       mulps  %%xmm0, %%xmm4
-       addps  4(%0),  %%xmm2
-       movaps 20(%4), %%xmm5
-       mulps  %%xmm1, %%xmm5
-       addps  20(%0), %%xmm4
-       subps  %%xmm3, %%xmm2
-       movups %%xmm2, (%0)
-       subps  %%xmm5, %%xmm4
-       movups %%xmm4, 16(%0)
-
-       movss  36(%3), %%xmm2
-       mulss  %%xmm0, %%xmm2
-       movss  36(%4), %%xmm3
-       mulss  %%xmm1, %%xmm3
-       addss  36(%0), %%xmm2
-       movss  40(%3), %%xmm4
-       mulss  %%xmm0, %%xmm4
-       movss  40(%4), %%xmm5
-       mulss  %%xmm1, %%xmm5
-       subss  %%xmm3, %%xmm2
-       movss  %%xmm2, 32(%0)       
-       subss  %%xmm5, %%xmm4
-       movss  %%xmm4, 36(%0)
-
-
-
-       movss 4(%1), %%xmm0
-       movss (%0), %%xmm1
-       addss %%xmm0, %%xmm1
-       movss %%xmm1, 4(%2)
-       shufps $0x00, %%xmm0, %%xmm0
-       shufps $0x00, %%xmm1, %%xmm1
-
-       movaps 4(%3),  %%xmm2
-       movaps 4(%4),  %%xmm3
-       mulps  %%xmm0, %%xmm2
-       mulps  %%xmm1, %%xmm3
-       movaps 20(%3), %%xmm4
-       mulps  %%xmm0, %%xmm4
-       addps  4(%0),  %%xmm2
-       movaps 20(%4), %%xmm5
-       mulps  %%xmm1, %%xmm5
-       addps  20(%0), %%xmm4
-       subps  %%xmm3, %%xmm2
-       movups %%xmm2, (%0)
-       subps  %%xmm5, %%xmm4
-       movups %%xmm4, 16(%0)
-
-       movss  36(%3), %%xmm2
-       mulss  %%xmm0, %%xmm2
-       movss  36(%4), %%xmm3
-       mulss  %%xmm1, %%xmm3
-       addss  36(%0), %%xmm2
-       movss  40(%3), %%xmm4
-       mulss  %%xmm0, %%xmm4
-       movss  40(%4), %%xmm5
-       mulss  %%xmm1, %%xmm5
-       subss  %%xmm3, %%xmm2
-       movss  %%xmm2, 32(%0)       
-       subss  %%xmm5, %%xmm4
-       movss  %%xmm4, 36(%0)
-
-
-
-       movss 8(%1), %%xmm0
-       movss (%0), %%xmm1
-       addss %%xmm0, %%xmm1
-       movss %%xmm1, 8(%2)
-       shufps $0x00, %%xmm0, %%xmm0
-       shufps $0x00, %%xmm1, %%xmm1
-
-       movaps 4(%3),  %%xmm2
-       movaps 4(%4),  %%xmm3
-       mulps  %%xmm0, %%xmm2
-       mulps  %%xmm1, %%xmm3
-       movaps 20(%3), %%xmm4
-       mulps  %%xmm0, %%xmm4
-       addps  4(%0),  %%xmm2
-       movaps 20(%4), %%xmm5
-       mulps  %%xmm1, %%xmm5
-       addps  20(%0), %%xmm4
-       subps  %%xmm3, %%xmm2
-       movups %%xmm2, (%0)
-       subps  %%xmm5, %%xmm4
-       movups %%xmm4, 16(%0)
-
-       movss  36(%3), %%xmm2
-       mulss  %%xmm0, %%xmm2
-       movss  36(%4), %%xmm3
-       mulss  %%xmm1, %%xmm3
-       addss  36(%0), %%xmm2
-       movss  40(%3), %%xmm4
-       mulss  %%xmm0, %%xmm4
-       movss  40(%4), %%xmm5
-       mulss  %%xmm1, %%xmm5
-       subss  %%xmm3, %%xmm2
-       movss  %%xmm2, 32(%0)       
-       subss  %%xmm5, %%xmm4
-       movss  %%xmm4, 36(%0)
-
-
-
-       movss 12(%1), %%xmm0
-       movss (%0), %%xmm1
-       addss %%xmm0, %%xmm1
-       movss %%xmm1, 12(%2)
-       shufps $0x00, %%xmm0, %%xmm0
-       shufps $0x00, %%xmm1, %%xmm1
-
-       movaps 4(%3),  %%xmm2
-       movaps 4(%4),  %%xmm3
-       mulps  %%xmm0, %%xmm2
-       mulps  %%xmm1, %%xmm3
-       movaps 20(%3), %%xmm4
-       mulps  %%xmm0, %%xmm4
-       addps  4(%0),  %%xmm2
-       movaps 20(%4), %%xmm5
-       mulps  %%xmm1, %%xmm5
-       addps  20(%0), %%xmm4
-       subps  %%xmm3, %%xmm2
-       movups %%xmm2, (%0)
-       subps  %%xmm5, %%xmm4
-       movups %%xmm4, 16(%0)
-
-       movss  36(%3), %%xmm2
-       mulss  %%xmm0, %%xmm2
-       movss  36(%4), %%xmm3
-       mulss  %%xmm1, %%xmm3
-       addss  36(%0), %%xmm2
-       movss  40(%3), %%xmm4
-       mulss  %%xmm0, %%xmm4
-       movss  40(%4), %%xmm5
-       mulss  %%xmm1, %%xmm5
-       subss  %%xmm3, %%xmm2
-       movss  %%xmm2, 32(%0)       
-       subss  %%xmm5, %%xmm4
-       movss  %%xmm4, 36(%0)
+      (
+       "\tmovss (%1), %%xmm0\n"
+       "\tmovss (%0), %%xmm1\n"
+       "\taddss %%xmm0, %%xmm1\n"
+       "\tmovss %%xmm1, (%2)\n"
+       "\tshufps $0x00, %%xmm0, %%xmm0\n"
+       "\tshufps $0x00, %%xmm1, %%xmm1\n"
+
+       "\tmovaps 4(%3),  %%xmm2\n"
+       "\tmovaps 4(%4),  %%xmm3\n"
+       "\tmulps  %%xmm0, %%xmm2\n"
+       "\tmulps  %%xmm1, %%xmm3\n"
+       "\tmovaps 20(%3), %%xmm4\n"
+       "\tmulps  %%xmm0, %%xmm4\n"
+       "\taddps  4(%0),  %%xmm2\n"
+       "\tmovaps 20(%4), %%xmm5\n"
+       "\tmulps  %%xmm1, %%xmm5\n"
+       "\taddps  20(%0), %%xmm4\n"
+       "\tsubps  %%xmm3, %%xmm2\n"
+       "\tmovups %%xmm2, (%0)\n"
+       "\tsubps  %%xmm5, %%xmm4\n"
+       "\tmovups %%xmm4, 16(%0)\n"
+
+       "\tmovss  36(%3), %%xmm2\n"
+       "\tmulss  %%xmm0, %%xmm2\n"
+       "\tmovss  36(%4), %%xmm3\n"
+       "\tmulss  %%xmm1, %%xmm3\n"
+       "\taddss  36(%0), %%xmm2\n"
+       "\tmovss  40(%3), %%xmm4\n"
+       "\tmulss  %%xmm0, %%xmm4\n"
+       "\tmovss  40(%4), %%xmm5\n"
+       "\tmulss  %%xmm1, %%xmm5\n"
+       "\tsubss  %%xmm3, %%xmm2\n"
+       "\tmovss  %%xmm2, 32(%0)       \n"
+       "\tsubss  %%xmm5, %%xmm4\n"
+       "\tmovss  %%xmm4, 36(%0)\n"
+
+
+
+       "\tmovss 4(%1), %%xmm0\n"
+       "\tmovss (%0), %%xmm1\n"
+       "\taddss %%xmm0, %%xmm1\n"
+       "\tmovss %%xmm1, 4(%2)\n"
+       "\tshufps $0x00, %%xmm0, %%xmm0\n"
+       "\tshufps $0x00, %%xmm1, %%xmm1\n"
+
+       "\tmovaps 4(%3),  %%xmm2\n"
+       "\tmovaps 4(%4),  %%xmm3\n"
+       "\tmulps  %%xmm0, %%xmm2\n"
+       "\tmulps  %%xmm1, %%xmm3\n"
+       "\tmovaps 20(%3), %%xmm4\n"
+       "\tmulps  %%xmm0, %%xmm4\n"
+       "\taddps  4(%0),  %%xmm2\n"
+       "\tmovaps 20(%4), %%xmm5\n"
+       "\tmulps  %%xmm1, %%xmm5\n"
+       "\taddps  20(%0), %%xmm4\n"
+       "\tsubps  %%xmm3, %%xmm2\n"
+       "\tmovups %%xmm2, (%0)\n"
+       "\tsubps  %%xmm5, %%xmm4\n"
+       "\tmovups %%xmm4, 16(%0)\n"
+
+       "\tmovss  36(%3), %%xmm2\n"
+       "\tmulss  %%xmm0, %%xmm2\n"
+       "\tmovss  36(%4), %%xmm3\n"
+       "\tmulss  %%xmm1, %%xmm3\n"
+       "\taddss  36(%0), %%xmm2\n"
+       "\tmovss  40(%3), %%xmm4\n"
+       "\tmulss  %%xmm0, %%xmm4\n"
+       "\tmovss  40(%4), %%xmm5\n"
+       "\tmulss  %%xmm1, %%xmm5\n"
+       "\tsubss  %%xmm3, %%xmm2\n"
+       "\tmovss  %%xmm2, 32(%0)       \n"
+       "\tsubss  %%xmm5, %%xmm4\n"
+       "\tmovss  %%xmm4, 36(%0)\n"
+
+
+
+       "\tmovss 8(%1), %%xmm0\n"
+       "\tmovss (%0), %%xmm1\n"
+       "\taddss %%xmm0, %%xmm1\n"
+       "\tmovss %%xmm1, 8(%2)\n"
+       "\tshufps $0x00, %%xmm0, %%xmm0\n"
+       "\tshufps $0x00, %%xmm1, %%xmm1\n"
+
+       "\tmovaps 4(%3),  %%xmm2\n"
+       "\tmovaps 4(%4),  %%xmm3\n"
+       "\tmulps  %%xmm0, %%xmm2\n"
+       "\tmulps  %%xmm1, %%xmm3\n"
+       "\tmovaps 20(%3), %%xmm4\n"
+       "\tmulps  %%xmm0, %%xmm4\n"
+       "\taddps  4(%0),  %%xmm2\n"
+       "\tmovaps 20(%4), %%xmm5\n"
+       "\tmulps  %%xmm1, %%xmm5\n"
+       "\taddps  20(%0), %%xmm4\n"
+       "\tsubps  %%xmm3, %%xmm2\n"
+       "\tmovups %%xmm2, (%0)\n"
+       "\tsubps  %%xmm5, %%xmm4\n"
+       "\tmovups %%xmm4, 16(%0)\n"
+
+       "\tmovss  36(%3), %%xmm2\n"
+       "\tmulss  %%xmm0, %%xmm2\n"
+       "\tmovss  36(%4), %%xmm3\n"
+       "\tmulss  %%xmm1, %%xmm3\n"
+       "\taddss  36(%0), %%xmm2\n"
+       "\tmovss  40(%3), %%xmm4\n"
+       "\tmulss  %%xmm0, %%xmm4\n"
+       "\tmovss  40(%4), %%xmm5\n"
+       "\tmulss  %%xmm1, %%xmm5\n"
+       "\tsubss  %%xmm3, %%xmm2\n"
+       "\tmovss  %%xmm2, 32(%0)       \n"
+       "\tsubss  %%xmm5, %%xmm4\n"
+       "\tmovss  %%xmm4, 36(%0)\n"
+
+
+
+       "\tmovss 12(%1), %%xmm0\n"
+       "\tmovss (%0), %%xmm1\n"
+       "\taddss %%xmm0, %%xmm1\n"
+       "\tmovss %%xmm1, 12(%2)\n"
+       "\tshufps $0x00, %%xmm0, %%xmm0\n"
+       "\tshufps $0x00, %%xmm1, %%xmm1\n"
+
+       "\tmovaps 4(%3),  %%xmm2\n"
+       "\tmovaps 4(%4),  %%xmm3\n"
+       "\tmulps  %%xmm0, %%xmm2\n"
+       "\tmulps  %%xmm1, %%xmm3\n"
+       "\tmovaps 20(%3), %%xmm4\n"
+       "\tmulps  %%xmm0, %%xmm4\n"
+       "\taddps  4(%0),  %%xmm2\n"
+       "\tmovaps 20(%4), %%xmm5\n"
+       "\tmulps  %%xmm1, %%xmm5\n"
+       "\taddps  20(%0), %%xmm4\n"
+       "\tsubps  %%xmm3, %%xmm2\n"
+       "\tmovups %%xmm2, (%0)\n"
+       "\tsubps  %%xmm5, %%xmm4\n"
+       "\tmovups %%xmm4, 16(%0)\n"
+
+       "\tmovss  36(%3), %%xmm2\n"
+       "\tmulss  %%xmm0, %%xmm2\n"
+       "\tmovss  36(%4), %%xmm3\n"
+       "\tmulss  %%xmm1, %%xmm3\n"
+       "\taddss  36(%0), %%xmm2\n"
+       "\tmovss  40(%3), %%xmm4\n"
+       "\tmulss  %%xmm0, %%xmm4\n"
+       "\tmovss  40(%4), %%xmm5\n"
+       "\tmulss  %%xmm1, %%xmm5\n"
+       "\tsubss  %%xmm3, %%xmm2\n"
+       "\tmovss  %%xmm2, 32(%0)       \n"
+       "\tsubss  %%xmm5, %%xmm4\n"
+       "\tmovss  %%xmm4, 36(%0)\n"
 
-       "
        : : "r" (mem), "r" (x+i), "r" (y+i), "r" (num), "r" (den)
        : "memory" );
 
@@ -247,39 +246,38 @@
       mem[ord-1] = - den[ord]*y[i];
 #else
       __asm__ __volatile__ 
-      ("
-       movss (%1), %%xmm0
-       movss (%0), %%xmm1
-       addss %%xmm0, %%xmm1
-       movss %%xmm1, (%2)
-       shufps $0x00, %%xmm0, %%xmm0
-       shufps $0x00, %%xmm1, %%xmm1
+      (
+       "\tmovss (%1), %%xmm0\n"
+       "\tmovss (%0), %%xmm1\n"
+       "\taddss %%xmm0, %%xmm1\n"
+       "\tmovss %%xmm1, (%2)\n"
+       "\tshufps $0x00, %%xmm0, %%xmm0\n"
+       "\tshufps $0x00, %%xmm1, %%xmm1\n"
 
        
-       movaps 4(%3),  %%xmm2
-       movaps 20(%3), %%xmm3
-       mulps  %%xmm1, %%xmm2
-       mulps  %%xmm1, %%xmm3
-       movss  36(%3), %%xmm4
-       movss  40(%3), %%xmm5
-       mulss  %%xmm1, %%xmm4
-       mulss  %%xmm1, %%xmm5
-       movaps 4(%0),  %%xmm6
-       subps  %%xmm2, %%xmm6
-       movups %%xmm6, (%0)
-       movaps 20(%0), %%xmm7
-       subps  %%xmm3, %%xmm7
-       movups %%xmm7, 16(%0)
-
-
-       movss  36(%0), %%xmm7
-       subss  %%xmm4, %%xmm7
-       movss  %%xmm7, 32(%0)       
-       xorps  %%xmm2, %%xmm2
-       subss  %%xmm5, %%xmm2
-       movss  %%xmm2, 36(%0)
+       "\tmovaps 4(%3),  %%xmm2\n"
+       "\tmovaps 20(%3), %%xmm3\n"
+       "\tmulps  %%xmm1, %%xmm2\n"
+       "\tmulps  %%xmm1, %%xmm3\n"
+       "\tmovss  36(%3), %%xmm4\n"
+       "\tmovss  40(%3), %%xmm5\n"
+       "\tmulss  %%xmm1, %%xmm4\n"
+       "\tmulss  %%xmm1, %%xmm5\n"
+       "\tmovaps 4(%0),  %%xmm6\n"
+       "\tsubps  %%xmm2, %%xmm6\n"
+       "\tmovups %%xmm6, (%0)\n"
+       "\tmovaps 20(%0), %%xmm7\n"
+       "\tsubps  %%xmm3, %%xmm7\n"
+       "\tmovups %%xmm7, 16(%0)\n"
+
+
+       "\tmovss  36(%0), %%xmm7\n"
+       "\tsubss  %%xmm4, %%xmm7\n"
+       "\tmovss  %%xmm7, 32(%0)       \n"
+       "\txorps  %%xmm2, %%xmm2\n"
+       "\tsubss  %%xmm5, %%xmm2\n"
+       "\tmovss  %%xmm2, 36(%0)\n"
 
-       "
        : : "r" (mem), "r" (x+i), "r" (y+i), "r" (den)
        : "memory" );
 #endif

1.3       +54 -56    speex/libspeex/ltp_sse.h

Index: ltp_sse.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/ltp_sse.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- ltp_sse.h	27 Oct 2002 06:01:30 -0000	1.2
+++ ltp_sse.h	2 May 2003 02:08:25 -0000	1.3
@@ -35,63 +35,61 @@
 {
   float sum;
   __asm__ __volatile__ (
-  "
-  push %%eax
-  push %%edi
-  push %%ecx
-  xorps %%xmm3, %%xmm3
-  xorps %%xmm4, %%xmm4
-
-  sub $20, %%ecx
-
-.mul20_loop%=:
-
-  movups (%%eax), %%xmm0
-  movups (%%edi), %%xmm1
-  mulps %%xmm0, %%xmm1
-
-  movups 16(%%eax), %%xmm5
-  movups 16(%%edi), %%xmm6
-  mulps %%xmm5, %%xmm6
-  addps %%xmm1, %%xmm3
-
-  movups 32(%%eax), %%xmm0
-  movups 32(%%edi), %%xmm1
-  mulps %%xmm0, %%xmm1
-  addps %%xmm6, %%xmm4
-
-  movups 48(%%eax), %%xmm5
-  movups 48(%%edi), %%xmm6
-  mulps %%xmm5, %%xmm6
-  addps %%xmm1, %%xmm3
-
-  movups 64(%%eax), %%xmm0
-  movups 64(%%edi), %%xmm1
-  mulps %%xmm0, %%xmm1
-  addps %%xmm6, %%xmm4
-  addps %%xmm1, %%xmm3
-
-
-  add $80, %%eax
-  add $80, %%edi
-
-  sub $20,  %%ecx
-
-  jae .mul20_loop%=
-
-  addps %%xmm4, %%xmm3
-
-  movhlps %%xmm3, %%xmm4
-  addps %%xmm4, %%xmm3
-  movaps %%xmm3, %%xmm4
-  shufps $0x55, %%xmm4, %%xmm4
-  addss %%xmm4, %%xmm3
-  movss %%xmm3, (%%edx)
+  "\tpush %%eax\n"
+  "\tpush %%edi\n"
+  "\tpush %%ecx\n"
+  "\txorps %%xmm3, %%xmm3\n"
+  "\txorps %%xmm4, %%xmm4\n"
+
+  "\tsub $20, %%ecx\n"
+
+".mul20_loop%=:\n"
+
+  "\tmovups (%%eax), %%xmm0\n"
+  "\tmovups (%%edi), %%xmm1\n"
+  "\tmulps %%xmm0, %%xmm1\n"
+
+  "\tmovups 16(%%eax), %%xmm5\n"
+  "\tmovups 16(%%edi), %%xmm6\n"
+  "\tmulps %%xmm5, %%xmm6\n"
+  "\taddps %%xmm1, %%xmm3\n"
+
+  "\tmovups 32(%%eax), %%xmm0\n"
+  "\tmovups 32(%%edi), %%xmm1\n"
+  "\tmulps %%xmm0, %%xmm1\n"
+  "\taddps %%xmm6, %%xmm4\n"
+
+  "\tmovups 48(%%eax), %%xmm5\n"
+  "\tmovups 48(%%edi), %%xmm6\n"
+  "\tmulps %%xmm5, %%xmm6\n"
+  "\taddps %%xmm1, %%xmm3\n"
+
+  "\tmovups 64(%%eax), %%xmm0\n"
+  "\tmovups 64(%%edi), %%xmm1\n"
+  "\tmulps %%xmm0, %%xmm1\n"
+  "\taddps %%xmm6, %%xmm4\n"
+  "\taddps %%xmm1, %%xmm3\n"
+
+
+  "\tadd $80, %%eax\n"
+  "\tadd $80, %%edi\n"
+
+  "\tsub $20,  %%ecx\n"
+
+  "\tjae .mul20_loop%=\n"
+
+  "\taddps %%xmm4, %%xmm3\n"
+
+  "\tmovhlps %%xmm3, %%xmm4\n"
+  "\taddps %%xmm4, %%xmm3\n"
+  "\tmovaps %%xmm3, %%xmm4\n"
+  "\tshufps $0x55, %%xmm4, %%xmm4\n"
+  "\taddss %%xmm4, %%xmm3\n"
+  "\tmovss %%xmm3, (%%edx)\n"
   
-  pop %%ecx
-  pop %%edi
-  pop %%eax
-  "
+  "\tpop %%ecx\n"
+  "\tpop %%edi\n"
+  "\tpop %%eax\n"
   : : "a" (a), "D" (b), "c" (len), "d" (&sum) : "memory");
   return sum;
 }

--- >8 ----
List archives:  http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body.  No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.



More information about the commits mailing list