[xiph-cvs] cvs commit: speex/libspeex filters_sse.h ltp_sse.h
Jean-Marc Valin
jm at xiph.org
Thu May 1 19:08:25 PDT 2003
jm 03/05/01 22:08:25
Modified: libspeex filters_sse.h ltp_sse.h
Log:
removed multi-line strings
Revision Changes Path
1.4 +179 -181 speex/libspeex/filters_sse.h
Index: filters_sse.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/filters_sse.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- filters_sse.h 19 Mar 2003 01:07:34 -0000 1.3
+++ filters_sse.h 2 May 2003 02:08:25 -0000 1.4
@@ -55,158 +55,157 @@
{
__asm__ __volatile__
- ("
- movss (%1), %%xmm0
- movss (%0), %%xmm1
- addss %%xmm0, %%xmm1
- movss %%xmm1, (%2)
- shufps $0x00, %%xmm0, %%xmm0
- shufps $0x00, %%xmm1, %%xmm1
-
- movaps 4(%3), %%xmm2
- movaps 4(%4), %%xmm3
- mulps %%xmm0, %%xmm2
- mulps %%xmm1, %%xmm3
- movaps 20(%3), %%xmm4
- mulps %%xmm0, %%xmm4
- addps 4(%0), %%xmm2
- movaps 20(%4), %%xmm5
- mulps %%xmm1, %%xmm5
- addps 20(%0), %%xmm4
- subps %%xmm3, %%xmm2
- movups %%xmm2, (%0)
- subps %%xmm5, %%xmm4
- movups %%xmm4, 16(%0)
-
- movss 36(%3), %%xmm2
- mulss %%xmm0, %%xmm2
- movss 36(%4), %%xmm3
- mulss %%xmm1, %%xmm3
- addss 36(%0), %%xmm2
- movss 40(%3), %%xmm4
- mulss %%xmm0, %%xmm4
- movss 40(%4), %%xmm5
- mulss %%xmm1, %%xmm5
- subss %%xmm3, %%xmm2
- movss %%xmm2, 32(%0)
- subss %%xmm5, %%xmm4
- movss %%xmm4, 36(%0)
-
-
-
- movss 4(%1), %%xmm0
- movss (%0), %%xmm1
- addss %%xmm0, %%xmm1
- movss %%xmm1, 4(%2)
- shufps $0x00, %%xmm0, %%xmm0
- shufps $0x00, %%xmm1, %%xmm1
-
- movaps 4(%3), %%xmm2
- movaps 4(%4), %%xmm3
- mulps %%xmm0, %%xmm2
- mulps %%xmm1, %%xmm3
- movaps 20(%3), %%xmm4
- mulps %%xmm0, %%xmm4
- addps 4(%0), %%xmm2
- movaps 20(%4), %%xmm5
- mulps %%xmm1, %%xmm5
- addps 20(%0), %%xmm4
- subps %%xmm3, %%xmm2
- movups %%xmm2, (%0)
- subps %%xmm5, %%xmm4
- movups %%xmm4, 16(%0)
-
- movss 36(%3), %%xmm2
- mulss %%xmm0, %%xmm2
- movss 36(%4), %%xmm3
- mulss %%xmm1, %%xmm3
- addss 36(%0), %%xmm2
- movss 40(%3), %%xmm4
- mulss %%xmm0, %%xmm4
- movss 40(%4), %%xmm5
- mulss %%xmm1, %%xmm5
- subss %%xmm3, %%xmm2
- movss %%xmm2, 32(%0)
- subss %%xmm5, %%xmm4
- movss %%xmm4, 36(%0)
-
-
-
- movss 8(%1), %%xmm0
- movss (%0), %%xmm1
- addss %%xmm0, %%xmm1
- movss %%xmm1, 8(%2)
- shufps $0x00, %%xmm0, %%xmm0
- shufps $0x00, %%xmm1, %%xmm1
-
- movaps 4(%3), %%xmm2
- movaps 4(%4), %%xmm3
- mulps %%xmm0, %%xmm2
- mulps %%xmm1, %%xmm3
- movaps 20(%3), %%xmm4
- mulps %%xmm0, %%xmm4
- addps 4(%0), %%xmm2
- movaps 20(%4), %%xmm5
- mulps %%xmm1, %%xmm5
- addps 20(%0), %%xmm4
- subps %%xmm3, %%xmm2
- movups %%xmm2, (%0)
- subps %%xmm5, %%xmm4
- movups %%xmm4, 16(%0)
-
- movss 36(%3), %%xmm2
- mulss %%xmm0, %%xmm2
- movss 36(%4), %%xmm3
- mulss %%xmm1, %%xmm3
- addss 36(%0), %%xmm2
- movss 40(%3), %%xmm4
- mulss %%xmm0, %%xmm4
- movss 40(%4), %%xmm5
- mulss %%xmm1, %%xmm5
- subss %%xmm3, %%xmm2
- movss %%xmm2, 32(%0)
- subss %%xmm5, %%xmm4
- movss %%xmm4, 36(%0)
-
-
-
- movss 12(%1), %%xmm0
- movss (%0), %%xmm1
- addss %%xmm0, %%xmm1
- movss %%xmm1, 12(%2)
- shufps $0x00, %%xmm0, %%xmm0
- shufps $0x00, %%xmm1, %%xmm1
-
- movaps 4(%3), %%xmm2
- movaps 4(%4), %%xmm3
- mulps %%xmm0, %%xmm2
- mulps %%xmm1, %%xmm3
- movaps 20(%3), %%xmm4
- mulps %%xmm0, %%xmm4
- addps 4(%0), %%xmm2
- movaps 20(%4), %%xmm5
- mulps %%xmm1, %%xmm5
- addps 20(%0), %%xmm4
- subps %%xmm3, %%xmm2
- movups %%xmm2, (%0)
- subps %%xmm5, %%xmm4
- movups %%xmm4, 16(%0)
-
- movss 36(%3), %%xmm2
- mulss %%xmm0, %%xmm2
- movss 36(%4), %%xmm3
- mulss %%xmm1, %%xmm3
- addss 36(%0), %%xmm2
- movss 40(%3), %%xmm4
- mulss %%xmm0, %%xmm4
- movss 40(%4), %%xmm5
- mulss %%xmm1, %%xmm5
- subss %%xmm3, %%xmm2
- movss %%xmm2, 32(%0)
- subss %%xmm5, %%xmm4
- movss %%xmm4, 36(%0)
+ (
+ "\tmovss (%1), %%xmm0\n"
+ "\tmovss (%0), %%xmm1\n"
+ "\taddss %%xmm0, %%xmm1\n"
+ "\tmovss %%xmm1, (%2)\n"
+ "\tshufps $0x00, %%xmm0, %%xmm0\n"
+ "\tshufps $0x00, %%xmm1, %%xmm1\n"
+
+ "\tmovaps 4(%3), %%xmm2\n"
+ "\tmovaps 4(%4), %%xmm3\n"
+ "\tmulps %%xmm0, %%xmm2\n"
+ "\tmulps %%xmm1, %%xmm3\n"
+ "\tmovaps 20(%3), %%xmm4\n"
+ "\tmulps %%xmm0, %%xmm4\n"
+ "\taddps 4(%0), %%xmm2\n"
+ "\tmovaps 20(%4), %%xmm5\n"
+ "\tmulps %%xmm1, %%xmm5\n"
+ "\taddps 20(%0), %%xmm4\n"
+ "\tsubps %%xmm3, %%xmm2\n"
+ "\tmovups %%xmm2, (%0)\n"
+ "\tsubps %%xmm5, %%xmm4\n"
+ "\tmovups %%xmm4, 16(%0)\n"
+
+ "\tmovss 36(%3), %%xmm2\n"
+ "\tmulss %%xmm0, %%xmm2\n"
+ "\tmovss 36(%4), %%xmm3\n"
+ "\tmulss %%xmm1, %%xmm3\n"
+ "\taddss 36(%0), %%xmm2\n"
+ "\tmovss 40(%3), %%xmm4\n"
+ "\tmulss %%xmm0, %%xmm4\n"
+ "\tmovss 40(%4), %%xmm5\n"
+ "\tmulss %%xmm1, %%xmm5\n"
+ "\tsubss %%xmm3, %%xmm2\n"
+ "\tmovss %%xmm2, 32(%0) \n"
+ "\tsubss %%xmm5, %%xmm4\n"
+ "\tmovss %%xmm4, 36(%0)\n"
+
+
+
+ "\tmovss 4(%1), %%xmm0\n"
+ "\tmovss (%0), %%xmm1\n"
+ "\taddss %%xmm0, %%xmm1\n"
+ "\tmovss %%xmm1, 4(%2)\n"
+ "\tshufps $0x00, %%xmm0, %%xmm0\n"
+ "\tshufps $0x00, %%xmm1, %%xmm1\n"
+
+ "\tmovaps 4(%3), %%xmm2\n"
+ "\tmovaps 4(%4), %%xmm3\n"
+ "\tmulps %%xmm0, %%xmm2\n"
+ "\tmulps %%xmm1, %%xmm3\n"
+ "\tmovaps 20(%3), %%xmm4\n"
+ "\tmulps %%xmm0, %%xmm4\n"
+ "\taddps 4(%0), %%xmm2\n"
+ "\tmovaps 20(%4), %%xmm5\n"
+ "\tmulps %%xmm1, %%xmm5\n"
+ "\taddps 20(%0), %%xmm4\n"
+ "\tsubps %%xmm3, %%xmm2\n"
+ "\tmovups %%xmm2, (%0)\n"
+ "\tsubps %%xmm5, %%xmm4\n"
+ "\tmovups %%xmm4, 16(%0)\n"
+
+ "\tmovss 36(%3), %%xmm2\n"
+ "\tmulss %%xmm0, %%xmm2\n"
+ "\tmovss 36(%4), %%xmm3\n"
+ "\tmulss %%xmm1, %%xmm3\n"
+ "\taddss 36(%0), %%xmm2\n"
+ "\tmovss 40(%3), %%xmm4\n"
+ "\tmulss %%xmm0, %%xmm4\n"
+ "\tmovss 40(%4), %%xmm5\n"
+ "\tmulss %%xmm1, %%xmm5\n"
+ "\tsubss %%xmm3, %%xmm2\n"
+ "\tmovss %%xmm2, 32(%0) \n"
+ "\tsubss %%xmm5, %%xmm4\n"
+ "\tmovss %%xmm4, 36(%0)\n"
+
+
+
+ "\tmovss 8(%1), %%xmm0\n"
+ "\tmovss (%0), %%xmm1\n"
+ "\taddss %%xmm0, %%xmm1\n"
+ "\tmovss %%xmm1, 8(%2)\n"
+ "\tshufps $0x00, %%xmm0, %%xmm0\n"
+ "\tshufps $0x00, %%xmm1, %%xmm1\n"
+
+ "\tmovaps 4(%3), %%xmm2\n"
+ "\tmovaps 4(%4), %%xmm3\n"
+ "\tmulps %%xmm0, %%xmm2\n"
+ "\tmulps %%xmm1, %%xmm3\n"
+ "\tmovaps 20(%3), %%xmm4\n"
+ "\tmulps %%xmm0, %%xmm4\n"
+ "\taddps 4(%0), %%xmm2\n"
+ "\tmovaps 20(%4), %%xmm5\n"
+ "\tmulps %%xmm1, %%xmm5\n"
+ "\taddps 20(%0), %%xmm4\n"
+ "\tsubps %%xmm3, %%xmm2\n"
+ "\tmovups %%xmm2, (%0)\n"
+ "\tsubps %%xmm5, %%xmm4\n"
+ "\tmovups %%xmm4, 16(%0)\n"
+
+ "\tmovss 36(%3), %%xmm2\n"
+ "\tmulss %%xmm0, %%xmm2\n"
+ "\tmovss 36(%4), %%xmm3\n"
+ "\tmulss %%xmm1, %%xmm3\n"
+ "\taddss 36(%0), %%xmm2\n"
+ "\tmovss 40(%3), %%xmm4\n"
+ "\tmulss %%xmm0, %%xmm4\n"
+ "\tmovss 40(%4), %%xmm5\n"
+ "\tmulss %%xmm1, %%xmm5\n"
+ "\tsubss %%xmm3, %%xmm2\n"
+ "\tmovss %%xmm2, 32(%0) \n"
+ "\tsubss %%xmm5, %%xmm4\n"
+ "\tmovss %%xmm4, 36(%0)\n"
+
+
+
+ "\tmovss 12(%1), %%xmm0\n"
+ "\tmovss (%0), %%xmm1\n"
+ "\taddss %%xmm0, %%xmm1\n"
+ "\tmovss %%xmm1, 12(%2)\n"
+ "\tshufps $0x00, %%xmm0, %%xmm0\n"
+ "\tshufps $0x00, %%xmm1, %%xmm1\n"
+
+ "\tmovaps 4(%3), %%xmm2\n"
+ "\tmovaps 4(%4), %%xmm3\n"
+ "\tmulps %%xmm0, %%xmm2\n"
+ "\tmulps %%xmm1, %%xmm3\n"
+ "\tmovaps 20(%3), %%xmm4\n"
+ "\tmulps %%xmm0, %%xmm4\n"
+ "\taddps 4(%0), %%xmm2\n"
+ "\tmovaps 20(%4), %%xmm5\n"
+ "\tmulps %%xmm1, %%xmm5\n"
+ "\taddps 20(%0), %%xmm4\n"
+ "\tsubps %%xmm3, %%xmm2\n"
+ "\tmovups %%xmm2, (%0)\n"
+ "\tsubps %%xmm5, %%xmm4\n"
+ "\tmovups %%xmm4, 16(%0)\n"
+
+ "\tmovss 36(%3), %%xmm2\n"
+ "\tmulss %%xmm0, %%xmm2\n"
+ "\tmovss 36(%4), %%xmm3\n"
+ "\tmulss %%xmm1, %%xmm3\n"
+ "\taddss 36(%0), %%xmm2\n"
+ "\tmovss 40(%3), %%xmm4\n"
+ "\tmulss %%xmm0, %%xmm4\n"
+ "\tmovss 40(%4), %%xmm5\n"
+ "\tmulss %%xmm1, %%xmm5\n"
+ "\tsubss %%xmm3, %%xmm2\n"
+ "\tmovss %%xmm2, 32(%0) \n"
+ "\tsubss %%xmm5, %%xmm4\n"
+ "\tmovss %%xmm4, 36(%0)\n"
- "
: : "r" (mem), "r" (x+i), "r" (y+i), "r" (num), "r" (den)
: "memory" );
@@ -247,39 +246,38 @@
mem[ord-1] = - den[ord]*y[i];
#else
__asm__ __volatile__
- ("
- movss (%1), %%xmm0
- movss (%0), %%xmm1
- addss %%xmm0, %%xmm1
- movss %%xmm1, (%2)
- shufps $0x00, %%xmm0, %%xmm0
- shufps $0x00, %%xmm1, %%xmm1
+ (
+ "\tmovss (%1), %%xmm0\n"
+ "\tmovss (%0), %%xmm1\n"
+ "\taddss %%xmm0, %%xmm1\n"
+ "\tmovss %%xmm1, (%2)\n"
+ "\tshufps $0x00, %%xmm0, %%xmm0\n"
+ "\tshufps $0x00, %%xmm1, %%xmm1\n"
- movaps 4(%3), %%xmm2
- movaps 20(%3), %%xmm3
- mulps %%xmm1, %%xmm2
- mulps %%xmm1, %%xmm3
- movss 36(%3), %%xmm4
- movss 40(%3), %%xmm5
- mulss %%xmm1, %%xmm4
- mulss %%xmm1, %%xmm5
- movaps 4(%0), %%xmm6
- subps %%xmm2, %%xmm6
- movups %%xmm6, (%0)
- movaps 20(%0), %%xmm7
- subps %%xmm3, %%xmm7
- movups %%xmm7, 16(%0)
-
-
- movss 36(%0), %%xmm7
- subss %%xmm4, %%xmm7
- movss %%xmm7, 32(%0)
- xorps %%xmm2, %%xmm2
- subss %%xmm5, %%xmm2
- movss %%xmm2, 36(%0)
+ "\tmovaps 4(%3), %%xmm2\n"
+ "\tmovaps 20(%3), %%xmm3\n"
+ "\tmulps %%xmm1, %%xmm2\n"
+ "\tmulps %%xmm1, %%xmm3\n"
+ "\tmovss 36(%3), %%xmm4\n"
+ "\tmovss 40(%3), %%xmm5\n"
+ "\tmulss %%xmm1, %%xmm4\n"
+ "\tmulss %%xmm1, %%xmm5\n"
+ "\tmovaps 4(%0), %%xmm6\n"
+ "\tsubps %%xmm2, %%xmm6\n"
+ "\tmovups %%xmm6, (%0)\n"
+ "\tmovaps 20(%0), %%xmm7\n"
+ "\tsubps %%xmm3, %%xmm7\n"
+ "\tmovups %%xmm7, 16(%0)\n"
+
+
+ "\tmovss 36(%0), %%xmm7\n"
+ "\tsubss %%xmm4, %%xmm7\n"
+ "\tmovss %%xmm7, 32(%0) \n"
+ "\txorps %%xmm2, %%xmm2\n"
+ "\tsubss %%xmm5, %%xmm2\n"
+ "\tmovss %%xmm2, 36(%0)\n"
- "
: : "r" (mem), "r" (x+i), "r" (y+i), "r" (den)
: "memory" );
#endif
1.3 +54 -56 speex/libspeex/ltp_sse.h
Index: ltp_sse.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/ltp_sse.h,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- ltp_sse.h 27 Oct 2002 06:01:30 -0000 1.2
+++ ltp_sse.h 2 May 2003 02:08:25 -0000 1.3
@@ -35,63 +35,61 @@
{
float sum;
__asm__ __volatile__ (
- "
- push %%eax
- push %%edi
- push %%ecx
- xorps %%xmm3, %%xmm3
- xorps %%xmm4, %%xmm4
-
- sub $20, %%ecx
-
-.mul20_loop%=:
-
- movups (%%eax), %%xmm0
- movups (%%edi), %%xmm1
- mulps %%xmm0, %%xmm1
-
- movups 16(%%eax), %%xmm5
- movups 16(%%edi), %%xmm6
- mulps %%xmm5, %%xmm6
- addps %%xmm1, %%xmm3
-
- movups 32(%%eax), %%xmm0
- movups 32(%%edi), %%xmm1
- mulps %%xmm0, %%xmm1
- addps %%xmm6, %%xmm4
-
- movups 48(%%eax), %%xmm5
- movups 48(%%edi), %%xmm6
- mulps %%xmm5, %%xmm6
- addps %%xmm1, %%xmm3
-
- movups 64(%%eax), %%xmm0
- movups 64(%%edi), %%xmm1
- mulps %%xmm0, %%xmm1
- addps %%xmm6, %%xmm4
- addps %%xmm1, %%xmm3
-
-
- add $80, %%eax
- add $80, %%edi
-
- sub $20, %%ecx
-
- jae .mul20_loop%=
-
- addps %%xmm4, %%xmm3
-
- movhlps %%xmm3, %%xmm4
- addps %%xmm4, %%xmm3
- movaps %%xmm3, %%xmm4
- shufps $0x55, %%xmm4, %%xmm4
- addss %%xmm4, %%xmm3
- movss %%xmm3, (%%edx)
+ "\tpush %%eax\n"
+ "\tpush %%edi\n"
+ "\tpush %%ecx\n"
+ "\txorps %%xmm3, %%xmm3\n"
+ "\txorps %%xmm4, %%xmm4\n"
+
+ "\tsub $20, %%ecx\n"
+
+".mul20_loop%=:\n"
+
+ "\tmovups (%%eax), %%xmm0\n"
+ "\tmovups (%%edi), %%xmm1\n"
+ "\tmulps %%xmm0, %%xmm1\n"
+
+ "\tmovups 16(%%eax), %%xmm5\n"
+ "\tmovups 16(%%edi), %%xmm6\n"
+ "\tmulps %%xmm5, %%xmm6\n"
+ "\taddps %%xmm1, %%xmm3\n"
+
+ "\tmovups 32(%%eax), %%xmm0\n"
+ "\tmovups 32(%%edi), %%xmm1\n"
+ "\tmulps %%xmm0, %%xmm1\n"
+ "\taddps %%xmm6, %%xmm4\n"
+
+ "\tmovups 48(%%eax), %%xmm5\n"
+ "\tmovups 48(%%edi), %%xmm6\n"
+ "\tmulps %%xmm5, %%xmm6\n"
+ "\taddps %%xmm1, %%xmm3\n"
+
+ "\tmovups 64(%%eax), %%xmm0\n"
+ "\tmovups 64(%%edi), %%xmm1\n"
+ "\tmulps %%xmm0, %%xmm1\n"
+ "\taddps %%xmm6, %%xmm4\n"
+ "\taddps %%xmm1, %%xmm3\n"
+
+
+ "\tadd $80, %%eax\n"
+ "\tadd $80, %%edi\n"
+
+ "\tsub $20, %%ecx\n"
+
+ "\tjae .mul20_loop%=\n"
+
+ "\taddps %%xmm4, %%xmm3\n"
+
+ "\tmovhlps %%xmm3, %%xmm4\n"
+ "\taddps %%xmm4, %%xmm3\n"
+ "\tmovaps %%xmm3, %%xmm4\n"
+ "\tshufps $0x55, %%xmm4, %%xmm4\n"
+ "\taddss %%xmm4, %%xmm3\n"
+ "\tmovss %%xmm3, (%%edx)\n"
- pop %%ecx
- pop %%edi
- pop %%eax
- "
+ "\tpop %%ecx\n"
+ "\tpop %%edi\n"
+ "\tpop %%eax\n"
: : "a" (a), "D" (b), "c" (len), "d" (&sum) : "memory");
return sum;
}
--- >8 ----
List archives: http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body. No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.
More information about the commits mailing list