[xiph-cvs] cvs commit: speex/libspeex filters_sse.h ltp_sse.h Makefile.am filters.c ltp.c
Jean-Marc Valin
jm at xiph.org
Fri Oct 25 21:37:15 PDT 2002
jm 02/10/26 00:37:15
Modified: libspeex Makefile.am filters.c ltp.c
Added: libspeex filters_sse.h ltp_sse.h
Log:
Added SSE support (gcc only) by defining _USE_SSE
Revision Changes Path
1.41 +4 -2 speex/libspeex/Makefile.am
Index: Makefile.am
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/Makefile.am,v
retrieving revision 1.40
retrieving revision 1.41
diff -u -r1.40 -r1.41
--- Makefile.am 2 Oct 2002 19:52:52 -0000 1.40
+++ Makefile.am 26 Oct 2002 04:37:15 -0000 1.41
@@ -1,6 +1,6 @@
## Process this file with automake to produce Makefile.in. -*-Makefile-*-
-# $Id: Makefile.am,v 1.40 2002/10/02 19:52:52 jmvalin Exp $
+# $Id: Makefile.am,v 1.41 2002/10/26 04:37:15 jm Exp $
# Disable automatic dependency tracking if using other tools than gcc and gmake
#AUTOMAKE_OPTIONS = no-dependencies
@@ -53,7 +53,9 @@
modes.h \
sb_celp.h \
vbr.h \
- misc.h
+ misc.h \
+ ltp_sse.h \
+ filters_sse.h
libspeex_la_LDFLAGS = -release $(LT_RELEASE)
<p><p>1.27 +15 -14 speex/libspeex/filters.c
Index: filters.c
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/filters.c,v
retrieving revision 1.26
retrieving revision 1.27
diff -u -r1.26 -r1.27
--- filters.c 26 Oct 2002 02:58:36 -0000 1.26
+++ filters.c 26 Oct 2002 04:37:15 -0000 1.27
@@ -55,7 +55,9 @@
}
}
-
+#ifdef _USE_SSE
+#include "filters_sse.h"
+#else
void filter_mem2(float *x, float *num, float *den, float *y, int N, int ord, float *mem)
{
int i,j;
@@ -74,38 +76,37 @@
}
-void fir_mem2(float *x, float *num, float *y, int N, int ord, float *mem)
+void iir_mem2(float *x, float *den, float *y, int N, int ord, float *mem)
{
int i,j;
- float xi;
for (i=0;i<N;i++)
{
- xi=x[i];
- y[i] = num[0]*xi + mem[0];
+ y[i] = x[i] + mem[0];
for (j=0;j<ord-1;j++)
{
- mem[j] = mem[j+1] + num[j+1]*xi;
+ mem[j] = mem[j+1] - den[j+1]*y[i];
}
- mem[ord-1] = num[ord]*xi;
+ mem[ord-1] = - den[ord]*y[i];
}
}
+#endif
-void iir_mem2(float *x, float *den, float *y, int N, int ord, float *mem)
+void fir_mem2(float *x, float *num, float *y, int N, int ord, float *mem)
{
int i,j;
+ float xi;
for (i=0;i<N;i++)
{
- y[i] = x[i] + mem[0];
+ xi=x[i];
+ y[i] = num[0]*xi + mem[0];
for (j=0;j<ord-1;j++)
{
- mem[j] = mem[j+1] - den[j+1]*y[i];
+ mem[j] = mem[j+1] + num[j+1]*xi;
}
- mem[ord-1] = - den[ord]*y[i];
+ mem[ord-1] = num[ord]*xi;
}
}
-
-
void syn_percep_zero(float *xx, float *ak, float *awk1, float *awk2, float *y, int N, int ord, float *stack)
{
int i;
@@ -164,7 +165,7 @@
mem[i]=xx[N-i-1];
}
-
+/* By segher */
void fir_mem_up(float *x, float *a, float *y, int N, int M, float *mem)
/* assumptions:
all odd x[i] are zero -- well, actually they are left out of the array now
<p><p>1.61 +4 -1 speex/libspeex/ltp.c
Index: ltp.c
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/ltp.c,v
retrieving revision 1.60
retrieving revision 1.61
diff -u -r1.60 -r1.61
--- ltp.c 24 Oct 2002 06:29:13 -0000 1.60
+++ ltp.c 26 Oct 2002 04:37:15 -0000 1.61
@@ -37,7 +37,9 @@
#include "filters.h"
#include "speex_bits.h"
-
+#ifdef _USE_SSE
+#include "ltp_sse.h"
+#else
static float inner_prod(float *x, float *y, int len)
{
int i;
@@ -52,6 +54,7 @@
}
return sum1+sum2+sum3+sum4;
}
+#endif
/*Original, non-optimized version*/
/*static float inner_prod(float *x, float *y, int len)
<p><p>1.1 speex/libspeex/filters_sse.h
Index: filters_sse.h
===================================================================
/* Copyright (C) 2002 Jean-Marc Valin
File: filters.c
Various analysis/synthesis filters
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* SSE (gcc inline asm) version of filter_mem2().

   For every input sample the asm below computes
       y[i]   = x[i] + mem[0]
       mem[k] = mem[k+1] + num[k+1]*x[i] - den[k+1]*y[i]     for k = 0..8
       mem[9] = num[10]*x[i] - den[10]*y[i]
   The byte offsets used in the asm (4..40) are fixed, i.e. the code is laid
   out for a 10th-order filter; `ord` is only used when copying the
   coefficients and the filter state in and out of the local buffers.
   num[0] and den[0] are copied but never read by the asm.

   x, y  : input / output sample arrays
   _num, _den : coefficient arrays, ord+1 entries of each are copied
   _mem  : filter state, ord entries, read on entry and written back on exit

   NOTE(review): the sample loop advances by 4 and the asm always reads
   x[i..i+3] and writes y[i..i+3], so N looks to be expected to be a
   multiple of 4 -- confirm against callers.
   NOTE(review): when ord < 10 the entries of the local copies that are not
   filled by the copy loops are left uninitialised but are still read by
   the asm. */
void filter_mem2(float *x, float *_num, float *_den, float *y, int N, int ord, float *_mem)
{
/* Over-sized scratch arrays; an aligned working window is carved out of each. */
float __num[20], __den[20], __mem[20];
float *num, *den, *mem;
/* j, xi and yi are declared but not used in this function. */
int i,j;
float xi,yi;
/* Round (array+4) down to a 16-byte boundary and step back one float, so
   that element [1] (byte offset 4) is 16-byte aligned; the aligned
   movaps/addps memory operands in the asm address offsets 4 and 20.
   NOTE(review): the pointer is cast to int and masked with 0xfffffff0,
   which only preserves the address where pointers fit in 32 bits. */
num = (float*)(((int)(__num+4))&0xfffffff0)-1;
den = (float*)(((int)(__den+4))&0xfffffff0)-1;
mem = (float*)(((int)(__mem+4))&0xfffffff0)-1;
/* Copy coefficients (ord+1 each) and state (ord entries) into the aligned windows. */
for (i=0;i<ord+1;i++)
{
num[i]=_num[i];
den[i]=_den[i];
}
for (i=0;i<ord;i++)
mem[i]=_mem[i];
/* Four samples per iteration: the same instruction sequence is repeated
   four times with x/y byte offsets 0, 4, 8 and 12.
   Operands: %0 = mem, %1 = x+i, %2 = y+i, %3 = num, %4 = den.
   Within each repetition all loads of mem[1..8] happen before the
   unaligned stores to mem[0..7], and mem[9] is loaded before mem[8] and
   mem[9] are stored.
   No output operands or xmm register clobbers are declared; only
   "memory" is listed. */
__asm__ __volatile__
("
movss (%1), %%xmm0
movss (%0), %%xmm1
addss %%xmm0, %%xmm1
movss %%xmm1, (%2)
shufps $0x00, %%xmm0, %%xmm0
shufps $0x00, %%xmm1, %%xmm1
movaps 4(%3), %%xmm2
movaps 4(%4), %%xmm3
mulps %%xmm0, %%xmm2
mulps %%xmm1, %%xmm3
movaps 20(%3), %%xmm4
mulps %%xmm0, %%xmm4
addps 4(%0), %%xmm2
movaps 20(%4), %%xmm5
mulps %%xmm1, %%xmm5
addps 20(%0), %%xmm4
subps %%xmm3, %%xmm2
movups %%xmm2, (%0)
subps %%xmm5, %%xmm4
movups %%xmm4, 16(%0)
movss 36(%3), %%xmm2
mulss %%xmm0, %%xmm2
movss 36(%4), %%xmm3
mulss %%xmm1, %%xmm3
addss 36(%0), %%xmm2
movss 40(%3), %%xmm4
mulss %%xmm0, %%xmm4
movss 40(%4), %%xmm5
mulss %%xmm1, %%xmm5
subss %%xmm3, %%xmm2
movss %%xmm2, 32(%0)
subss %%xmm5, %%xmm4
movss %%xmm4, 36(%0)
<p><p> movss 4(%1), %%xmm0
movss (%0), %%xmm1
addss %%xmm0, %%xmm1
movss %%xmm1, 4(%2)
shufps $0x00, %%xmm0, %%xmm0
shufps $0x00, %%xmm1, %%xmm1
movaps 4(%3), %%xmm2
movaps 4(%4), %%xmm3
mulps %%xmm0, %%xmm2
mulps %%xmm1, %%xmm3
movaps 20(%3), %%xmm4
mulps %%xmm0, %%xmm4
addps 4(%0), %%xmm2
movaps 20(%4), %%xmm5
mulps %%xmm1, %%xmm5
addps 20(%0), %%xmm4
subps %%xmm3, %%xmm2
movups %%xmm2, (%0)
subps %%xmm5, %%xmm4
movups %%xmm4, 16(%0)
movss 36(%3), %%xmm2
mulss %%xmm0, %%xmm2
movss 36(%4), %%xmm3
mulss %%xmm1, %%xmm3
addss 36(%0), %%xmm2
movss 40(%3), %%xmm4
mulss %%xmm0, %%xmm4
movss 40(%4), %%xmm5
mulss %%xmm1, %%xmm5
subss %%xmm3, %%xmm2
movss %%xmm2, 32(%0)
subss %%xmm5, %%xmm4
movss %%xmm4, 36(%0)
<p><p> movss 8(%1), %%xmm0
movss (%0), %%xmm1
addss %%xmm0, %%xmm1
movss %%xmm1, 8(%2)
shufps $0x00, %%xmm0, %%xmm0
shufps $0x00, %%xmm1, %%xmm1
movaps 4(%3), %%xmm2
movaps 4(%4), %%xmm3
mulps %%xmm0, %%xmm2
mulps %%xmm1, %%xmm3
movaps 20(%3), %%xmm4
mulps %%xmm0, %%xmm4
addps 4(%0), %%xmm2
movaps 20(%4), %%xmm5
mulps %%xmm1, %%xmm5
addps 20(%0), %%xmm4
subps %%xmm3, %%xmm2
movups %%xmm2, (%0)
subps %%xmm5, %%xmm4
movups %%xmm4, 16(%0)
movss 36(%3), %%xmm2
mulss %%xmm0, %%xmm2
movss 36(%4), %%xmm3
mulss %%xmm1, %%xmm3
addss 36(%0), %%xmm2
movss 40(%3), %%xmm4
mulss %%xmm0, %%xmm4
movss 40(%4), %%xmm5
mulss %%xmm1, %%xmm5
subss %%xmm3, %%xmm2
movss %%xmm2, 32(%0)
subss %%xmm5, %%xmm4
movss %%xmm4, 36(%0)
<p><p> movss 12(%1), %%xmm0
movss (%0), %%xmm1
addss %%xmm0, %%xmm1
movss %%xmm1, 12(%2)
shufps $0x00, %%xmm0, %%xmm0
shufps $0x00, %%xmm1, %%xmm1
movaps 4(%3), %%xmm2
movaps 4(%4), %%xmm3
mulps %%xmm0, %%xmm2
mulps %%xmm1, %%xmm3
movaps 20(%3), %%xmm4
mulps %%xmm0, %%xmm4
addps 4(%0), %%xmm2
movaps 20(%4), %%xmm5
mulps %%xmm1, %%xmm5
addps 20(%0), %%xmm4
subps %%xmm3, %%xmm2
movups %%xmm2, (%0)
subps %%xmm5, %%xmm4
movups %%xmm4, 16(%0)
movss 36(%3), %%xmm2
mulss %%xmm0, %%xmm2
movss 36(%4), %%xmm3
mulss %%xmm1, %%xmm3
addss 36(%0), %%xmm2
movss 40(%3), %%xmm4
mulss %%xmm0, %%xmm4
movss 40(%4), %%xmm5
mulss %%xmm1, %%xmm5
subss %%xmm3, %%xmm2
movss %%xmm2, 32(%0)
subss %%xmm5, %%xmm4
movss %%xmm4, 36(%0)
"
: : "r" (mem), "r" (x+i), "r" (y+i), "r" (num), "r" (den)
: "memory" );
}
/* Write the updated filter state back to the caller's array. */
for (i=0;i<ord;i++)
_mem[i]=mem[i];
}
/* SSE (gcc inline asm) version of iir_mem2().

   For every input sample the asm below computes
       y[i]   = x[i] + mem[0]
       mem[k] = mem[k+1] - den[k+1]*y[i]     for k = 0..8
       mem[9] = - den[10]*y[i]
   The byte offsets used in the asm (4..40) are fixed, i.e. the code is laid
   out for a 10th-order filter; `ord` is only used when copying the
   coefficients and the filter state in and out of the local buffers.
   The disabled `#if 0` branch holds the equivalent scalar loop written in
   terms of `ord`.

   x, y  : input / output sample arrays, N samples, one per loop iteration
   _den  : coefficient array, ord+1 entries are copied (den[0] is not read
           by the asm)
   _mem  : filter state, ord entries, read on entry and written back on exit

   NOTE(review): when ord < 10 the entries of the local copies that are not
   filled by the copy loops are left uninitialised but are still read by
   the asm. */
<p>void iir_mem2(float *x, float *_den, float *y, int N, int ord, float *_mem)
{
/* Over-sized scratch arrays; an aligned working window is carved out of each. */
float __den[20], __mem[20];
/* num is declared but never used; j is only used by the disabled scalar branch. */
float *num, *den, *mem;
int i,j;
/* Round (array+4) down to a 16-byte boundary and step back one float, so
   that element [1] (byte offset 4) is 16-byte aligned for the movaps
   loads at offsets 4 and 20.
   NOTE(review): the pointer is cast to int and masked with 0xfffffff0,
   which only preserves the address where pointers fit in 32 bits. */
den = (float*)(((int)(__den+4))&0xfffffff0)-1;
mem = (float*)(((int)(__mem+4))&0xfffffff0)-1;
/* Copy coefficients (ord+1) and state (ord entries) into the aligned windows. */
for (i=0;i<ord+1;i++)
{
den[i]=_den[i];
}
for (i=0;i<ord;i++)
mem[i]=_mem[i];
for (i=0;i<N;i++)
{
#if 0
y[i] = x[i] + mem[0];
for (j=0;j<ord-1;j++)
{
mem[j] = mem[j+1] - den[j+1]*y[i];
}
mem[ord-1] = - den[ord]*y[i];
#else
/* Operands: %0 = mem, %1 = x+i, %2 = y+i, %3 = den.
   Each group of state values is loaded before the overlapping store that
   shifts it down by one slot.
   No output operands or xmm register clobbers are declared; only
   "memory" is listed. */
__asm__ __volatile__
("
movss (%1), %%xmm0
movss (%0), %%xmm1
addss %%xmm0, %%xmm1
movss %%xmm1, (%2)
shufps $0x00, %%xmm0, %%xmm0
shufps $0x00, %%xmm1, %%xmm1
movaps 4(%3), %%xmm2
movaps 20(%3), %%xmm3
mulps %%xmm1, %%xmm2
mulps %%xmm1, %%xmm3
movss 36(%3), %%xmm4
movss 40(%3), %%xmm5
mulss %%xmm1, %%xmm4
mulss %%xmm1, %%xmm5
movaps 4(%0), %%xmm6
subps %%xmm2, %%xmm6
movups %%xmm6, (%0)
movaps 20(%0), %%xmm7
subps %%xmm3, %%xmm7
movups %%xmm7, 16(%0)
<p> movss 36(%0), %%xmm7
subss %%xmm4, %%xmm7
movss %%xmm7, 32(%0)
xorps %%xmm2, %%xmm2
subss %%xmm5, %%xmm2
movss %%xmm2, 36(%0)
"
: : "r" (mem), "r" (x+i), "r" (y+i), "r" (den)
: "memory" );
#endif
}
/* Write the updated filter state back to the caller's array. */
for (i=0;i<ord;i++)
_mem[i]=mem[i];
}
<p><p><p>1.1 speex/libspeex/ltp_sse.h
Index: ltp_sse.h
===================================================================
/* Copyright (C) 2002 Jean-Marc Valin
File: ltp.c
Long-Term Prediction functions
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of the Xiph.org Foundation nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* SSE (gcc inline asm) version of inner_prod(): accumulates a[k]*b[k] and
   returns the sum.

   Register assignment (see the operand list): eax = a, edi = b, ecx = len,
   edx = &sum.  eax, edi and ecx are advanced/decremented inside the asm;
   they are pushed on entry and popped again before the asm ends.

   Each loop iteration multiplies 20 floats from each array (five unaligned
   4-float movups loads per array, byte offsets 0..64) and accumulates the
   products alternately into xmm3 and xmm4.  After the loop the two
   accumulators are added and the four lanes are reduced to one scalar
   (movhlps + addps, then shufps $0x55 + addss), which is stored through
   edx into `sum`.

   NOTE(review): the loop body runs once before the first count check and
   ecx is only ever reduced in steps of 20 with no tail handling, so len
   appears to be expected to be a multiple of 20 and at least 20 --
   confirm against callers.
   NOTE(review): 32-bit register names are used with push/pop, so this
   looks i386-specific.  No xmm register clobbers are declared; only
   "memory" is listed. */
<p>static float inner_prod(float *a, float *b, int len)
{
/* Written by the asm through the pointer passed in edx. */
float sum;
__asm__ __volatile__ (
"
push %%eax
push %%edi
push %%ecx
xorps %%xmm3, %%xmm3
xorps %%xmm4, %%xmm4
sub $20, %%ecx
.mul20_loop%=:
movups (%%eax), %%xmm0
movups (%%edi), %%xmm1
mulps %%xmm0, %%xmm1
movups 16(%%eax), %%xmm5
movups 16(%%edi), %%xmm6
mulps %%xmm5, %%xmm6
addps %%xmm1, %%xmm3
movups 32(%%eax), %%xmm0
movups 32(%%edi), %%xmm1
mulps %%xmm0, %%xmm1
addps %%xmm6, %%xmm4
movups 48(%%eax), %%xmm5
movups 48(%%edi), %%xmm6
mulps %%xmm5, %%xmm6
addps %%xmm1, %%xmm3
movups 64(%%eax), %%xmm0
movups 64(%%edi), %%xmm1
mulps %%xmm0, %%xmm1
addps %%xmm6, %%xmm4
addps %%xmm1, %%xmm3
<p> add $80, %%eax
add $80, %%edi
sub $20, %%ecx
jae .mul20_loop%=
addps %%xmm4, %%xmm3
movhlps %%xmm3, %%xmm4
addps %%xmm4, %%xmm3
movaps %%xmm3, %%xmm4
//FIXME: which one?
shufps $0x55, %%xmm4, %%xmm4
//shufps $33, %%xmm4, %%xmm4
addss %%xmm4, %%xmm3
movss %%xmm3, (%%edx)
pop %%ecx
pop %%edi
pop %%eax
"
: : "a" (a), "D" (b), "c" (len), "d" (&sum) : "memory");
return sum;
}
<p><p>--- >8 ----
List archives: http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body. No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.
More information about the commits
mailing list