[xiph-cvs] cvs commit: speex/libspeex ltp.c ltp_sse.h
Jean-Marc Valin
jm at xiph.org
Sat Jan 17 12:52:53 PST 2004
jm 04/01/17 15:52:53
Modified: libspeex ltp.c ltp_sse.h
Log:
faster SSE implementation (reduced unaligned loads)
Revision Changes Path
1.108 +14 -6 speex/libspeex/ltp.c
Index: ltp.c
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/ltp.c,v
retrieving revision 1.107
retrieving revision 1.108
diff -u -r1.107 -r1.108
--- ltp.c 4 Dec 2003 21:29:17 -0000 1.107
+++ ltp.c 17 Jan 2004 20:52:52 -0000 1.108
@@ -42,7 +42,7 @@
#ifdef _USE_SSE
#include "ltp_sse.h"
#else
-static spx_word32_t inner_prod(spx_word16_t *x, spx_word16_t *y, int len)
+static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
{
int i;
spx_word32_t sum=0;
@@ -57,6 +57,18 @@
}
return sum;
}
+/* Generic (non-SSE) version of pitch_xcorr: fills corr[] with one value per shift.
+ * For every shift i in [0, nb_pitch) it stores the result of
+ * inner_prod(_x, _y+i, len) in corr[nb_pitch-1-i], so the output is written
+ * in reverse shift order (shift 0 lands in the last entry).
+ * With the call made later in this file, pitch_xcorr(swn, swn-end, corr, len,
+ * end-start+1, stack), this yields corr[lag-start] = inner_prod(swn, swn-lag, len),
+ * the same values the loop removed by this commit produced.
+ * The stack parameter is not used by this version.
+ * NOTE(review): the parameters are float while inner_prod in this branch is
+ * declared with spx_word16_t/spx_word32_t; presumably these coincide in
+ * non-FIXED_POINT builds -- confirm.
+ */
+static void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack)
+{
+ int i;
+ for (i=0;i<nb_pitch;i++)
+ {
+ /* Compute correlation*/
+ corr[nb_pitch-1-i]=inner_prod(_x, _y+i, len); /* reversed index: larger shift i -> smaller corr slot */
+ }
+
+}
+
#endif
void open_loop_nbest_pitch(spx_sig_t *sw, int start, int end, int len, int *pitch, spx_word16_t *gain, int N, char *stack)
@@ -102,11 +114,7 @@
score[i-start]=0;
}
- for (i=start;i<=end;i++)
- {
- /* Compute correlation*/
- corr[i-start]=inner_prod(swn, swn-i, len);
- }
+ pitch_xcorr(swn, swn-end, corr, len, end-start+1, stack);
#ifdef FIXED_POINT
{
1.4 +47 -1 speex/libspeex/ltp_sse.h
Index: ltp_sse.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/ltp_sse.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- ltp_sse.h 2 May 2003 02:08:25 -0000 1.3
+++ ltp_sse.h 17 Jan 2004 20:52:52 -0000 1.4
@@ -30,8 +30,9 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <xmmintrin.h>
-static float inner_prod(float *a, float *b, int len)
+static float inner_prod(const float *a, const float *b, int len)
{
float sum;
__asm__ __volatile__ (
@@ -93,3 +94,48 @@
: : "a" (a), "D" (b), "c" (len), "d" (&sum) : "memory");
return sum;
}
+
+/* SSE version of pitch_xcorr: for each shift k it stores
+ * sum_j _x[j]*_y[j+k] in corr[nb_pitch-1-k], processing four floats at a time.
+ * _x is copied once into N = len>>2 __m128 vectors. For each of the four
+ * sub-vector offsets, _y is copied into N+L vectors (L = nb_pitch>>2) starting
+ * at _y+offset, so the inner loop works on the x/y __m128 arrays instead of
+ * issuing unaligned loads itself. Shift k is decomposed as k = 4*i + offset.
+ * Scratch arrays come from PUSH(stack, ...); PUSH is defined elsewhere
+ * (presumably a bump allocation on the caller-provided stack -- confirm).
+ * NOTE(review): the inner loop consumes 10 vectors per iteration, so N is
+ * assumed to be a multiple of 10 (len a multiple of 40); otherwise it reads
+ * past x[N-1] and the loaded part of y.
+ * NOTE(review): only shifts 0..4*L-1 are computed; if nb_pitch is not a
+ * multiple of 4, the first (nb_pitch & 3) entries of corr are left unwritten.
+ * NOTE(review): the y loads read up to _y[4*(N+L)+2]; the caller must make
+ * that range readable.
+ */
+static void pitch_xcorr(const float *_x, const float *_y, float *corr, int len, int nb_pitch, char *stack)
+{
+ int i, offset;
+ __m128 *x, *y;
+ int N, L;
+ N = len>>2; /* number of 4-float vectors covering _x */
+ L = nb_pitch>>2; /* number of shifts handled per offset */
+ x = PUSH(stack, N, __m128);
+ y = PUSH(stack, N+L, __m128);
+ for (i=0;i<N;i++)
+ x[i] = _mm_loadu_ps(_x+(i<<2)); /* unaligned load of _x[4i..4i+3] */
+ for (offset=0;offset<4;offset++)
+ {
+ for (i=0;i<N+L;i++)
+ y[i] = _mm_loadu_ps(_y+(i<<2)+offset); /* y[i] holds _y[4i+offset .. 4i+offset+3] */
+ for (i=0;i<L;i++)
+ {
+ int j;
+ __m128 sum, *xx, *yy;
+ sum = _mm_setzero_ps();
+ yy = y+i; /* start of the window for shift 4*i+offset */
+ xx = x;
+ for (j=0;j<N;j+=10) /* unrolled by 10: each pass accumulates 10 vector products */
+ {
+ sum = _mm_add_ps(sum, _mm_mul_ps(xx[0], yy[0]));
+ sum = _mm_add_ps(sum, _mm_mul_ps(xx[1], yy[1]));
+ sum = _mm_add_ps(sum, _mm_mul_ps(xx[2], yy[2]));
+ sum = _mm_add_ps(sum, _mm_mul_ps(xx[3], yy[3]));
+ sum = _mm_add_ps(sum, _mm_mul_ps(xx[4], yy[4]));
+ sum = _mm_add_ps(sum, _mm_mul_ps(xx[5], yy[5]));
+ sum = _mm_add_ps(sum, _mm_mul_ps(xx[6], yy[6]));
+ sum = _mm_add_ps(sum, _mm_mul_ps(xx[7], yy[7]));
+ sum = _mm_add_ps(sum, _mm_mul_ps(xx[8], yy[8]));
+ sum = _mm_add_ps(sum, _mm_mul_ps(xx[9], yy[9]));
+ xx += 10;
+ yy += 10;
+ }
+ sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); /* lanes 0,1 += lanes 2,3 */
+ sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); /* lane 0 += lane 1: full horizontal sum in lane 0 */
+ _mm_store_ss(corr+nb_pitch-1-(i<<2)-offset, sum); /* write lane 0 to the slot for shift 4*i+offset */
+ }
+ }
+}
--- >8 ----
List archives: http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body. No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.
More information about the commits
mailing list