[xiph-commits] r8855 - trunk/speex/libspeex
jm at motherfish-iii.xiph.org
Mon Feb 7 00:46:59 PST 2005
Author: jm
Date: 2005-02-07 00:46:56 -0800 (Mon, 07 Feb 2005)
New Revision: 8855
Added:
trunk/speex/libspeex/filters_arm4.h
trunk/speex/libspeex/fixed_arm4.h
trunk/speex/libspeex/ltp_arm4.h
Modified:
trunk/speex/libspeex/Makefile.am
trunk/speex/libspeex/arch.h
trunk/speex/libspeex/filters.c
trunk/speex/libspeex/ltp.c
Log:
ARM assembly version of pitch_xcorr, moved all ARM assembly to separate files
Modified: trunk/speex/libspeex/Makefile.am
===================================================================
--- trunk/speex/libspeex/Makefile.am 2005-02-07 07:59:33 UTC (rev 8854)
+++ trunk/speex/libspeex/Makefile.am 2005-02-07 08:46:56 UTC (rev 8855)
@@ -59,10 +59,13 @@
vbr.h \
misc.h \
ltp_sse.h \
+ ltp_arm4.h \
filters_sse.h \
+ filters_arm4.h \
math_approx.h \
smallft.h \
arch.h \
+ fixed_arm4.h \
fixed_arm5e.h \
fixed_debug.h \
fixed_generic.h \
Modified: trunk/speex/libspeex/arch.h
===================================================================
--- trunk/speex/libspeex/arch.h 2005-02-07 07:59:33 UTC (rev 8854)
+++ trunk/speex/libspeex/arch.h 2005-02-07 08:46:56 UTC (rev 8855)
@@ -66,6 +66,8 @@
#ifdef ARM5E_ASM
#include "fixed_arm5e.h"
+#elif defined (ARM4_ASM)
+#include "fixed_arm4.h"
#elif defined (FIXED_DEBUG)
#include "fixed_debug.h"
#else
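
For reference, the new backend is selected like the existing ones:
define ARM4_ASM at build time (along with FIXED_POINT, which the
surrounding block in arch.h assumes). A minimal sketch; the macro names
come from the hunk above, the rest is illustrative:

    /* e.g. compile libspeex with -DFIXED_POINT -DARM4_ASM;
       ARM5E_ASM still takes precedence when both are defined */
    #define FIXED_POINT
    #define ARM4_ASM
    #include "arch.h"   /* now pulls in fixed_arm4.h */
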
Modified: trunk/speex/libspeex/filters.c
===================================================================
--- trunk/speex/libspeex/filters.c 2005-02-07 07:59:33 UTC (rev 8854)
+++ trunk/speex/libspeex/filters.c 2005-02-07 08:46:56 UTC (rev 8855)
@@ -118,34 +118,8 @@
#ifdef FIXED_POINT
-int normalize16(const spx_sig_t *x, spx_word16_t *y, int max_scale, int len)
-{
- int i;
- spx_sig_t max_val=1;
- int sig_shift;
-
- for (i=0;i<len;i++)
- {
- spx_sig_t tmp = x[i];
- if (tmp<0)
- tmp = -tmp;
- if (tmp >= max_val)
- max_val = tmp;
- }
- sig_shift=0;
- while (max_val>max_scale)
- {
- sig_shift++;
- max_val >>= 1;
- }
- for (i=0;i<len;i++)
- y[i] = SHR(x[i], sig_shift);
-
- return sig_shift;
-}
-
spx_word16_t compute_rms(const spx_sig_t *x, int len)
{
int i;
@@ -188,95 +162,38 @@
}
#if defined(ARM4_ASM) || defined(ARM5E_ASM)
-void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
-{
- int i,j;
- spx_sig_t xi,yi,nyi;
+#include "filters_arm4.h"
+#else
- for (i=0;i<N;i++)
- {
- int deadm, deadn, deadd, deadidx, x1, y1, dead1, dead2, dead3, dead4, dead5, dead6;
- xi=SATURATE(x[i],805306368);
- yi = SATURATE(ADD32(xi, SHL(mem[0],2)),805306368);
- nyi = -yi;
- y[i] = yi;
- __asm__ __volatile__ (
- "\tldrsh %6, [%1], #2\n"
- "\tsmull %8, %9, %4, %6\n"
- ".filterloop: \n"
- "\tldrsh %6, [%2], #2\n"
- "\tldr %10, [%0, #4]\n"
- "\tmov %8, %8, lsr #15\n"
- "\tsmull %7, %11, %5, %6\n"
- "\tadd %8, %8, %9, lsl #17\n"
- "\tldrsh %6, [%1], #2\n"
- "\tadd %10, %10, %8\n"
- "\tsmull %8, %9, %4, %6\n"
- "\tadd %10, %10, %7, lsr #15\n"
- "\tsubs %3, %3, #1\n"
- "\tadd %10, %10, %11, lsl #17\n"
- "\tstr %10, [%0], #4 \n"
- "\t bne .filterloop\n"
- "\tmov %8, %8, lsr #15\n"
- "\tadd %10, %8, %9, lsl #17\n"
- "\tldrsh %6, [%2], #2\n"
- "\tsmull %8, %9, %5, %6\n"
- "\tadd %10, %10, %8, lsr #15\n"
- "\tadd %10, %10, %9, lsl #17\n"
- "\tstr %10, [%0], #4 \n"
-
- : "=r" (deadm), "=r" (deadn), "=r" (deadd), "=r" (deadidx),
- "=r" (xi), "=r" (nyi), "=r" (dead1), "=r" (dead2),
- "=r" (dead3), "=r" (dead4), "=r" (dead5), "=r" (dead6)
- : "0" (mem), "1" (num+1), "2" (den+1), "3" (ord-1), "4" (xi), "5" (nyi)
- : "cc", "memory");
+int normalize16(const spx_sig_t *x, spx_word16_t *y, int max_scale, int len)
+{
+ int i;
+ spx_sig_t max_val=1;
+ int sig_shift;
+ for (i=0;i<len;i++)
+ {
+ spx_sig_t tmp = x[i];
+ if (tmp<0)
+ tmp = -tmp;
+ if (tmp >= max_val)
+ max_val = tmp;
}
-}
-void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
-{
- int i,j;
- spx_sig_t xi,yi,nyi;
-
- for (i=0;i<N;i++)
+ sig_shift=0;
+ while (max_val>max_scale)
{
- int deadm, deadd, deadidx, dead1, dead2, dead3, dead4, dead5, dead6;
- xi=SATURATE(x[i],805306368);
- yi = SATURATE(ADD32(xi, SHL(mem[0],2)),805306368);
- nyi = -yi;
- y[i] = yi;
- __asm__ __volatile__ (
- "\tldrsh %4, [%1], #2\n"
- "\tsmull %5, %6, %3, %4\n"
+ sig_shift++;
+ max_val >>= 1;
+ }
- ".iirloop: \n"
- "\tldr %7, [%0, #4]\n"
-
- "\tldrsh %4, [%1], #2\n"
- "\tmov %5, %5, lsr #15\n"
- "\tadd %8, %5, %6, lsl #17\n"
- "\tsmull %5, %6, %3, %4\n"
- "\tadd %7, %7, %8\n"
- "\tstr %7, [%0], #4 \n"
- "\tsubs %2, %2, #1\n"
- "\t bne .iirloop\n"
-
- "\tmov %5, %5, lsr #15\n"
- "\tadd %7, %5, %6, lsl #17\n"
- "\tstr %7, [%0], #4 \n"
-
- : "=r" (deadm), "=r" (deadd), "=r" (deadidx), "=r" (nyi),
- "=r" (dead1), "=r" (dead2), "=r" (dead3), "=r" (dead4),
- "=r" (dead5), "=r" (dead6)
- : "0" (mem), "1" (den+1), "2" (ord-1), "3" (nyi)
- : "cc", "memory");
+ for (i=0;i<len;i++)
+ y[i] = SHR(x[i], sig_shift);
- }
+ return sig_shift;
}
-#else
void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
{
int i,j;
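
normalize16 itself is unchanged by the move: it scans x for the largest
magnitude, computes the right shift that brings it under max_scale, and
writes the scaled 16-bit signal to y. A usage sketch (buffer names and
sizes are hypothetical, not from the patch):

    spx_sig_t sig[64];        /* 32-bit fixed-point signal */
    spx_word16_t sig16[64];
    int shift = normalize16(sig, sig16, 16384, 64);
    /* now |sig16[i]| <= 16384 and sig[i] ~= SHL(sig16[i], shift) */
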
Added: trunk/speex/libspeex/filters_arm4.h
===================================================================
--- trunk/speex/libspeex/filters_arm4.h 2005-02-07 07:59:33 UTC (rev 8854)
+++ trunk/speex/libspeex/filters_arm4.h 2005-02-07 08:46:56 UTC (rev 8855)
@@ -0,0 +1,182 @@
+/* Copyright (C) 2004 Jean-Marc Valin
+ File: filters_arm4.h
+ ARM4-optimized filtering routines
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+int normalize16(const spx_sig_t *x, spx_word16_t *y, int max_scale, int len)
+{
+ int i;
+ spx_sig_t max_val=1;
+ int sig_shift;
+ int dead1, dead2, dead3, dead4, dead5, dead6;
+
+ __asm__ __volatile__ (
+ "\tmov %1, #1 \n"
+ "\tmov %3, #0 \n"
+
+ ".normalize16loop1: \n"
+
+ "\tldr %4, [%0], #4 \n"
+ "\tcmps %4, %1 \n"
+ "\tmovgt %1, %4 \n"
+ "\tcmps %4, %3 \n"
+ "\tmovlt %3, %4 \n"
+
+ "\tsubs %2, %2, #1 \n"
+ "\tbne .normalize16loop1\n"
+
+ "\trsb %3, %3, #0 \n"
+ "\tcmp %1, %3 \n"
+ "\tmovlt %1, %3 \n"
+ : "=r" (dead1), "=r" (max_val), "=r" (dead3), "=r" (dead4),
+ "=r" (dead5), "=r" (dead6)
+ : "0" (x), "2" (len)
+ : "cc", "memory");
+
+ sig_shift=0;
+ while (max_val>max_scale)
+ {
+ sig_shift++;
+ max_val >>= 1;
+ }
+
+ __asm__ __volatile__ (
+ ".normalize16loop: \n"
+
+ "\tldr %4, [%0], #4 \n"
+ "\tldr %5, [%0], #4 \n"
+ "\tmov %4, %4, asr %3 \n"
+ "\tstrh %4, [%1], #2 \n"
+ "\tldr %4, [%0], #4 \n"
+ "\tmov %5, %5, asr %3 \n"
+ "\tstrh %5, [%1], #2 \n"
+ "\tldr %5, [%0], #4 \n"
+ "\tmov %4, %4, asr %3 \n"
+ "\tstrh %4, [%1], #2 \n"
+ "\tsubs %2, %2, #1 \n"
+ "\tmov %5, %5, asr %3 \n"
+ "\tstrh %5, [%1], #2 \n"
+
+ "\tbge .normalize16loop\n"
+ : "=r" (dead1), "=r" (dead2), "=r" (dead3), "=r" (dead4),
+ "=r" (dead5), "=r" (dead6)
+ : "0" (x), "1" (y), "2" (len>>2), "3" (sig_shift)
+ : "cc", "memory");
+ return sig_shift;
+}
+
+
+void filter_mem2(const spx_sig_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
+{
+ int i,j;
+ spx_sig_t xi,yi,nyi;
+
+ for (i=0;i<N;i++)
+ {
+ int deadm, deadn, deadd, deadidx, x1, y1, dead1, dead2, dead3, dead4, dead5, dead6;
+ xi=SATURATE(x[i],805306368);
+ yi = SATURATE(ADD32(xi, SHL(mem[0],2)),805306368);
+ nyi = -yi;
+ y[i] = yi;
+ __asm__ __volatile__ (
+ "\tldrsh %6, [%1], #2\n"
+ "\tsmull %8, %9, %4, %6\n"
+ ".filterloop: \n"
+ "\tldrsh %6, [%2], #2\n"
+ "\tldr %10, [%0, #4]\n"
+ "\tmov %8, %8, lsr #15\n"
+ "\tsmull %7, %11, %5, %6\n"
+ "\tadd %8, %8, %9, lsl #17\n"
+ "\tldrsh %6, [%1], #2\n"
+ "\tadd %10, %10, %8\n"
+ "\tsmull %8, %9, %4, %6\n"
+ "\tadd %10, %10, %7, lsr #15\n"
+ "\tsubs %3, %3, #1\n"
+ "\tadd %10, %10, %11, lsl #17\n"
+ "\tstr %10, [%0], #4 \n"
+ "\t bne .filterloop\n"
+
+ "\tmov %8, %8, lsr #15\n"
+ "\tadd %10, %8, %9, lsl #17\n"
+ "\tldrsh %6, [%2], #2\n"
+ "\tsmull %8, %9, %5, %6\n"
+ "\tadd %10, %10, %8, lsr #15\n"
+ "\tadd %10, %10, %9, lsl #17\n"
+ "\tstr %10, [%0], #4 \n"
+
+ : "=r" (deadm), "=r" (deadn), "=r" (deadd), "=r" (deadidx),
+ "=r" (xi), "=r" (nyi), "=r" (dead1), "=r" (dead2),
+ "=r" (dead3), "=r" (dead4), "=r" (dead5), "=r" (dead6)
+ : "0" (mem), "1" (num+1), "2" (den+1), "3" (ord-1), "4" (xi), "5" (nyi)
+ : "cc", "memory");
+
+ }
+}
+
+void iir_mem2(const spx_sig_t *x, const spx_coef_t *den, spx_sig_t *y, int N, int ord, spx_mem_t *mem)
+{
+ int i,j;
+ spx_sig_t xi,yi,nyi;
+
+ for (i=0;i<N;i++)
+ {
+ int deadm, deadd, deadidx, dead1, dead2, dead3, dead4, dead5, dead6;
+ xi=SATURATE(x[i],805306368);
+ yi = SATURATE(ADD32(xi, SHL(mem[0],2)),805306368);
+ nyi = -yi;
+ y[i] = yi;
+ __asm__ __volatile__ (
+ "\tldrsh %4, [%1], #2\n"
+ "\tsmull %5, %6, %3, %4\n"
+
+ ".iirloop: \n"
+ "\tldr %7, [%0, #4]\n"
+
+ "\tldrsh %4, [%1], #2\n"
+ "\tmov %5, %5, lsr #15\n"
+ "\tadd %8, %5, %6, lsl #17\n"
+ "\tsmull %5, %6, %3, %4\n"
+ "\tadd %7, %7, %8\n"
+ "\tstr %7, [%0], #4 \n"
+ "\tsubs %2, %2, #1\n"
+ "\t bne .iirloop\n"
+
+ "\tmov %5, %5, lsr #15\n"
+ "\tadd %7, %5, %6, lsl #17\n"
+ "\tstr %7, [%0], #4 \n"
+
+ : "=r" (deadm), "=r" (deadd), "=r" (deadidx), "=r" (nyi),
+ "=r" (dead1), "=r" (dead2), "=r" (dead3), "=r" (dead4),
+ "=r" (dead5), "=r" (dead6)
+ : "0" (mem), "1" (den+1), "2" (ord-1), "3" (nyi)
+ : "cc", "memory");
+
+ }
+}
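
For readers not fluent in ARM assembly: the .filterloop above updates
the filter memory in direct form II transposed; the lsr #15 / lsl #17
pair reconstructs (product >> 15) from each 64-bit smull result, and
the SHL(mem[0],2) in the C wrapper restores the scaling. A rough
plain-C equivalent of one sample step, inferred from the assembly
rather than taken from the patch:

    /* xi is the saturated input, nyi = -yi the negated output */
    for (j=0;j<ord-1;j++)
       mem[j] = ADD32(mem[j+1], ADD32(MULT16_32_Q15(num[j+1], xi),
                                      MULT16_32_Q15(den[j+1], nyi)));
    mem[ord-1] = ADD32(MULT16_32_Q15(num[ord], xi),
                       MULT16_32_Q15(den[ord], nyi));
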
Added: trunk/speex/libspeex/fixed_arm4.h
===================================================================
--- trunk/speex/libspeex/fixed_arm4.h 2005-02-07 07:59:33 UTC (rev 8854)
+++ trunk/speex/libspeex/fixed_arm4.h 2005-02-07 08:46:56 UTC (rev 8855)
@@ -0,0 +1,173 @@
+/* Copyright (C) 2004 Jean-Marc Valin */
+/**
+ @file fixed_arm4.h
+ @brief ARM4 fixed-point operations
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARM4_H
+#define FIXED_ARM4_H
+
+#define SHR(a,shift) ((a) >> (shift))
+#define SHL(a,shift) ((a) << (shift))
+
+#define SATURATE(x,a) ((x)>(a) ? (a) : (x)<-(a) ? -(a) : (x))
+
+#define ADD16(a,b) ((short)((short)(a)+(short)(b)))
+#define SUB16(a,b) ((a)-(b))
+#define ADD32(a,b) ((a)+(b))
+#define SUB32(a,b) ((a)-(b))
+#define ADD64(a,b) ((a)+(b))
+
+#define PSHR(a,shift) (SHR((a)+(1<<((shift)-1)),shift))
+
+/* result fits in 16 bits */
+#define MULT16_16_16(a,b) ((((short)(a))*((short)(b))))
+
+#define MULT16_16(a,b) (((short)(a))*((short)(b)))
+
+
+
+
+#define MAC16_16(c,a,b) (ADD32((c),MULT16_16((a),(b))))
+#define MULT16_32_Q12(a,b) ADD32(MULT16_16((a),SHR((b),12)), SHR(MULT16_16((a),((b)&0x00000fff)),12))
+#define MULT16_32_Q13(a,b) ADD32(MULT16_16((a),SHR((b),13)), SHR(MULT16_16((a),((b)&0x00001fff)),13))
+#define MULT16_32_Q14(a,b) ADD32(MULT16_16((a),SHR((b),14)), SHR(MULT16_16((a),((b)&0x00003fff)),14))
+
+#define MULT16_32_Q11(a,b) ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11))
+#define MAC16_32_Q11(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11)))
+
+#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
+#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
+
+
+#define MAC16_16_Q11(c,a,b) (ADD32((c),SHR(MULT16_16((a),(b)),11)))
+
+#define MULT16_16_Q11(a,b) (SHR(MULT16_16((a),(b)),11))
+#define MULT16_16_Q13(a,b) (SHR(MULT16_16((a),(b)),13))
+#define MULT16_16_Q14(a,b) (SHR(MULT16_16((a),(b)),14))
+#define MULT16_16_Q15(a,b) (SHR(MULT16_16((a),(b)),15))
+
+#define MULT16_16_P13(a,b) (SHR(ADD32(4096,MULT16_16((a),(b))),13))
+#define MULT16_16_P14(a,b) (SHR(ADD32(8192,MULT16_16((a),(b))),14))
+#define MULT16_16_P15(a,b) (SHR(ADD32(16384,MULT16_16((a),(b))),15))
+
+#define MUL_16_32_R15(a,bh,bl) ADD32(MULT16_16((a),(bh)), SHR(MULT16_16((a),(bl)),15))
+
+
+
+//#define DIV32_16(a,b) ((short)(((signed int)(a))/((short)(b))))
+inline short DIV32_16(int a, int b)
+{
+ int res=0;
+ int dead1, dead2, dead3, dead4, dead5;
+ __asm__ __volatile__ (
+ "\teor %5, %0, %1\n"
+ "\tmovs %4, %0\n"
+ "\trsbmi %0, %0, #0 \n"
+ "\tmovs %4, %1\n"
+ "\trsbmi %1, %1, #0 \n"
+ "\tmov %4, #1\n"
+
+ "\tsubs %3, %0, %1, asl #14 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #14 \n"
+
+ "\tsubs %3, %0, %1, asl #13 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #13 \n"
+
+ "\tsubs %3, %0, %1, asl #12 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #12 \n"
+
+ "\tsubs %3, %0, %1, asl #11 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #11 \n"
+
+ "\tsubs %3, %0, %1, asl #10 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #10 \n"
+
+ "\tsubs %3, %0, %1, asl #9 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #9 \n"
+
+ "\tsubs %3, %0, %1, asl #8 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #8 \n"
+
+ "\tsubs %3, %0, %1, asl #7 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #7 \n"
+
+ "\tsubs %3, %0, %1, asl #6 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #6 \n"
+
+ "\tsubs %3, %0, %1, asl #5 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #5 \n"
+
+ "\tsubs %3, %0, %1, asl #4 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #4 \n"
+
+ "\tsubs %3, %0, %1, asl #3 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #3 \n"
+
+ "\tsubs %3, %0, %1, asl #2 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #2 \n"
+
+ "\tsubs %3, %0, %1, asl #1 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4, asl #1 \n"
+
+ "\tsubs %3, %0, %1 \n"
+ "\tmovpl %0, %3 \n"
+ "\torrpl %2, %2, %4 \n"
+
+ "\tmovs %5, %5, lsr #31 \n"
+ "\trsbne %2, %2, #0 \n"
+ : "=r" (dead1), "=r" (dead2), "=r" (res),
+ "=r" (dead3), "=r" (dead4), "=r" (dead5)
+ : "0" (a), "1" (b), "2" (res)
+ : "cc"
+ );
+ return res;
+}
+
+#define DIV32(a,b) (((signed int)(a))/((signed int)(b)))
+
+
+
+#endif
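
The DIV32_16 routine above is a restoring shift-and-subtract division
unrolled fifteen times: ARM4 has no divide instruction, so the quotient
is built one bit at a time from bit 14 down. An illustrative C model of
the same algorithm (not from the patch; like the assembly, it assumes
b << 14 does not overflow):

    static short div32_16_model(int a, int b)
    {
       int neg = (a ^ b) < 0;      /* the eor: sign of the result */
       int res = 0, k;
       if (a < 0) a = -a;          /* the rsbmi pair: magnitudes  */
       if (b < 0) b = -b;
       for (k = 14; k >= 0; k--)   /* the subs/movpl/orrpl ladder */
       {
          if (a >= (b << k))
          {
             a -= b << k;
             res |= 1 << k;
          }
       }
       return neg ? -res : res;    /* the final movs/rsbne        */
    }
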
Modified: trunk/speex/libspeex/ltp.c
===================================================================
--- trunk/speex/libspeex/ltp.c 2005-02-07 07:59:33 UTC (rev 8854)
+++ trunk/speex/libspeex/ltp.c 2005-02-07 08:46:56 UTC (rev 8855)
@@ -45,63 +45,12 @@
#ifdef _USE_SSE
#include "ltp_sse.h"
+#elif defined (ARM4_ASM) || defined(ARM5E_ASM)
+#include "ltp_arm4.h"
#else
-#if defined(ARM4_ASM) || defined(ARM5E_ASM)
static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
{
- spx_word32_t sum1=0,sum2=0;
- spx_word16_t *deadx, *deady;
- int deadlen, dead1, dead2, dead3, dead4, dead5, dead6;
- __asm__ __volatile__ (
- "\tldrsh %5, [%0], #2 \n"
- "\tldrsh %6, [%1], #2 \n"
- ".inner_prod_loop:\n"
- "\tsub %7, %7, %7\n"
- "\tsub %10, %10, %10\n"
-
- "\tldrsh %8, [%0], #2 \n"
- "\tldrsh %9, [%1], #2 \n"
- "\tmla %7, %5, %6, %7\n"
- "\tldrsh %5, [%0], #2 \n"
- "\tldrsh %6, [%1], #2 \n"
- "\tmla %10, %8, %9, %10\n"
- "\tldrsh %8, [%0], #2 \n"
- "\tldrsh %9, [%1], #2 \n"
- "\tmla %7, %5, %6, %7\n"
- "\tldrsh %5, [%0], #2 \n"
- "\tldrsh %6, [%1], #2 \n"
- "\tmla %10, %8, %9, %10\n"
-
- "\tldrsh %8, [%0], #2 \n"
- "\tldrsh %9, [%1], #2 \n"
- "\tmla %7, %5, %6, %7\n"
- "\tldrsh %5, [%0], #2 \n"
- "\tldrsh %6, [%1], #2 \n"
- "\tmla %10, %8, %9, %10\n"
- "\tldrsh %8, [%0], #2 \n"
- "\tldrsh %9, [%1], #2 \n"
- "\tmla %7, %5, %6, %7\n"
- "\tldrsh %5, [%0], #2 \n"
- "\tldrsh %6, [%1], #2 \n"
- "\tmla %10, %8, %9, %10\n"
-
- "\tsubs %4, %4, #1\n"
- "\tadd %2, %2, %7, asr #5\n"
- "\tadd %3, %3, %10, asr #5\n"
- "\tbne .inner_prod_loop\n"
- : "=r" (deadx), "=r" (deady), "=r" (sum1), "=r" (sum2), "=r" (deadlen),
- "=r" (dead1), "=r" (dead2), "=r" (dead3), "=r" (dead4), "=r" (dead5), "=r"
- : "0" (x), "1" (y), "2" (sum1), "3" (sum2), "4" (len>>3)
- : "cc", "memory"
- );
- return (sum1+sum2)>>1;
-}
-
-
-#else
-static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
-{
int i;
spx_word32_t sum=0;
for (i=0;i<len;i+=4)
@@ -115,9 +64,8 @@
}
return sum;
}
-#endif
-#if 0
+#if 0 /* Enable this for machines with enough registers (i.e. not x86) */
static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
{
int i,j;
@@ -181,8 +129,7 @@
}
#else
-static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word
-32_t *corr, int len, int nb_pitch, char *stack)
+static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
{
int i;
for (i=0;i<nb_pitch;i++)
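
The hunk cuts off inside the generic fallback; its body just computes
one correlation per lag with inner_prod, as the commented-out line in
ltp_arm4.h below also shows. A sketch of the remainder (reconstructed,
not part of this diff):

    for (i=0;i<nb_pitch;i++)
       corr[nb_pitch-1-i] = inner_prod(_x, _y+i, len);
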
Added: trunk/speex/libspeex/ltp_arm4.h
===================================================================
--- trunk/speex/libspeex/ltp_arm4.h 2005-02-07 07:59:33 UTC (rev 8854)
+++ trunk/speex/libspeex/ltp_arm4.h 2005-02-07 08:46:56 UTC (rev 8855)
@@ -0,0 +1,164 @@
+/* Copyright (C) 2004 Jean-Marc Valin
+ File: ltp_arm4.h
+ Long-Term Prediction functions (ARM4 version)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+static spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
+{
+ spx_word32_t sum1=0,sum2=0;
+ spx_word16_t *deadx, *deady;
+ int deadlen, dead1, dead2, dead3, dead4, dead5, dead6;
+ __asm__ __volatile__ (
+ "\tldrsh %5, [%0], #2 \n"
+ "\tldrsh %6, [%1], #2 \n"
+ ".inner_prod_loop:\n"
+ "\tsub %7, %7, %7\n"
+ "\tsub %10, %10, %10\n"
+
+ "\tldrsh %8, [%0], #2 \n"
+ "\tldrsh %9, [%1], #2 \n"
+ "\tmla %7, %5, %6, %7\n"
+ "\tldrsh %5, [%0], #2 \n"
+ "\tldrsh %6, [%1], #2 \n"
+ "\tmla %10, %8, %9, %10\n"
+ "\tldrsh %8, [%0], #2 \n"
+ "\tldrsh %9, [%1], #2 \n"
+ "\tmla %7, %5, %6, %7\n"
+ "\tldrsh %5, [%0], #2 \n"
+ "\tldrsh %6, [%1], #2 \n"
+ "\tmla %10, %8, %9, %10\n"
+
+ "\tldrsh %8, [%0], #2 \n"
+ "\tldrsh %9, [%1], #2 \n"
+ "\tmla %7, %5, %6, %7\n"
+ "\tldrsh %5, [%0], #2 \n"
+ "\tldrsh %6, [%1], #2 \n"
+ "\tmla %10, %8, %9, %10\n"
+ "\tldrsh %8, [%0], #2 \n"
+ "\tldrsh %9, [%1], #2 \n"
+ "\tmla %7, %5, %6, %7\n"
+ "\tldrsh %5, [%0], #2 \n"
+ "\tldrsh %6, [%1], #2 \n"
+ "\tmla %10, %8, %9, %10\n"
+
+ "\tsubs %4, %4, #1\n"
+ "\tadd %2, %2, %7, asr #5\n"
+ "\tadd %3, %3, %10, asr #5\n"
+ "\tbne .inner_prod_loop\n"
+ : "=r" (deadx), "=r" (deady), "=r" (sum1), "=r" (sum2), "=r" (deadlen),
+ "=r" (dead1), "=r" (dead2), "=r" (dead3), "=r" (dead4), "=r" (dead5), "=r"
+ : "0" (x), "1" (y), "2" (sum1), "3" (sum2), "4" (len>>3)
+ : "cc", "memory"
+ );
+ return (sum1+sum2)>>1;
+}
+
+static void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *corr, int len, int nb_pitch, char *stack)
+{
+ int i,j;
+ for (i=0;i<nb_pitch;i+=4)
+ {
+ /* Compute correlation*/
+ //corr[nb_pitch-1-i]=inner_prod(x, _y+i, len);
+ spx_word32_t sum1=0;
+ spx_word32_t sum2=0;
+ spx_word32_t sum3=0;
+ spx_word32_t sum4=0;
+ const spx_word16_t *y = _y+i;
+ const spx_word16_t *x = _x;
+ spx_word32_t y0, y1, y2, y3;
+ y0=*y++;
+ y1=*y++;
+ y2=*y++;
+ y3=*y++;
+ for (j=0;j<len;j+=4)
+ {
+ spx_word32_t part1, part2, part3, part4, x0;
+ spx_word16_t z0,z1,z2,z3;
+ __asm__ __volatile__ (
+ "\tldrsh %10, [%8], #2 \n"
+ "\tmul %4, %10, %0 \n"
+ "\tmul %5, %10, %1 \n"
+ "\tmul %6, %10, %2 \n"
+ "\tmul %7, %10, %3 \n"
+
+ "\tldrsh %10, [%8], #2 \n"
+ "\tldrsh %0, [%9], #2 \n"
+ "\tmla %4, %10, %1, %4 \n"
+ "\tmla %5, %10, %2, %5 \n"
+ "\tmla %6, %10, %3, %6 \n"
+ "\tmla %7, %10, %0, %7 \n"
+
+ "\tldrsh %10, [%8], #2 \n"
+ "\tldrsh %1, [%9], #2 \n"
+ "\tmla %4, %10, %2, %4 \n"
+ "\tmla %5, %10, %3, %5 \n"
+ "\tmla %6, %10, %0, %6 \n"
+ "\tmla %7, %10, %1, %7 \n"
+
+ "\tldrsh %10, [%8], #2 \n"
+ "\tldrsh %2, [%9], #2 \n"
+ "\tmla %4, %10, %3, %4 \n"
+ "\tmla %5, %10, %0, %5 \n"
+ "\tmla %6, %10, %1, %6 \n"
+ "\tmla %7, %10, %2, %7 \n"
+
+ "\tldrsh %3, [%9], #2 \n"
+
+
+ "\tldr %10, %11 \n"
+ "\tadd %4, %10, %4, asr #6 \n"
+ "\tldr %10, %12 \n"
+ "\tstr %4, %11 \n"
+ "\tadd %5, %10, %5, asr #6 \n"
+ "\tldr %10, %13 \n"
+ "\tstr %5, %12 \n"
+ "\tadd %6, %10, %6, asr #6 \n"
+ "\tldr %10, %14 \n"
+ "\tstr %6, %13 \n"
+ "\tadd %7, %10, %7, asr #6 \n"
+ "\tstr %7, %14 \n"
+
+ : "=r" (y0), "=r" (y1), "=r" (y2), "=r" (y3),
+ "=r" (part1), "=r" (part2), "=r" (part3), "=r" (part4),
+ "=r" (x), "=r" (y), "=r" (x0),
+ "=m" (sum1), "=m" (sum2), "=m" (sum3), "=m" (sum4)
+ : "0" (y0), "1" (y1), "2" (y2), "3" (y3),
+ "8" (x), "9" (y),
+ "11" (sum1), "12" (sum2), "13" (sum3), "14" (sum4)
+ : "cc", "memory"
+ );
+ }
+ corr[nb_pitch-1-i]=sum1;
+ corr[nb_pitch-2-i]=sum2;
+ corr[nb_pitch-3-i]=sum3;
+ corr[nb_pitch-4-i]=sum4;
+ }
+
+}
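
Like the generic fixed-point inner_prod, the assembly version above
shifts each four-product partial sum right by 5 before accumulating so
the total stays within 32 bits, then halves the result. A plain-C model
(illustrative only; like the len>>3 in the constraints, it assumes len
is a multiple of eight):

    static spx_word32_t inner_prod_model(const spx_word16_t *x,
                                         const spx_word16_t *y, int len)
    {
       int i, j;
       spx_word32_t sum = 0;
       for (i = 0; i < len; i += 4)
       {
          spx_word32_t part = 0;        /* the mla chains           */
          for (j = 0; j < 4; j++)
             part += x[i+j] * y[i+j];
          sum += part >> 5;             /* the asr #5 rescaling     */
       }
       return sum >> 1;                 /* the final (sum1+sum2)>>1 */
    }
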