[xiph-commits] r11316 - trunk/speex/libspeex
jm at svn.xiph.org
jm at svn.xiph.org
Sat Apr 29 06:01:08 PDT 2006
Author: jm
Date: 2006-04-29 06:00:54 -0700 (Sat, 29 Apr 2006)
New Revision: 11316
Modified:
trunk/speex/libspeex/filters.c
trunk/speex/libspeex/filters_bfin.h
Log:
Blackfin assembly version of filter_mem16
Modified: trunk/speex/libspeex/filters.c
===================================================================
--- trunk/speex/libspeex/filters.c 2006-04-29 07:39:20 UTC (rev 11315)
+++ trunk/speex/libspeex/filters.c 2006-04-29 13:00:54 UTC (rev 11316)
@@ -277,6 +277,7 @@
#endif
#ifdef FIXED_POINT
+#ifndef OVERRIDE_FILTER_MEM16
void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem)
{
int i,j;
@@ -294,6 +295,7 @@
y[i] = yi;
}
}
+#endif
#else
void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem)
{
Modified: trunk/speex/libspeex/filters_bfin.h
===================================================================
--- trunk/speex/libspeex/filters_bfin.h 2006-04-29 07:39:20 UTC (rev 11315)
+++ trunk/speex/libspeex/filters_bfin.h 2006-04-29 13:00:54 UTC (rev 11316)
@@ -103,26 +103,26 @@
"P0 = %3;\n\t"
"I0 = P0;\n\t"
- "B0 = P0;\n\t"
+ "B0 = P0;\n\t" /* numden */
"L0 = 0;\n\t"
- "P2 = %0;\n\t"
+ "P2 = %0;\n\t" /* Fused xy */
"I2 = P2;\n\t"
"L2 = 0;\n\t"
- "P4 = %6;\n\t"
- "P0 = %1;\n\t"
- "P1 = %2;\n\t"
+ "P4 = %6;\n\t" /* mem */
+ "P0 = %1;\n\t" /* _x */
+ "P1 = %2;\n\t" /* _y */
/* First sample */
"R1 = [P4++];\n\t"
- "R1 <<= 1;\n\t"
- "R2 = [P0++];\n\t"
+ "R1 <<= 1;\n\t" /* shift mem */
+ "R2 = [P0++];\n\t" /* load x[0] */
"R1 = R1 + R2;\n\t"
- "[P1++] = R1;\n\t"
+ "[P1++] = R1;\n\t" /* store y[0] */
"R1 <<= 2;\n\t"
"R2 <<= 2;\n\t"
- "R2 = PACK(R1.H, R2.H);\n\t"
+ "R2 = PACK(R1.H, R2.H);\n\t" /* pack x16 and y16 */
"[P2] = R2;\n\t"
/* Samples 1 to ord-1 (using memory) */
@@ -147,13 +147,13 @@
"LOOP_END filter_start_inner%=;\n\t"
"A0 += A1;\n\t"
"R4 = A0;\n\t"
- "R4 <<= 1;\n\t"
- "R2 = [P0++];\n\t"
+ "R4 <<= 1;\n\t" /* shift mem */
+ "R2 = [P0++];\n\t" /* load x */
"R4 = R4 + R2;\n\t"
- "[P1++] = R4;\n\t"
+ "[P1++] = R4;\n\t" /* store y */
"R4 <<= 2;\n\t"
"R2 <<= 2;\n\t"
- "R2 = PACK(R4.H, R2.H);\n\t"
+ "R2 = PACK(R4.H, R2.H);\n\t" /* pack x16 and y16 */
"[P2] = R2;\n\t"
"LOOP_END filter_start%=;\n\t"
@@ -161,14 +161,14 @@
/* Samples ord to N*/
"R0 = %5;\n\t"
"R0 <<= 1;\n\t"
- "I0 = B0;\n\t"
+ "I0 = B0;\n\t" /* numden */
"R0 <<= 1;\n\t"
"L0 = R0;\n\t"
- "R0 = %5;\n\t"
- "R2 = %4;\n\t"
+ "R0 = %5;\n\t" /* ord */
+ "R2 = %4;\n\t" /* N */
"R2 = R2 - R0;\n\t"
- "R4 = [I0++];\n\t"
+ "R4 = [I0++];\n\t" /* numden */
"LC0 = R2;\n\t"
"P3 = R0;\n\t"
"R0 <<= 2;\n\t"
@@ -176,7 +176,7 @@
"I2 = P2;\n\t"
"M0 = R0;\n\t"
"A1 = A0 = 0;\n\t"
- "R5 = [I2--];\n\t"
+ "R5 = [I2--];\n\t" /* load xy */
"LOOP filter_mid%= LC0;\n\t"
"LOOP_BEGIN filter_mid%=;\n\t"
"LOOP filter_mid_inner%= LC1=P3;\n\t"
@@ -184,9 +184,9 @@
"A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t"
"LOOP_END filter_mid_inner%=;\n\t"
"R0 = (A0 += A1) || I2 += M0;\n\t"
- "R0 = R0 << 1 || R5 = [P0++];\n\t"
+ "R0 = R0 << 1 || R5 = [P0++];\n\t" /* load x */
"R0 = R0 + R5;\n\t"
- "R0 = R0 << 2 || [P1++] = R0;\n\t"
+ "R0 = R0 << 2 || [P1++] = R0;\n\t" /* shift y | store y */
"R5 = R5 << 2;\n\t"
"R5 = PACK(R0.H, R5.H);\n\t"
"A1 = A0 = 0 || [I2--] = R5\n\t"
@@ -222,8 +222,148 @@
}
+#define OVERRIDE_FILTER_MEM16
+void filter_mem16(const spx_word16_t *_x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *_y, int N, int ord, spx_mem_t *mem)
+{ /* Blackfin inline-asm filter_mem16: filters the N samples of _x into _y using the ord coefficient pairs num/den, then rewrites the ord state words in mem. */
+ spx_word32_t xy2[N+1]; /* scratch history: one packed word per sample plus one spare slot in front */
+ spx_word32_t *xy = xy2+1; /* xy[-1] is valid storage; presumably needed because the asm's [I2--] prefetch steps one word below xy[0] */
+ spx_word32_t numden_a[2*ord+2]; /* backing store for the interleaved coefficients */
+ spx_word16_t *numden = (spx_word16_t*) numden_a; /* 16-bit view of numden_a; only entries 0..2*ord-1 are written below */
+ int i;
+ for (i=0;i<ord;i++) /* interleave num and den so that the asm can fetch a num[i]/den[i] pair with one load */
+ {
+ numden[2*i] = num[i];
+ numden[2*i+1] = den[i];
+ }
+ __asm__ __volatile__
+ (
+ /* Register setup: operands are %0=xy, %1=_x, %2=_y, %3=numden, %4=N, %5=ord, %6=mem */
+ "R0 = %5;\n\t" /* ord */
+
+ "P0 = %3;\n\t" /* P0 = numden for now (P0 is reloaded with _x below) */
+ "I0 = P0;\n\t" /* I0: read cursor over numden */
+ "B0 = P0;\n\t" /* B0 keeps the start of numden so I0 can be rewound */
+ "L0 = 0;\n\t" /* NOTE(review): presumably length 0 selects linear (non-circular) addressing for I0 during start-up - confirm */
+
+ "P2 = %0;\n\t" /* P2 = xy, the fused (packed) x/y history */
+ "I2 = P2;\n\t"
+ "L2 = 0;\n\t"
+
+ "P4 = %6;\n\t" /* mem */
+ "P0 = %1;\n\t" /* _x */
+ "P1 = %2;\n\t" /* _y */
+
+ /* First sample: y[0] = x[0] + (mem[0] shifted right by 13), added in the low 16-bit halves */
+ "R1 = [P4++];\n\t" /* load mem[0] */
+ "R1 >>>= 13;\n\t" /* shift mem */
+ // "R2.H = 0;\n\t" /* FIXME: do we need this */
+ "R2 = W[P0++];\n\t" /* load x[0] */
+ "R1.L = R1.L + R2.L;\n\t" /* add the low halves only */
+ "W[P1++] = R1;\n\t" /* store y[0] */
+ //"R1 <<= 2;\n\t"
+ //"R2 <<= 2;\n\t"
+ "R2 = PACK(R1.L, R2.L);\n\t" /* pack y16 (from R1) and x16 (from R2) into one word */
+ "[P2] = R2;\n\t" /* xy[0] = packed pair */
+
+ /* Samples 1 to ord-1 (using memory): each sample is seeded from the next mem word and uses one more tap than the previous one */
+ "R0 += -1;\n\t" /* R0 = ord-1 outer iterations */
+ "R3 = 0;\n\t" /* R3 = number of taps used for the current sample */
+ "LC0 = R0;\n\t"
+ "LOOP filter_start%= LC0;\n\t"
+ "LOOP_BEGIN filter_start%=;\n\t"
+ "R3 += 1;\n\t"
+ "LC1 = R3;\n\t" /* inner trip count grows by one per sample */
+
+ "R1 = [P4++];\n\t" /* next mem word */
+ "A1 = R1;\n\t" /* seed accumulator A1 with it */
+ "A0 = 0;\n\t"
+ "I0 = B0;\n\t" /* rewind to the first num/den pair */
+ "I2 = P2;\n\t" /* start from the most recently stored xy word */
+ "P2 += 4;\n\t" /* P2 -> slot of the sample being computed */
+ "R4 = [I0++] || R5 = [I2--];\n\t" /* preload first coefficient pair and first history word */
+ "LOOP filter_start_inner%= LC1;\n\t"
+ "LOOP_BEGIN filter_start_inner%=;\n\t"
+ "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" /* two multiply-accumulates on the .H and .L halves while fetching the next pair; numden walks forward, xy walks backward */
+ "LOOP_END filter_start_inner%=;\n\t"
+ "A0 += A1;\n\t" /* combine both accumulators */
+ "R4 = A0;\n\t"
+ "R4 >>>= 13;\n\t" /* shift the accumulated sum right by 13 */
+ "R2 = W[P0++];\n\t" /* load x */
+ "R4.L = R4.L + R2.L;\n\t" /* add the low halves only */
+ "W[P1++] = R4;\n\t" /* store y */
+ //"R4 <<= 2;\n\t"
+ //"R2 <<= 2;\n\t"
+ "R2 = PACK(R4.L, R2.L);\n\t" /* pack y16 (from R4) and x16 (from R2) into one word */
+ "[P2] = R2;\n\t" /* append the packed pair to xy */
+ "LOOP_END filter_start%=;\n\t"
+ /* Samples ord to N: steady state, ord taps per sample, accumulators start from zero */
+ "R0 = %5;\n\t"
+ "R0 <<= 1;\n\t"
+ "I0 = B0;\n\t" /* rewind I0 to the start of numden */
+ "R0 <<= 1;\n\t" /* R0 = 4*ord */
+ "L0 = R0;\n\t" /* L0 = 4*ord; presumably makes I0 wrap circularly over the ord packed num/den words - confirm */
+
+ "R0 = %5;\n\t" /* ord */
+ "R2 = %4;\n\t" /* N */
+ "R2 = R2 - R0;\n\t" /* N-ord samples remain */
+ "R4 = [I0++];\n\t" /* preload first num/den pair */
+ "LC0 = R2;\n\t" /* NOTE(review): no guard for N < ord is visible here - confirm callers guarantee N >= ord */
+ "P3 = R0;\n\t" /* P3 = ord, the inner trip count */
+ "R0 <<= 2;\n\t"
+ "R0 += 8;\n\t" /* R0 = 4*ord+8 */
+ "I2 = P2;\n\t"
+ "M0 = R0;\n\t" /* M0: byte step that moves I2 forward again after each inner loop */
+ "A1 = A0 = 0;\n\t"
+ "R5 = [I2--];\n\t" /* load xy */
+ "LOOP filter_mid%= LC0;\n\t"
+ "LOOP_BEGIN filter_mid%=;\n\t"
+ "LOOP filter_mid_inner%= LC1=P3;\n\t"
+ "LOOP_BEGIN filter_mid_inner%=;\n\t"
+ "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" /* same dual multiply-accumulate step as in the start-up loop */
+ "LOOP_END filter_mid_inner%=;\n\t"
+ "R0 = (A0 += A1) || I2 += M0;\n\t" /* combine accumulators | advance I2 by M0 to the slot of the new sample */
+ "R0 = R0 >>> 13 || R5 = W[P0++];\n\t" /* shift sum right by 13 | load x */
+ "R0.L = R0.L + R5.L;\n\t" /* add the low halves only */
+ "W[P1++] = R0;\n\t" /* store y */
+ //"R5 = R5 << 2;\n\t"
+ "R5 = PACK(R0.L, R5.L);\n\t" /* pack y16 (from R0) and x16 (from R5) into one word */
+ "A1 = A0 = 0 || [I2--] = R5\n\t" /* clear accumulators | store the packed pair; R5 is also the first history word of the next iteration */
+ "LOOP_END filter_mid%=;\n\t"
+ "I2 += 4;\n\t"
+ "P2 = I2;\n\t" /* P2 -> last xy word written */
+ /* Update memory: recompute the ord words of mem from the final x/y history */
+ "P4 = %6;\n\t" /* mem, from its first word again */
+ "R0 = %5;\n\t"
+ "LC0 = R0;\n\t" /* ord outer iterations, one per mem word */
+ "P0 = B0;\n\t" /* P0 = start of numden */
+ "A1 = A0 = 0;\n\t"
+ "LOOP mem_update%= LC0;\n\t"
+ "LOOP_BEGIN mem_update%=;\n\t"
+ "I2 = P2;\n\t" /* always restart from the newest xy word */
+ "I0 = P0;\n\t"
+ "P0 += 4;\n\t" /* the next mem word starts one coefficient pair later */
+ "R0 = LC0;\n\t"
+ "LC1 = R0;\n\t" /* inner trip count is taken from the current LC0 (presumably the outer iterations still remaining) */
+ "R5 = [I2--] || R4 = [I0++];\n\t" /* preload first history word and coefficient pair */
+ "LOOP mem_accum%= LC1;\n\t"
+ "LOOP_BEGIN mem_accum%=;\n\t"
+ "A1 -= R4.H*R5.H, A0 += R4.L*R5.L (IS) || R4 = [I0++] || R5 = [I2--];\n\t" /* same dual multiply-accumulate step as above */
+ "LOOP_END mem_accum%=;\n\t"
+ "R0 = (A0 += A1);\n\t" /* combine both accumulators */
+ "A1 = A0 = 0 || [P4++] = R0;\n\t" /* clear accumulators | store the sum into mem without any shift */
+ "LOOP_END mem_update%=;\n\t"
+ "L0 = 0;\n\t" /* put L0 back to 0 before leaving the asm */
+ : : "m" (xy), "m" (_x), "m" (_y), "m" (numden), "m" (N), "m" (ord), "m" (mem) /* no outputs; inputs are %0..%6 in this order */
+ : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B0", "I0", "I2", "L0", "L2", "M0", "memory" /* NOTE(review): LC0/LC1 are written above but not listed as clobbers - confirm whether this toolchain needs them declared */
+ );
+
+}
+
+
+
+
#define OVERRIDE_IIR_MEM2
void iir_mem2(const spx_sig_t *_x, const spx_coef_t *den, spx_sig_t *_y, int N, int ord, spx_mem_t *mem)
{
More information about the commits
mailing list