[Speex-dev] [PATCH] Blackfin: cleanup astat/cc/hardware loop asm clobbers
Mike Frysinger
vapier at gentoo.org
Fri Apr 24 16:15:42 PDT 2009
Most asm statements clobber ASTAT bits (shifts, maxes, etc...) but do not
declare the register as clobbered. Same thing with CC in a few places.
Some places make an attempt at clobbering some hardware loop registers,
but it's very incomplete compared with how many asm statements actually
use hardware loops.
Signed-off-by: Mike Frysinger <vapier at gentoo.org>
---
libspeex/bfin.h | 15 +++++++++++++++
libspeex/cb_search_bfin.h | 9 ++++-----
libspeex/filters_bfin.h | 15 ++++++++++-----
libspeex/fixed_bfin.h | 15 +++++++++------
libspeex/lpc_bfin.h | 5 ++++-
libspeex/lsp_bfin.h | 2 +-
libspeex/ltp_bfin.h | 25 ++++++++++---------------
libspeex/misc_bfin.h | 4 +++-
libspeex/quant_lsp_bfin.h | 7 +++++--
libspeex/vq_bfin.h | 7 +++++--
10 files changed, 66 insertions(+), 38 deletions(-)
create mode 100644 libspeex/bfin.h
diff --git a/libspeex/bfin.h b/libspeex/bfin.h
new file mode 100644
index 0000000..b934cf2
--- /dev/null
+++ b/libspeex/bfin.h
@@ -0,0 +1,15 @@
+/* Common Blackfin assembly defines
+ *
+ * Copyright (C) 2005-2009 Analog Devices
+ */
+
+#if __GNUC__ <= 3
+/* GCC-3.4 and older did not use hardware loops and thus did not have
+ * register constraints for declaring clobbers.
+ */
+# define BFIN_HWLOOP0_REGS
+# define BFIN_HWLOOP1_REGS
+#else
+# define BFIN_HWLOOP0_REGS , "LB0", "LT0", "LC0"
+# define BFIN_HWLOOP1_REGS , "LB1", "LT1", "LC1"
+#endif
diff --git a/libspeex/cb_search_bfin.h b/libspeex/cb_search_bfin.h
index ae9cf83..edb1eca 100644
--- a/libspeex/cb_search_bfin.h
+++ b/libspeex/cb_search_bfin.h
@@ -33,6 +33,8 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "bfin.h"
+
#define OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK
void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
{
@@ -73,10 +75,7 @@ void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *
:
: "m" (subvect_size), "m" (shape_cb), "m" (r), "m" (resp), "m" (E)
: "A0", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "I0", "I1", "L0",
- "L1", "A0", "A1", "memory"
-#if !(__GNUC__ == 3)
- , "LC0", "LC1" /* gcc 3.4 doesn't know about LC registers */
-#endif
+ "L1", "A0", "A1", "memory", "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
);
shape_cb += subvect_size;
resp += subvect_size;
@@ -107,6 +106,6 @@ static inline void target_update(spx_word16_t *t, spx_word16_t g, spx_word16_t *
"LOOP_END tupdate%=;\n\t"
:
: "a" (t), "a" (r), "d" (g), "a" (len)
- : "R0", "R1", "R2", "A1", "I0", "I1", "L0", "L1"
+ : "R0", "R1", "R2", "A1", "I0", "I1", "L0", "L1", "ASTAT" BFIN_HWLOOP0_REGS
);
}
diff --git a/libspeex/filters_bfin.h b/libspeex/filters_bfin.h
index 1e433ee..ccd57b9 100644
--- a/libspeex/filters_bfin.h
+++ b/libspeex/filters_bfin.h
@@ -32,6 +32,8 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "bfin.h"
+
#define OVERRIDE_NORMALIZE16
int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
{
@@ -50,7 +52,7 @@ int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int le
"LOOP_END norm_max%=;\n\t"
: "=&d" (max_val)
: "a" (x), "a" (len)
- : "R1", "R2"
+ : "R1", "R2", "ASTAT" BFIN_HWLOOP0_REGS
);
sig_shift=0;
@@ -74,7 +76,7 @@ int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int le
"R1 = ASHIFT R0 by %2.L;\n\t"
"W[P1++] = R1;\n\t"
: : "a" (x), "a" (y), "d" (-sig_shift), "a" (len-1)
- : "I0", "L0", "P1", "R0", "R1", "memory"
+ : "I0", "L0", "P1", "R0", "R1", "memory", "ASTAT" BFIN_HWLOOP0_REGS
);
return sig_shift;
}
@@ -219,7 +221,8 @@ void filter_mem16(const spx_word16_t *_x, const spx_coef_t *num, const spx_coef_
"LOOP_END mem_update%=;\n\t"
"L0 = 0;\n\t"
: : "m" (xy), "m" (_x), "m" (_y), "m" (numden), "m" (N), "m" (ord), "m" (mem)
- : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B0", "I0", "I2", "L0", "L2", "M0", "memory"
+ : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B0", "I0", "I2", "L0", "L2", "M0", "memory",
+ "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
);
}
@@ -345,7 +348,8 @@ void iir_mem16(const spx_word16_t *_x, const spx_coef_t *den, spx_word16_t *_y,
"LOOP_END mem_update%=;\n\t"
"L1 = 0;\n\t"
: : "m" (yy), "m" (_x), "m" (_y), "m" (den), "m" (N), "m" (ord), "m" (mem)
- : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B1", "I1", "I3", "L1", "L3", "memory"
+ : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B1", "I1", "I3", "L1", "L3", "memory",
+ "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
);
}
@@ -426,7 +430,8 @@ void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, cons
"LOOP_END samples%=;\n\t"
: "=a" (ytmp2), "=a" (y)
: "a" (awk2), "a" (ak), "d" (ord), "m" (N), "0" (ytmp2), "1" (y)
- : "A0", "A1", "R0", "R1", "R2", "R3", "I0", "I1", "I2", "I3", "L0", "L1", "L2", "L3", "A0", "A1"
+ : "A0", "A1", "R0", "R1", "R2", "R3", "I0", "I1", "I2", "I3", "L0", "L1", "L2", "L3",
+ "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
);
}
diff --git a/libspeex/fixed_bfin.h b/libspeex/fixed_bfin.h
index aa26f6a..9eb21e3 100644
--- a/libspeex/fixed_bfin.h
+++ b/libspeex/fixed_bfin.h
@@ -36,6 +36,8 @@
#ifndef FIXED_BFIN_H
#define FIXED_BFIN_H
+#include "bfin.h"
+
#undef PDIV32_16
static inline spx_word16_t PDIV32_16(spx_word32_t a, spx_word16_t b)
{
@@ -57,7 +59,7 @@ static inline spx_word16_t PDIV32_16(spx_word32_t a, spx_word16_t b)
"%0 = R0;\n\t"
: "=m" (res)
: "m" (a), "m" (bb)
- : "P0", "R0", "R1", "cc");
+ : "P0", "R0", "R1", "ASTAT" BFIN_HWLOOP0_REGS);
return res;
}
@@ -84,7 +86,7 @@ static inline spx_word16_t DIV32_16(spx_word32_t a, spx_word16_t b)
"%0 = R0;\n\t"
: "=m" (res)
: "m" (a), "m" (bb)
- : "P0", "R0", "R1", "cc");
+ : "P0", "R0", "R1", "ASTAT" BFIN_HWLOOP0_REGS);
return res;
}
@@ -98,6 +100,7 @@ static inline spx_word16_t MAX16(spx_word16_t a, spx_word16_t b)
"%0 = MAX(%1,%2);"
: "=d" (res)
: "%d" (a), "d" (b)
+ : "ASTAT"
);
return res;
}
@@ -113,7 +116,7 @@ static inline spx_word32_t MULT16_32_Q15(spx_word16_t a, spx_word32_t b)
"%0 = (A1 += %2.L*%1.H) ;\n\t"
: "=&W" (res), "=&d" (b)
: "d" (a), "1" (b)
- : "A1"
+ : "A1", "ASTAT"
);
return res;
}
@@ -130,7 +133,7 @@ static inline spx_word32_t MAC16_32_Q15(spx_word32_t c, spx_word16_t a, spx_word
"%0 = %0 + %4;\n\t"
: "=&W" (res), "=&d" (b)
: "d" (a), "1" (b), "d" (c)
- : "A1"
+ : "A1", "ASTAT"
);
return res;
}
@@ -147,7 +150,7 @@ static inline spx_word32_t MULT16_32_Q14(spx_word16_t a, spx_word32_t b)
"%0 = (A1 += %1.L*%2.H);\n\t"
: "=W" (res), "=d" (a), "=d" (b)
: "1" (a), "2" (b)
- : "A1"
+ : "A1", "ASTAT"
);
return res;
}
@@ -165,7 +168,7 @@ static inline spx_word32_t MAC16_32_Q14(spx_word32_t c, spx_word16_t a, spx_word
"%0 = %0 + %4;\n\t"
: "=&W" (res), "=&d" (b)
: "d" (a), "1" (b), "d" (c)
- : "A1"
+ : "A1", "ASTAT"
);
return res;
}
diff --git a/libspeex/lpc_bfin.h b/libspeex/lpc_bfin.h
index 7310ffb..d7d11c0 100644
--- a/libspeex/lpc_bfin.h
+++ b/libspeex/lpc_bfin.h
@@ -33,6 +33,8 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "bfin.h"
+
#define OVERRIDE_SPEEX_AUTOCORR
void _spx_autocorr(
const spx_word16_t *x, /* in: [0...n-1] samples x */
@@ -107,7 +109,8 @@ int n
"P0 += 4;\n\t"
"LOOP_END pitch%=;\n\t"
: : "m" (xs), "m" (x), "m" (ac32top), "m" (N_lag), "m" (lag_1), "m" (nshift)
- : "A0", "A1", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "R3", "R4", "I0", "I1", "L0", "L1", "B0", "B1", "memory"
+ : "A0", "A1", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "R3", "R4", "I0", "I1", "L0", "L1", "B0", "B1", "memory",
+ "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
);
d=0;
for (j=0;j<n;j++)
diff --git a/libspeex/lsp_bfin.h b/libspeex/lsp_bfin.h
index 20e5052..530367c 100644
--- a/libspeex/lsp_bfin.h
+++ b/libspeex/lsp_bfin.h
@@ -79,7 +79,7 @@ static inline spx_word32_t cheb_poly_eva(
"%0 = R3;\n\t"
: "=&d" (sum)
: "a" (x), "a" (&coef[m]), "a" (m-1)
- : "R0", "R1", "R3", "R2", "R4", "R5", "P0", "P1"
+ : "R0", "R1", "R3", "R2", "R4", "R5", "P0", "P1", "ASTAT" BFIN_HWLOOP0_REGS
);
return sum;
}
diff --git a/libspeex/ltp_bfin.h b/libspeex/ltp_bfin.h
index b530f85..b7edd37 100644
--- a/libspeex/ltp_bfin.h
+++ b/libspeex/ltp_bfin.h
@@ -33,6 +33,8 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "bfin.h"
+
#define OVERRIDE_INNER_PROD
spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
{
@@ -57,7 +59,7 @@ spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
"%0 = R0;\n\t"
: "=m" (sum)
: "m" (x), "m" (y), "d" (len-1)
- : "P0", "P1", "P2", "R0", "R1", "A0", "I0", "I1", "L0", "L1", "R3"
+ : "P0", "P1", "P2", "R0", "R1", "A0", "I0", "I1", "L0", "L1", "R3", "ASTAT" BFIN_HWLOOP0_REGS
);
return sum;
}
@@ -104,7 +106,8 @@ void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *c
"LOOP_END pitch%=;\n\t"
"L0 = 0;\n\t"
: : "m" (_x), "m" (_y), "m" (corr), "m" (len), "m" (nb_pitch)
- : "A0", "A1", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "R3", "I0", "I1", "L0", "L1", "B0", "B1", "memory"
+ : "A0", "A1", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "R3", "I0", "I1", "L0", "L1", "B0", "B1", "memory",
+ "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
);
}
@@ -147,7 +150,7 @@ static inline spx_word32_t compute_pitch_error(spx_word16_t *C, spx_word16_t *g,
"%0 = A0;\n\t"
: "=&D" (sum), "=a" (C)
: "d" (g[0]), "d" (g[1]), "d" (g[2]), "d" (pitch_control), "1" (C)
- : "R0", "R1", "R2", "A0"
+ : "R0", "R1", "R2", "A0", "ASTAT"
);
return sum;
}
@@ -201,10 +204,7 @@ void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *p
"eu2: [P0++] = R2;\n\t"
: : "d" (energy), "d" (&sw[-start-1]), "d" (&sw[-start+len-1]),
"a" (end-start)
- : "P0", "I1", "I2", "R0", "R1", "R2", "R3"
-#if (__GNUC__ == 4)
- , "LC1"
-#endif
+ : "P0", "I1", "I2", "R0", "R1", "R2", "R3", "ASTAT" BFIN_HWLOOP1_REGS
);
pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);
@@ -245,10 +245,8 @@ void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *p
" %0 = P0;\n\t"
: "=&d" (pitch[0])
: "a" (corr16), "a" (ener16), "a" (end+1-start), "d" (start)
- : "P0", "P1", "I0", "I1", "R0", "R1", "R2", "R3", "R4", "R5"
-#if (__GNUC__ == 4)
- , "LC1"
-#endif
+ : "P0", "P1", "I0", "I1", "R0", "R1", "R2", "R3", "R4", "R5",
+ "ASTAT", "CC" BFIN_HWLOOP1_REGS
);
}
@@ -407,10 +405,7 @@ static int pitch_gain_search_3tap_vq(
: "a" (gain_cdbk), "a" (C16), "a" (gain_cdbk_size), "a" (max_gain),
"b" (-VERY_LARGE32)
: "R0", "R1", "R2", "R3", "R4", "P0",
- "P1", "I1", "L1", "A0", "B0"
-#if (__GNUC__ == 4)
- , "LC1"
-#endif
+ "P1", "I1", "L1", "A0", "B0", "CC", "ASTAT" BFIN_HWLOOP1_REGS
);
return best_cdbk;
diff --git a/libspeex/misc_bfin.h b/libspeex/misc_bfin.h
index 77b082c..3c8c09d 100644
--- a/libspeex/misc_bfin.h
+++ b/libspeex/misc_bfin.h
@@ -33,6 +33,8 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "bfin.h"
+
#define OVERRIDE_SPEEX_MOVE
void *speex_move (void *dest, void *src, int n)
{
@@ -48,7 +50,7 @@ void *speex_move (void *dest, void *src, int n)
"[%1++] = R0;\n\t"
: "=a" (src), "=a" (dest)
: "a" ((n>>2)-1), "0" (src), "1" (dest)
- : "R0", "I0", "L0", "memory"
+ : "R0", "I0", "L0", "memory" BFIN_HWLOOP0_REGS
);
return dest;
}
diff --git a/libspeex/quant_lsp_bfin.h b/libspeex/quant_lsp_bfin.h
index 087b466..efd23f5 100644
--- a/libspeex/quant_lsp_bfin.h
+++ b/libspeex/quant_lsp_bfin.h
@@ -36,6 +36,8 @@
#define OVERRIDE_LSP_QUANT
#ifdef OVERRIDE_LSP_QUANT
+#include "bfin.h"
+
/*
Note http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html
well tell you all the magic resgister constraints used below
@@ -84,7 +86,8 @@ static int lsp_quant(
" L0 = 0;\n\t"
: "=&d" (best_dist), "=&d" (best_id)
: "a" (x), "b" (cdbk), "a" (nbVec), "a" (nbDim)
- : "I0", "P2", "R0", "R1", "R2", "R3", "R5", "L0", "B0", "A0"
+ : "I0", "P2", "R0", "R1", "R2", "R3", "R5", "L0", "B0", "A0",
+ "CC", "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
);
for (j=0;j<nbDim;j++) {
@@ -154,7 +157,7 @@ static int lsp_weight_quant(
: "=&d" (best_dist), "=&d" (best_id)
: "a" (x), "a" (weight), "b" (cdbk), "a" (nbVec), "a" (nbDim)
: "I0", "I1", "P2", "R0", "R1", "R2", "R3", "R5", "A1",
- "L0", "L1", "B0", "B1"
+ "L0", "L1", "B0", "B1", "CC", "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
);
for (j=0;j<nbDim;j++) {
diff --git a/libspeex/vq_bfin.h b/libspeex/vq_bfin.h
index 2cc9ea5..a4d2d2f 100644
--- a/libspeex/vq_bfin.h
+++ b/libspeex/vq_bfin.h
@@ -33,6 +33,8 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "bfin.h"
+
#define OVERRIDE_VQ_NBEST
void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
{
@@ -66,7 +68,8 @@ void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entri
"LOOP_END entries_loop%=;\n\t"
: "=&D" (dist), "=&a" (codebook), "=&d" (best_dist[0]), "=&d" (nbest[0]), "=&a" (E)
: "a" (len-1), "a" (in), "a" (2), "d" (entries), "d" (len<<1), "1" (codebook), "4" (E), "2" (best_dist[0]), "3" (nbest[0])
- : "R0", "R1", "R2", "I0", "L0", "B0", "A0", "cc", "memory"
+ : "R0", "R1", "R2", "I0", "L0", "B0", "A0", "cc", "memory",
+ "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
);
}
} else {
@@ -89,7 +92,7 @@ void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entri
"%0 = (A0 -= R0.L*R1.L) (IS);\n\t"
: "=D" (dist), "=a" (codebook)
: "a" (len-1), "a" (in), "a" (2), "1" (codebook), "0" (E[i])
- : "R0", "R1", "I0", "L0", "A0"
+ : "R0", "R1", "I0", "L0", "A0", "ASTAT" BFIN_HWLOOP0_REGS
);
if (i<N || dist<best_dist[N-1])
{
--
1.6.2.3
More information about the Speex-dev
mailing list