[Speex-dev] [PATCH] Blackfin: cleanup astat/cc/hardware loop asm clobbers

Mike Frysinger vapier at gentoo.org
Fri Apr 24 16:15:42 PDT 2009


Most asm statements clobber ASTAT bits (shifts, maxes, etc...) but do not
declare the register as clobbered.  Same thing with CC in a few places.
Some places make an attempt at clobbering some hardware loop registers,
but it's very incomplete compared with how many asm statements actually
use hardware loops.

Signed-off-by: Mike Frysinger <vapier at gentoo.org>
---
 libspeex/bfin.h           |   15 +++++++++++++++
 libspeex/cb_search_bfin.h |    9 ++++-----
 libspeex/filters_bfin.h   |   15 ++++++++++-----
 libspeex/fixed_bfin.h     |   15 +++++++++------
 libspeex/lpc_bfin.h       |    5 ++++-
 libspeex/lsp_bfin.h       |    2 +-
 libspeex/ltp_bfin.h       |   25 ++++++++++---------------
 libspeex/misc_bfin.h      |    4 +++-
 libspeex/quant_lsp_bfin.h |    7 +++++--
 libspeex/vq_bfin.h        |    7 +++++--
 10 files changed, 66 insertions(+), 38 deletions(-)
 create mode 100644 libspeex/bfin.h

diff --git a/libspeex/bfin.h b/libspeex/bfin.h
new file mode 100644
index 0000000..b934cf2
--- /dev/null
+++ b/libspeex/bfin.h
@@ -0,0 +1,15 @@
+/* Common Blackfin assembly defines
+ *
+ * Copyright (C) 2005-2009 Analog Devices
+ */
+
+#if __GNUC__ <= 3
+/* GCC-3.4 and older did not use hardware loops and thus did not have
+ * register constraints for declaring clobbers.
+ */
+# define BFIN_HWLOOP0_REGS
+# define BFIN_HWLOOP1_REGS
+#else
+# define BFIN_HWLOOP0_REGS , "LB0", "LT0", "LC0"
+# define BFIN_HWLOOP1_REGS , "LB1", "LT1", "LC1"
+#endif
diff --git a/libspeex/cb_search_bfin.h b/libspeex/cb_search_bfin.h
index ae9cf83..edb1eca 100644
--- a/libspeex/cb_search_bfin.h
+++ b/libspeex/cb_search_bfin.h
@@ -33,6 +33,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#include "bfin.h"
+
 #define OVERRIDE_COMPUTE_WEIGHTED_CODEBOOK 
 void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *r, spx_word16_t *resp, spx_word16_t *resp2, spx_word32_t *E, int shape_cb_size, int subvect_size, char *stack)
 {
@@ -73,10 +75,7 @@ void compute_weighted_codebook(const signed char *shape_cb, const spx_word16_t *
          :
       : "m" (subvect_size), "m" (shape_cb), "m" (r), "m" (resp), "m" (E)
       : "A0", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "I0", "I1", "L0", 
-        "L1", "A0", "A1", "memory"
-#if !(__GNUC__ == 3)
-         , "LC0", "LC1" /* gcc 3.4 doesn't know about LC registers */
-#endif
+        "L1", "A0", "A1", "memory", "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
       );
       shape_cb += subvect_size;
       resp += subvect_size;
@@ -107,6 +106,6 @@ static inline void target_update(spx_word16_t *t, spx_word16_t g, spx_word16_t *
          "LOOP_END tupdate%=;\n\t"
    :
    : "a" (t), "a" (r), "d" (g), "a" (len)
-   : "R0", "R1", "R2", "A1", "I0", "I1", "L0", "L1"
+   : "R0", "R1", "R2", "A1", "I0", "I1", "L0", "L1", "ASTAT" BFIN_HWLOOP0_REGS
          );
 }
diff --git a/libspeex/filters_bfin.h b/libspeex/filters_bfin.h
index 1e433ee..ccd57b9 100644
--- a/libspeex/filters_bfin.h
+++ b/libspeex/filters_bfin.h
@@ -32,6 +32,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#include "bfin.h"
+
 #define OVERRIDE_NORMALIZE16
 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
 {
@@ -50,7 +52,7 @@ int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int le
    "LOOP_END norm_max%=;\n\t"
    : "=&d" (max_val)
    : "a" (x), "a" (len)
-   : "R1", "R2"
+   : "R1", "R2", "ASTAT" BFIN_HWLOOP0_REGS
    );
 
    sig_shift=0;
@@ -74,7 +76,7 @@ int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int le
    "R1 = ASHIFT R0 by %2.L;\n\t"
    "W[P1++] = R1;\n\t"
    : : "a" (x), "a" (y), "d" (-sig_shift), "a" (len-1)
-   : "I0", "L0", "P1", "R0", "R1", "memory"
+   : "I0", "L0", "P1", "R0", "R1", "memory", "ASTAT" BFIN_HWLOOP0_REGS
    );
    return sig_shift;
 }
@@ -219,7 +221,8 @@ void filter_mem16(const spx_word16_t *_x, const spx_coef_t *num, const spx_coef_
    "LOOP_END mem_update%=;\n\t"
    "L0 = 0;\n\t"
    : : "m" (xy), "m" (_x), "m" (_y), "m" (numden), "m" (N), "m" (ord), "m" (mem)
-   : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B0", "I0", "I2", "L0", "L2", "M0", "memory"
+   : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B0", "I0", "I2", "L0", "L2", "M0", "memory",
+     "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
    );
 
 }
@@ -345,7 +348,8 @@ void iir_mem16(const spx_word16_t *_x, const spx_coef_t *den, spx_word16_t *_y,
    "LOOP_END mem_update%=;\n\t"
    "L1 = 0;\n\t"
    : : "m" (yy), "m" (_x), "m" (_y), "m" (den), "m" (N), "m" (ord), "m" (mem)
-   : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B1", "I1", "I3", "L1", "L3", "memory"
+   : "A0", "A1", "R0", "R1", "R2", "R3", "R4", "R5", "P0", "P1", "P2", "P3", "P4", "B1", "I1", "I3", "L1", "L3", "memory",
+     "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
    );
 
 }
@@ -426,7 +430,8 @@ void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, cons
          "LOOP_END samples%=;\n\t"
    : "=a" (ytmp2), "=a" (y)
    : "a" (awk2), "a" (ak), "d" (ord), "m" (N), "0" (ytmp2), "1" (y)
-   : "A0", "A1", "R0", "R1", "R2", "R3", "I0", "I1", "I2", "I3", "L0", "L1", "L2", "L3", "A0", "A1"
+   : "A0", "A1", "R0", "R1", "R2", "R3", "I0", "I1", "I2", "I3", "L0", "L1", "L2", "L3",
+     "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
    );
 }
 
diff --git a/libspeex/fixed_bfin.h b/libspeex/fixed_bfin.h
index aa26f6a..9eb21e3 100644
--- a/libspeex/fixed_bfin.h
+++ b/libspeex/fixed_bfin.h
@@ -36,6 +36,8 @@
 #ifndef FIXED_BFIN_H
 #define FIXED_BFIN_H
 
+#include "bfin.h"
+
 #undef PDIV32_16
 static inline spx_word16_t PDIV32_16(spx_word32_t a, spx_word16_t b)
 {
@@ -57,7 +59,7 @@ static inline spx_word16_t PDIV32_16(spx_word32_t a, spx_word16_t b)
          "%0 = R0;\n\t"
    : "=m" (res)
    : "m" (a), "m" (bb)
-   : "P0", "R0", "R1", "cc");
+   : "P0", "R0", "R1", "ASTAT" BFIN_HWLOOP0_REGS);
    return res;
 }
 
@@ -84,7 +86,7 @@ static inline spx_word16_t DIV32_16(spx_word32_t a, spx_word16_t b)
          "%0 = R0;\n\t"
    : "=m" (res)
    : "m" (a), "m" (bb)
-   : "P0", "R0", "R1", "cc");
+   : "P0", "R0", "R1", "ASTAT" BFIN_HWLOOP0_REGS);
    return res;
 }
 
@@ -98,6 +100,7 @@ static inline spx_word16_t MAX16(spx_word16_t a, spx_word16_t b)
          "%0 = MAX(%1,%2);"
    : "=d" (res)
    : "%d" (a), "d" (b)
+   : "ASTAT"
    );
    return res;
 }
@@ -113,7 +116,7 @@ static inline spx_word32_t MULT16_32_Q15(spx_word16_t a, spx_word32_t b)
          "%0 = (A1 += %2.L*%1.H) ;\n\t"
    : "=&W" (res), "=&d" (b)
    : "d" (a), "1" (b)
-   : "A1"
+   : "A1", "ASTAT"
    );
    return res;
 }
@@ -130,7 +133,7 @@ static inline spx_word32_t MAC16_32_Q15(spx_word32_t c, spx_word16_t a, spx_word
          "%0 = %0 + %4;\n\t"
    : "=&W" (res), "=&d" (b)
    : "d" (a), "1" (b), "d" (c)
-   : "A1"
+   : "A1", "ASTAT"
          );
    return res;
 }
@@ -147,7 +150,7 @@ static inline spx_word32_t MULT16_32_Q14(spx_word16_t a, spx_word32_t b)
          "%0 = (A1 += %1.L*%2.H);\n\t"
    : "=W" (res), "=d" (a), "=d" (b)
    : "1" (a), "2" (b)
-   : "A1"
+   : "A1", "ASTAT"
          );
    return res;
 }
@@ -165,7 +168,7 @@ static inline spx_word32_t MAC16_32_Q14(spx_word32_t c, spx_word16_t a, spx_word
          "%0 = %0 + %4;\n\t"
    : "=&W" (res), "=&d" (b)
    : "d" (a), "1" (b), "d" (c)
-   : "A1"
+   : "A1", "ASTAT"
          );
    return res;
 }
diff --git a/libspeex/lpc_bfin.h b/libspeex/lpc_bfin.h
index 7310ffb..d7d11c0 100644
--- a/libspeex/lpc_bfin.h
+++ b/libspeex/lpc_bfin.h
@@ -33,6 +33,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#include "bfin.h"
+
 #define OVERRIDE_SPEEX_AUTOCORR
 void _spx_autocorr(
 const spx_word16_t *x,   /*  in: [0...n-1] samples x   */
@@ -107,7 +109,8 @@ int          n
             "P0 += 4;\n\t"
          "LOOP_END pitch%=;\n\t"
    : : "m" (xs), "m" (x), "m" (ac32top), "m" (N_lag), "m" (lag_1), "m" (nshift)
-   : "A0", "A1", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "R3", "R4", "I0", "I1", "L0", "L1", "B0", "B1", "memory"
+   : "A0", "A1", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "R3", "R4", "I0", "I1", "L0", "L1", "B0", "B1", "memory",
+     "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
    );
    d=0;
    for (j=0;j<n;j++)
diff --git a/libspeex/lsp_bfin.h b/libspeex/lsp_bfin.h
index 20e5052..530367c 100644
--- a/libspeex/lsp_bfin.h
+++ b/libspeex/lsp_bfin.h
@@ -79,7 +79,7 @@ static inline spx_word32_t cheb_poly_eva(
       "%0 = R3;\n\t"
       : "=&d" (sum)
       : "a" (x), "a" (&coef[m]), "a" (m-1)
-      : "R0", "R1", "R3", "R2", "R4", "R5", "P0", "P1"
+      : "R0", "R1", "R3", "R2", "R4", "R5", "P0", "P1", "ASTAT" BFIN_HWLOOP0_REGS
       );
     return sum;
 }
diff --git a/libspeex/ltp_bfin.h b/libspeex/ltp_bfin.h
index b530f85..b7edd37 100644
--- a/libspeex/ltp_bfin.h
+++ b/libspeex/ltp_bfin.h
@@ -33,6 +33,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#include "bfin.h"
+
 #define OVERRIDE_INNER_PROD
 spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
 {
@@ -57,7 +59,7 @@ spx_word32_t inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
       "%0 = R0;\n\t"
    : "=m" (sum)
    : "m" (x), "m" (y), "d" (len-1)
-   : "P0", "P1", "P2", "R0", "R1", "A0", "I0", "I1", "L0", "L1", "R3"
+   : "P0", "P1", "P2", "R0", "R1", "A0", "I0", "I1", "L0", "L1", "R3", "ASTAT" BFIN_HWLOOP0_REGS
    );
    return sum;
 }
@@ -104,7 +106,8 @@ void pitch_xcorr(const spx_word16_t *_x, const spx_word16_t *_y, spx_word32_t *c
       "LOOP_END pitch%=;\n\t"
       "L0 = 0;\n\t"
    : : "m" (_x), "m" (_y), "m" (corr), "m" (len), "m" (nb_pitch)
-   : "A0", "A1", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "R3", "I0", "I1", "L0", "L1", "B0", "B1", "memory"
+   : "A0", "A1", "P0", "P1", "P2", "P3", "P4", "R0", "R1", "R2", "R3", "I0", "I1", "L0", "L1", "B0", "B1", "memory",
+     "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
    );
 }
 
@@ -147,7 +150,7 @@ static inline spx_word32_t compute_pitch_error(spx_word16_t *C, spx_word16_t *g,
          "%0 = A0;\n\t"
    : "=&D" (sum), "=a" (C)
    : "d" (g[0]), "d" (g[1]), "d" (g[2]), "d" (pitch_control), "1" (C)
-   : "R0", "R1", "R2", "A0"
+   : "R0", "R1", "R2", "A0", "ASTAT"
          );
    return sum;
 }
@@ -201,10 +204,7 @@ void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *p
 "eu2:      [P0++] = R2;\n\t"
        : : "d" (energy), "d" (&sw[-start-1]), "d" (&sw[-start+len-1]),
            "a" (end-start)  
-       : "P0", "I1", "I2", "R0", "R1", "R2", "R3"
-#if (__GNUC__ == 4)
-         , "LC1"
-#endif
+       : "P0", "I1", "I2", "R0", "R1", "R2", "R3", "ASTAT" BFIN_HWLOOP1_REGS
        );
 
    pitch_xcorr(sw, sw-end, corr, len, end-start+1, stack);
@@ -245,10 +245,8 @@ void open_loop_nbest_pitch(spx_word16_t *sw, int start, int end, int len, int *p
 "        %0 = P0;\n\t"
        : "=&d" (pitch[0])
        : "a" (corr16), "a" (ener16), "a" (end+1-start), "d" (start) 
-       : "P0", "P1", "I0", "I1", "R0", "R1", "R2", "R3", "R4", "R5"
-#if (__GNUC__ == 4)
-         , "LC1"
-#endif
+       : "P0", "P1", "I0", "I1", "R0", "R1", "R2", "R3", "R4", "R5",
+         "ASTAT", "CC" BFIN_HWLOOP1_REGS
        );
 
       }
@@ -407,10 +405,7 @@ static int pitch_gain_search_3tap_vq(
        : "a" (gain_cdbk), "a" (C16), "a" (gain_cdbk_size), "a" (max_gain),
          "b" (-VERY_LARGE32)
        : "R0", "R1", "R2", "R3", "R4", "P0", 
-         "P1", "I1", "L1", "A0", "B0"
-#if (__GNUC__ == 4)
-         , "LC1"
-#endif
+         "P1", "I1", "L1", "A0", "B0", "CC", "ASTAT" BFIN_HWLOOP1_REGS
        );
 
   return best_cdbk;
diff --git a/libspeex/misc_bfin.h b/libspeex/misc_bfin.h
index 77b082c..3c8c09d 100644
--- a/libspeex/misc_bfin.h
+++ b/libspeex/misc_bfin.h
@@ -33,6 +33,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#include "bfin.h"
+
 #define OVERRIDE_SPEEX_MOVE
 void *speex_move (void *dest, void *src, int n)
 {
@@ -48,7 +50,7 @@ void *speex_move (void *dest, void *src, int n)
          "[%1++] = R0;\n\t"
    : "=a" (src), "=a" (dest)
    : "a" ((n>>2)-1), "0" (src), "1" (dest)
-   : "R0", "I0", "L0", "memory"
+   : "R0", "I0", "L0", "memory" BFIN_HWLOOP0_REGS
          );
    return dest;
 }
diff --git a/libspeex/quant_lsp_bfin.h b/libspeex/quant_lsp_bfin.h
index 087b466..efd23f5 100644
--- a/libspeex/quant_lsp_bfin.h
+++ b/libspeex/quant_lsp_bfin.h
@@ -36,6 +36,8 @@
 #define OVERRIDE_LSP_QUANT
 #ifdef OVERRIDE_LSP_QUANT
 
+#include "bfin.h"
+
 /*
   Note http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html
   well tell you all the magic resgister constraints used below
@@ -84,7 +86,8 @@ static int lsp_quant(
 "         L0 = 0;\n\t"
    : "=&d" (best_dist), "=&d" (best_id)
    : "a" (x), "b" (cdbk), "a" (nbVec), "a" (nbDim)
-   : "I0", "P2", "R0", "R1", "R2", "R3", "R5", "L0", "B0", "A0"
+   : "I0", "P2", "R0", "R1", "R2", "R3", "R5", "L0", "B0", "A0",
+     "CC", "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
    );
 
    for (j=0;j<nbDim;j++) {
@@ -154,7 +157,7 @@ static int lsp_weight_quant(
    : "=&d" (best_dist), "=&d" (best_id)
    : "a" (x), "a" (weight), "b" (cdbk), "a" (nbVec), "a" (nbDim)
    : "I0", "I1", "P2", "R0", "R1", "R2", "R3", "R5", "A1",
-     "L0", "L1", "B0", "B1"
+     "L0", "L1", "B0", "B1", "CC", "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
    );
 
    for (j=0;j<nbDim;j++) {
diff --git a/libspeex/vq_bfin.h b/libspeex/vq_bfin.h
index 2cc9ea5..a4d2d2f 100644
--- a/libspeex/vq_bfin.h
+++ b/libspeex/vq_bfin.h
@@ -33,6 +33,8 @@
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
 
+#include "bfin.h"
+
 #define OVERRIDE_VQ_NBEST
 void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entries, spx_word32_t *E, int N, int *nbest, spx_word32_t *best_dist, char *stack)
 {
@@ -66,7 +68,8 @@ void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entri
             "LOOP_END entries_loop%=;\n\t"
             : "=&D" (dist), "=&a" (codebook), "=&d" (best_dist[0]), "=&d" (nbest[0]), "=&a" (E)
             : "a" (len-1), "a" (in), "a" (2), "d" (entries), "d" (len<<1), "1" (codebook), "4" (E), "2" (best_dist[0]), "3" (nbest[0])
-            : "R0", "R1", "R2", "I0", "L0", "B0", "A0", "cc", "memory"
+            : "R0", "R1", "R2", "I0", "L0", "B0", "A0", "cc", "memory",
+              "ASTAT" BFIN_HWLOOP0_REGS BFIN_HWLOOP1_REGS
                );
       }
    } else {
@@ -89,7 +92,7 @@ void vq_nbest(spx_word16_t *in, const spx_word16_t *codebook, int len, int entri
             "%0 = (A0 -= R0.L*R1.L) (IS);\n\t"
          : "=D" (dist), "=a" (codebook)
          : "a" (len-1), "a" (in), "a" (2), "1" (codebook), "0" (E[i])
-         : "R0", "R1", "I0", "L0", "A0"
+         : "R0", "R1", "I0", "L0", "A0", "ASTAT" BFIN_HWLOOP0_REGS
             );
       if (i<N || dist<best_dist[N-1])
       {
-- 
1.6.2.3



More information about the Speex-dev mailing list