[opus] [PATCH] Make CELT FFT twiddle complex type aligned
Zheng Lv
lvzheng at google.com
Thu Sep 15 11:03:59 UTC 2022
This makes kiss_twiddle_cpx 4-byte aligned (instead of 2-byte) for
fixed-point builds with GCC-compatible compilers. In tests on an armv6j+nofp
development board, CELT encoding becomes 1.4x as fast, and decoding over 2x as fast.
The performance gain is mostly attributable to the proper alignment of the
static const array mdct_twiddles960.
Co-authored-by: David Gao <davidgao at google.com>
---
celt/kiss_fft.h | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/celt/kiss_fft.h b/celt/kiss_fft.h
index bffa2bfa..267f72f9 100644
--- a/celt/kiss_fft.h
+++ b/celt/kiss_fft.h
@@ -49,31 +49,41 @@ extern "C" {
#ifdef FIXED_POINT
#include "arch.h"
# define kiss_fft_scalar opus_int32
# define kiss_twiddle_scalar opus_int16
+/* Some 32-bit CPUs would load/store a kiss_twiddle_cpx with a single memory
+ * access, and could benefit from additional alignment.
+ */
+# define KISS_TWIDDLE_CPX_ALIGNMENT (sizeof(opus_int32))
#else
# ifndef kiss_fft_scalar
/* default is float */
# define kiss_fft_scalar float
# define kiss_twiddle_scalar float
# define KF_SUFFIX _celt_single
# endif
#endif
+#if defined(__GNUC__) && defined(KISS_TWIDDLE_CPX_ALIGNMENT)
+#define KISS_TWIDDLE_CPX_ALIGNED __attribute__((aligned(KISS_TWIDDLE_CPX_ALIGNMENT)))
+#else
+#define KISS_TWIDDLE_CPX_ALIGNED
+#endif
+
typedef struct {
kiss_fft_scalar r;
kiss_fft_scalar i;
}kiss_fft_cpx;
typedef struct {
kiss_twiddle_scalar r;
kiss_twiddle_scalar i;
-}kiss_twiddle_cpx;
+} KISS_TWIDDLE_CPX_ALIGNED kiss_twiddle_cpx;
#define MAXFACTORS 8
/* e.g. an fft of length 128 has 4 factors
as far as kissfft is concerned
4*4*4*2
*/
--
2.37.2.789.g6183377224-goog
More information about the opus
mailing list