[opus] [RFC PATCH v1 4/8] aarch64: Enable intrinsics for aarch64
Viswanath Puttagunta
viswanath.puttagunta at linaro.org
Tue Apr 28 15:24:52 PDT 2015
Enables existing neon intrinsic optimizations to work
on aarch64 target.
Signed-off-by: Viswanath Puttagunta <viswanath.puttagunta at linaro.org>
---
Makefile.am | 4 +-
celt/arm/arm_celt_map.c | 4 +-
celt/arm/celt_ne10_fft.c | 2 +
celt/arm/celt_ne10_mdct.c | 3 ++
celt/arm/pitch_arm.h | 2 +-
celt/dump_modes/Makefile | 2 +-
celt/pitch.h | 5 +--
celt/tests/test_unit_dft.c | 3 +-
celt/tests/test_unit_mathops.c | 7 ++--
celt/tests/test_unit_mdct.c | 4 +-
celt/tests/test_unit_rotation.c | 5 ++-
configure.ac | 93 +++++++++++++++++++----------------------
12 files changed, 67 insertions(+), 67 deletions(-)
diff --git a/Makefile.am b/Makefile.am
index 3a75740..8bd7447 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -47,7 +47,7 @@ if CPU_ARM
CELT_SOURCES += $(CELT_SOURCES_ARM)
SILK_SOURCES += $(SILK_SOURCES_ARM)
-if OPUS_ARM_NEON_INTR
+if HAVE_ARM_NEON_INTR
CELT_SOURCES += $(CELT_SOURCES_ARM_NEON_INTR)
endif
@@ -286,7 +286,7 @@ SSE4_1_OBJ = $(CELT_SOURCES_SSE4_1:.c=.lo) \
$(SSE4_1_OBJ) $(OPT_UNIT_TEST_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS)
endif
-if OPUS_ARM_NEON_INTR
+if HAVE_ARM_NEON_INTR
CELT_ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
$(CELT_SOURCES_ARM_NE10:.c=.lo) \
%test_unit_mdct.o %test_unit_dft.o
diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c
index f132fe1..918e6cf 100644
--- a/celt/arm/arm_celt_map.c
+++ b/celt/arm/arm_celt_map.c
@@ -44,7 +44,7 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
};
# else /* !FIXED_POINT */
-# if defined(OPUS_ARM_NEON_INTR)
+# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int) = {
celt_pitch_xcorr_c, /* ARMv4 */
@@ -113,7 +113,7 @@ void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
};
#endif /* HAVE_ARM_NE10 */
-# endif /* OPUS_ARM_NEON_INTR */
+# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
# endif /* FIXED_POINT */
#endif /* OPUS_HAVE_RTCD */
diff --git a/celt/arm/celt_ne10_fft.c b/celt/arm/celt_ne10_fft.c
index d354502..1901024 100644
--- a/celt/arm/celt_ne10_fft.c
+++ b/celt/arm/celt_ne10_fft.c
@@ -44,6 +44,7 @@
#include "os_support.h"
#include "stack_alloc.h"
+#if !defined(FIXED_POINT)
#ifdef CUSTOM_MODES
/* nfft lengths in NE10 that support scaled fft */
@@ -144,3 +145,4 @@ void opus_ifft_float_neon(const kiss_fft_state *st,
}
RESTORE_STACK;
}
+#endif /* !defined(FIXED_POINT) */
diff --git a/celt/arm/celt_ne10_mdct.c b/celt/arm/celt_ne10_mdct.c
index 0979cbe..938fc93 100644
--- a/celt/arm/celt_ne10_mdct.c
+++ b/celt/arm/celt_ne10_mdct.c
@@ -43,6 +43,8 @@
#include "os_support.h"
#include "stack_alloc.h"
+#if !defined(FIXED_POINT)
+
void clt_mdct_forward_float_neon(const mdct_lookup *l,
kiss_fft_scalar *in,
kiss_fft_scalar * OPUS_RESTRICT out,
@@ -258,3 +260,4 @@ void clt_mdct_backward_float_neon(const mdct_lookup *l,
}
RESTORE_STACK;
}
+#endif /* !defined(FIXED_POINT) */
diff --git a/celt/arm/pitch_arm.h b/celt/arm/pitch_arm.h
index 8626ed7..344186b 100644
--- a/celt/arm/pitch_arm.h
+++ b/celt/arm/pitch_arm.h
@@ -57,7 +57,7 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch);
-#if !defined(OPUS_HAVE_RTCD) || defined(OPUS_ARM_PRESUME_NEON_INTR)
+#if defined(OPUS_ARM_PRESUME_NEON_INTR)
#define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
diff --git a/celt/dump_modes/Makefile b/celt/dump_modes/Makefile
index 10c3679..fef8d94 100644
--- a/celt/dump_modes/Makefile
+++ b/celt/dump_modes/Makefile
@@ -15,7 +15,7 @@ SOURCES = dump_modes.c \
ifdef HAVE_ARM_NE10
CC = gcc
CFLAGS += -mfpu=neon
-INCLUDES += -I$(NE10_INCDIR) -DHAVE_ARM_NE10 -DOPUS_ARM_NEON_INTR
+INCLUDES += -I$(NE10_INCDIR) -DHAVE_ARM_NE10 -DOPUS_ARM_PRESUME_NEON_INTR
LIBDIR = -l:$(NE10_LIBDIR)/libNE10.so
SOURCES += ../arm/celt_ne10_fft.c \
dump_modes_arm_ne10.c \
diff --git a/celt/pitch.h b/celt/pitch.h
index af745eb..dde48c8 100644
--- a/celt/pitch.h
+++ b/celt/pitch.h
@@ -46,8 +46,7 @@
#include "mips/pitch_mipsr1.h"
#endif
-#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
- || defined(OPUS_ARM_NEON_INTR))
+#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
# include "arm/pitch_arm.h"
#endif
@@ -189,7 +188,7 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
#if !defined(OVERRIDE_PITCH_XCORR)
/*Is run-time CPU detection enabled on this platform?*/
# if defined(OPUS_HAVE_RTCD) && \
- (defined(OPUS_ARM_ASM) || (defined(OPUS_ARM_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)))
+ (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
extern
# if defined(FIXED_POINT)
opus_val32
diff --git a/celt/tests/test_unit_dft.c b/celt/tests/test_unit_dft.c
index 9fbcdc4..e17e26f 100644
--- a/celt/tests/test_unit_dft.c
+++ b/celt/tests/test_unit_dft.c
@@ -45,8 +45,7 @@
#include "mathops.c"
#include "entcode.c"
-#if defined(OPUS_HAVE_RTCD) && \
- (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) || defined(OPUS_ARM_ASM)
#include "arm/armcpu.c"
#if !defined(FIXED_POINT)
#if defined(HAVE_ARM_NE10)
diff --git a/celt/tests/test_unit_mathops.c b/celt/tests/test_unit_mathops.c
index a1cf2f7..2e43e07 100644
--- a/celt/tests/test_unit_mathops.c
+++ b/celt/tests/test_unit_mathops.c
@@ -65,17 +65,18 @@
#include "x86/celt_lpc_sse.c"
#endif
#include "x86/x86_celt_map.c"
+
#elif ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
- || defined(OPUS_ARM_NEON_INTR))
-#if defined(OPUS_ARM_NEON_INTR)
+ || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
#include "arm/celt_neon_intr.c"
+#endif
#if defined(HAVE_ARM_NE10)
#include "kiss_fft.c"
#include "mdct.c"
#include "arm/celt_ne10_fft.c"
#include "arm/celt_ne10_mdct.c"
#endif
-#endif
#include "arm/arm_celt_map.c"
#endif
diff --git a/celt/tests/test_unit_mdct.c b/celt/tests/test_unit_mdct.c
index fdee079..53258fe 100644
--- a/celt/tests/test_unit_mdct.c
+++ b/celt/tests/test_unit_mdct.c
@@ -46,8 +46,8 @@
#include "mathops.c"
#include "entcode.c"
-#if defined(OPUS_HAVE_RTCD) && \
- (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+
+#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) || defined(OPUS_ARM_ASM)
#include "arm/armcpu.c"
#if !defined(FIXED_POINT)
#if defined(HAVE_ARM_NE10)
diff --git a/celt/tests/test_unit_rotation.c b/celt/tests/test_unit_rotation.c
index 4ac838e..ecab5cb 100644
--- a/celt/tests/test_unit_rotation.c
+++ b/celt/tests/test_unit_rotation.c
@@ -63,9 +63,10 @@
#include "x86/celt_lpc_sse.c"
#endif
#include "x86/x86_celt_map.c"
+
#elif ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
- || defined(OPUS_ARM_NEON_INTR))
-#if defined(OPUS_ARM_NEON_INTR)
+ || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
#include "arm/celt_neon_intr.c"
#endif
#if defined(HAVE_ARM_NE10)
diff --git a/configure.ac b/configure.ac
index 2380a5c..a150d87 100644
--- a/configure.ac
+++ b/configure.ac
@@ -444,7 +444,7 @@ AC_DEFUN([OPUS_PATH_NE10],
AS_IF([test x"$enable_intrinsics" = x"yes"],[
intrinsics_support=""
AS_CASE([$host_cpu],
- [arm*],
+ [arm*|aarch64],
[
cpu_arm=yes
OPUS_CHECK_INTRINSICS(
@@ -459,55 +459,50 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
SUMM = vmlaq_f32(SUMM, A0, A1);
]]
)
- AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1" && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
- [
- OPUS_ARM_NEON_INTR_CFLAGS="$ARM_NEON_INTR_CFLAGS"
- AC_SUBST([OPUS_ARM_NEON_INTR_CFLAGS])
- ]
+
+ AS_CASE([$host_cpu],
+ [arm*],
+ [
+ AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"],
+ [
+ OPUS_ARM_NEON_INTR_CFLAGS="$ARM_NEON_INTR_CFLAGS"
+ AC_SUBST([OPUS_ARM_NEON_INTR_CFLAGS])
+ dnl Don't see why defining these is necessary to check features at runtime
+ AC_DEFINE([OPUS_ARM_MAY_HAVE_EDSP], 1, [Define if compiler support EDSP Instructions])
+ AC_DEFINE([OPUS_ARM_MAY_HAVE_MEDIA], 1, [Define if compiler support MEDIA Instructions])
+ AC_DEFINE([OPUS_ARM_MAY_HAVE_NEON], 1, [Define if compiler support NEON instructions])
+ ]
+ )
+ ]
)
- #Currently we only have intrinsic optimizations for floating point
- AS_IF([test x"$enable_float" = x"yes"],
+ AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"],
[
- AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"],
- [
- OPUS_ARM_NEON_INTR=1
- AC_DEFINE([OPUS_ARM_NEON_INTR], 1,
- [Support ARMv7 Neon Intrinsics for float])
- AC_DEFINE([OPUS_ARM_MAY_HAVE_NEON_INTR], 1,
- [Compiler supports ARMv7 Neon Intrinsics])
- intrinsics_support="$intrinsics_support (Neon_Intrinsics)"
-
- AS_IF([test x"enable_rtcd" != x"" && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
- [rtcd_support="$rtcd_support (ARMv7_Neon_Intrinsics)"],[])
-
- AS_IF([test x"$OPUS_ARM_PRESUME_NEON_INTR" = x"1"],
- [AC_DEFINE([OPUS_ARM_PRESUME_NEON_INTR], 1,
- [Define if binary requires NEON intrinsics support])])
-
- AS_IF([test x"$rtcd_support" = x""],
- [rtcd_support=no])
-
- AS_IF([test x"$intrinsics_support" = x""],
- [intrinsics_support=no],
- [intrinsics_support="arm$intrinsics_support"])
-
- dnl Don't see why defining these is necessary to check features at runtime
- AC_DEFINE([OPUS_ARM_MAY_HAVE_EDSP], 1, [Define if compiler support EDSP Instructions])
- AC_DEFINE([OPUS_ARM_MAY_HAVE_MEDIA], 1, [Define if compiler support MEDIA Instructions])
- AC_DEFINE([OPUS_ARM_MAY_HAVE_NEON], 1, [Define if compiler support NEON instructions])
-
- OPUS_PATH_NE10()
- AS_IF([test x"$HAVE_ARM_NE10" = x"1"],
- [intrinsics_support="$intrinsics_support NE10"],[])
- ],
- [
- AC_MSG_WARN([Compiler does not support ARM intrinsics])
- intrinsics_support=no
- ])
- ], [
- AC_MSG_WARN([Currently only have ARM intrinsics for float])
- intrinsics_support=no
+ AC_DEFINE([OPUS_ARM_MAY_HAVE_NEON_INTR], 1,
+ [Compiler supports ARMv7 Neon Intrinsics])
+ intrinsics_support="$intrinsics_support (Neon_Intrinsics)"
+
+ AS_IF([test x"enable_rtcd" != x"" && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
+ [rtcd_support="$rtcd_support (ARMv7_Neon_Intrinsics)"],[])
+
+ AS_IF([test x"$OPUS_ARM_PRESUME_NEON_INTR" = x"1"],
+ [AC_DEFINE([OPUS_ARM_PRESUME_NEON_INTR], 1,
+ [Define if binary requires NEON intrinsics support])])
+
+ AS_IF([test x"$rtcd_support" = x""],
+ [rtcd_support=no])
+
+ AS_IF([test x"$intrinsics_support" = x""],
+ [intrinsics_support=no],
+ [intrinsics_support="arm$intrinsics_support"])
+
+ OPUS_PATH_NE10()
+ AS_IF([test x"$HAVE_ARM_NE10" = x"1"],
+ [intrinsics_support="$intrinsics_support NE10"],[])
+ ],
+ [
+ AC_MSG_WARN([Compiler does not support ARM intrinsics])
+ intrinsics_support=no
])
],
[i?86|x86_64],
@@ -663,8 +658,8 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
])
AM_CONDITIONAL([CPU_ARM], [test "$cpu_arm" = "yes"])
-AM_CONDITIONAL([OPUS_ARM_NEON_INTR],
- [test x"$OPUS_ARM_NEON_INTR" = x"1"])
+AM_CONDITIONAL([HAVE_ARM_NEON_INTR],
+ [test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"])
AM_CONDITIONAL([HAVE_ARM_NE10],
[test x"$HAVE_ARM_NE10" = x"1"])
--
1.9.1
More information about the opus
mailing list