[opus] [RFC PATCH v1 4/5] aarch64: Enable intrinsics for aarch64

Viswanath Puttagunta viswanath.puttagunta at linaro.org
Tue Mar 31 15:57:37 PDT 2015


Enables existing neon intrinsic optimizations to work
on aarch64 target.

Signed-off-by: Viswanath Puttagunta <viswanath.puttagunta at linaro.org>
---
 Makefile.am                     |  4 +-
 celt/arm/arm_celt_map.c         |  4 +-
 celt/arm/celt_ne10_fft.c        |  2 +
 celt/arm/celt_ne10_mdct.c       |  3 ++
 celt/arm/pitch_arm.h            |  2 +-
 celt/dump_modes/Makefile        |  2 +-
 celt/pitch.h                    |  5 +--
 celt/tests/test_unit_dft.c      |  3 +-
 celt/tests/test_unit_mathops.c  |  7 ++--
 celt/tests/test_unit_mdct.c     |  4 +-
 celt/tests/test_unit_rotation.c |  5 ++-
 configure.ac                    | 93 +++++++++++++++++++----------------------
 12 files changed, 67 insertions(+), 67 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index 3a75740..8bd7447 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -47,7 +47,7 @@ if CPU_ARM
 CELT_SOURCES += $(CELT_SOURCES_ARM)
 SILK_SOURCES += $(SILK_SOURCES_ARM)
 
-if OPUS_ARM_NEON_INTR
+if HAVE_ARM_NEON_INTR
 CELT_SOURCES += $(CELT_SOURCES_ARM_NEON_INTR)
 endif
 
@@ -286,7 +286,7 @@ SSE4_1_OBJ = $(CELT_SOURCES_SSE4_1:.c=.lo) \
 $(SSE4_1_OBJ) $(OPT_UNIT_TEST_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS)
 endif
 
-if OPUS_ARM_NEON_INTR
+if HAVE_ARM_NEON_INTR
 CELT_ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
                          $(CELT_SOURCES_ARM_NE10:.c=.lo) \
                          %test_unit_mdct.o %test_unit_dft.o
diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c
index f132fe1..918e6cf 100644
--- a/celt/arm/arm_celt_map.c
+++ b/celt/arm/arm_celt_map.c
@@ -44,7 +44,7 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
   MAY_HAVE_NEON(celt_pitch_xcorr)   /* NEON */
 };
 # else /* !FIXED_POINT */
-#  if defined(OPUS_ARM_NEON_INTR)
+#  if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
 void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
     const opus_val16 *, opus_val32 *, int, int) = {
   celt_pitch_xcorr_c,              /* ARMv4 */
@@ -113,7 +113,7 @@ void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
 };
 
 #endif /* HAVE_ARM_NE10 */
-#  endif /* OPUS_ARM_NEON_INTR */
+#  endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
 # endif /* FIXED_POINT */
 
 #endif /* OPUS_HAVE_RTCD */
diff --git a/celt/arm/celt_ne10_fft.c b/celt/arm/celt_ne10_fft.c
index d354502..1901024 100644
--- a/celt/arm/celt_ne10_fft.c
+++ b/celt/arm/celt_ne10_fft.c
@@ -44,6 +44,7 @@
 #include "os_support.h"
 #include "stack_alloc.h"
 
+#if !defined(FIXED_POINT)
 #ifdef CUSTOM_MODES
 
 /* nfft lengths in NE10 that support scaled fft */
@@ -144,3 +145,4 @@ void opus_ifft_float_neon(const kiss_fft_state *st,
    }
    RESTORE_STACK;
 }
+#endif /* !defined(FIXED_POINT) */
diff --git a/celt/arm/celt_ne10_mdct.c b/celt/arm/celt_ne10_mdct.c
index 0979cbe..938fc93 100644
--- a/celt/arm/celt_ne10_mdct.c
+++ b/celt/arm/celt_ne10_mdct.c
@@ -43,6 +43,8 @@
 #include "os_support.h"
 #include "stack_alloc.h"
 
+#if !defined(FIXED_POINT)
+
 void clt_mdct_forward_float_neon(const mdct_lookup *l,
                                  kiss_fft_scalar *in,
                                  kiss_fft_scalar * OPUS_RESTRICT out,
@@ -258,3 +260,4 @@ void clt_mdct_backward_float_neon(const mdct_lookup *l,
    }
    RESTORE_STACK;
 }
+#endif /* !defined(FIXED_POINT) */
diff --git a/celt/arm/pitch_arm.h b/celt/arm/pitch_arm.h
index 8626ed7..344186b 100644
--- a/celt/arm/pitch_arm.h
+++ b/celt/arm/pitch_arm.h
@@ -57,7 +57,7 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
 #if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
 void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
                                  opus_val32 *xcorr, int len, int max_pitch);
-#if !defined(OPUS_HAVE_RTCD) || defined(OPUS_ARM_PRESUME_NEON_INTR)
+#if defined(OPUS_ARM_PRESUME_NEON_INTR)
 #define OVERRIDE_PITCH_XCORR (1)
 #   define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
    ((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
diff --git a/celt/dump_modes/Makefile b/celt/dump_modes/Makefile
index 10c3679..fef8d94 100644
--- a/celt/dump_modes/Makefile
+++ b/celt/dump_modes/Makefile
@@ -15,7 +15,7 @@ SOURCES = dump_modes.c \
 ifdef HAVE_ARM_NE10
 CC = gcc
 CFLAGS += -mfpu=neon
-INCLUDES += -I$(NE10_INCDIR) -DHAVE_ARM_NE10 -DOPUS_ARM_NEON_INTR
+INCLUDES += -I$(NE10_INCDIR) -DHAVE_ARM_NE10 -DOPUS_ARM_PRESUME_NEON_INTR
 LIBDIR = -l:$(NE10_LIBDIR)/libNE10.so
 SOURCES += ../arm/celt_ne10_fft.c \
            dump_modes_arm_ne10.c \
diff --git a/celt/pitch.h b/celt/pitch.h
index af745eb..dde48c8 100644
--- a/celt/pitch.h
+++ b/celt/pitch.h
@@ -46,8 +46,7 @@
 #include "mips/pitch_mipsr1.h"
 #endif
 
-#if ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
-  || defined(OPUS_ARM_NEON_INTR))
+#if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
 # include "arm/pitch_arm.h"
 #endif
 
@@ -189,7 +188,7 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
 #if !defined(OVERRIDE_PITCH_XCORR)
 /*Is run-time CPU detection enabled on this platform?*/
 # if defined(OPUS_HAVE_RTCD) && \
-  (defined(OPUS_ARM_ASM) || (defined(OPUS_ARM_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)))
+  (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
 extern
 #  if defined(FIXED_POINT)
 opus_val32
diff --git a/celt/tests/test_unit_dft.c b/celt/tests/test_unit_dft.c
index 9fbcdc4..e17e26f 100644
--- a/celt/tests/test_unit_dft.c
+++ b/celt/tests/test_unit_dft.c
@@ -45,8 +45,7 @@
 #include "mathops.c"
 #include "entcode.c"
 
-#if defined(OPUS_HAVE_RTCD) && \
-         (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) || defined(OPUS_ARM_ASM)
 #include "arm/armcpu.c"
 #if !defined(FIXED_POINT)
 #if defined(HAVE_ARM_NE10)
diff --git a/celt/tests/test_unit_mathops.c b/celt/tests/test_unit_mathops.c
index a1cf2f7..2e43e07 100644
--- a/celt/tests/test_unit_mathops.c
+++ b/celt/tests/test_unit_mathops.c
@@ -65,17 +65,18 @@
 #include "x86/celt_lpc_sse.c"
 #endif
 #include "x86/x86_celt_map.c"
+
 #elif ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
-       || defined(OPUS_ARM_NEON_INTR))
-#if defined(OPUS_ARM_NEON_INTR)
+       || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
 #include "arm/celt_neon_intr.c"
+#endif
 #if defined(HAVE_ARM_NE10)
 #include "kiss_fft.c"
 #include "mdct.c"
 #include "arm/celt_ne10_fft.c"
 #include "arm/celt_ne10_mdct.c"
 #endif
-#endif
 #include "arm/arm_celt_map.c"
 #endif
 
diff --git a/celt/tests/test_unit_mdct.c b/celt/tests/test_unit_mdct.c
index fdee079..53258fe 100644
--- a/celt/tests/test_unit_mdct.c
+++ b/celt/tests/test_unit_mdct.c
@@ -46,8 +46,8 @@
 #include "mathops.c"
 #include "entcode.c"
 
-#if defined(OPUS_HAVE_RTCD) && \
-         (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+
+#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) || defined(OPUS_ARM_ASM)
 #include "arm/armcpu.c"
 #if !defined(FIXED_POINT)
 #if defined(HAVE_ARM_NE10)
diff --git a/celt/tests/test_unit_rotation.c b/celt/tests/test_unit_rotation.c
index 4ac838e..ecab5cb 100644
--- a/celt/tests/test_unit_rotation.c
+++ b/celt/tests/test_unit_rotation.c
@@ -63,9 +63,10 @@
 #include "x86/celt_lpc_sse.c"
 #endif
 #include "x86/x86_celt_map.c"
+
 #elif ((defined(OPUS_ARM_ASM) && defined(FIXED_POINT)) \
-       || defined(OPUS_ARM_NEON_INTR))
-#if defined(OPUS_ARM_NEON_INTR)
+       || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
 #include "arm/celt_neon_intr.c"
 #endif
 #if defined(HAVE_ARM_NE10)
diff --git a/configure.ac b/configure.ac
index 2380a5c..a150d87 100644
--- a/configure.ac
+++ b/configure.ac
@@ -444,7 +444,7 @@ AC_DEFUN([OPUS_PATH_NE10],
 AS_IF([test x"$enable_intrinsics" = x"yes"],[
    intrinsics_support=""
    AS_CASE([$host_cpu],
-   [arm*],
+   [arm*|aarch64],
    [
       cpu_arm=yes
       OPUS_CHECK_INTRINSICS(
@@ -459,55 +459,50 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
             SUMM = vmlaq_f32(SUMM, A0, A1);
          ]]
       )
-      AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1" && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
-          [
-             OPUS_ARM_NEON_INTR_CFLAGS="$ARM_NEON_INTR_CFLAGS"
-             AC_SUBST([OPUS_ARM_NEON_INTR_CFLAGS])
-          ]
+
+      AS_CASE([$host_cpu],
+         [arm*],
+         [
+            AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"],
+                  [
+                     OPUS_ARM_NEON_INTR_CFLAGS="$ARM_NEON_INTR_CFLAGS"
+                     AC_SUBST([OPUS_ARM_NEON_INTR_CFLAGS])
+                     dnl Don't see why defining these is necessary to check features at runtime
+                     AC_DEFINE([OPUS_ARM_MAY_HAVE_EDSP], 1, [Define if compiler support EDSP Instructions])
+                     AC_DEFINE([OPUS_ARM_MAY_HAVE_MEDIA], 1, [Define if compiler support MEDIA Instructions])
+                     AC_DEFINE([OPUS_ARM_MAY_HAVE_NEON], 1, [Define if compiler support NEON instructions])
+                  ]
+            )
+         ]
       )
 
-      #Currently we only have intrinsic optimizations for floating point
-      AS_IF([test x"$enable_float" = x"yes"],
+      AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"],
       [
-         AS_IF([test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"],
-         [
-            OPUS_ARM_NEON_INTR=1
-            AC_DEFINE([OPUS_ARM_NEON_INTR], 1,
-                      [Support ARMv7 Neon Intrinsics for float])
-            AC_DEFINE([OPUS_ARM_MAY_HAVE_NEON_INTR], 1,
-                      [Compiler supports ARMv7 Neon Intrinsics])
-            intrinsics_support="$intrinsics_support (Neon_Intrinsics)"
-
-            AS_IF([test x"enable_rtcd" != x"" && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
-                  [rtcd_support="$rtcd_support (ARMv7_Neon_Intrinsics)"],[])
-
-            AS_IF([test x"$OPUS_ARM_PRESUME_NEON_INTR" = x"1"],
-                  [AC_DEFINE([OPUS_ARM_PRESUME_NEON_INTR], 1,
-                             [Define if binary requires NEON intrinsics support])])
-
-			   AS_IF([test x"$rtcd_support" = x""],
-                  [rtcd_support=no])
-
-            AS_IF([test x"$intrinsics_support" = x""],
-                  [intrinsics_support=no],
-			         [intrinsics_support="arm$intrinsics_support"])
-
-            dnl Don't see why defining these is necessary to check features at runtime
-            AC_DEFINE([OPUS_ARM_MAY_HAVE_EDSP], 1, [Define if compiler support EDSP Instructions])
-            AC_DEFINE([OPUS_ARM_MAY_HAVE_MEDIA], 1, [Define if compiler support MEDIA Instructions])
-            AC_DEFINE([OPUS_ARM_MAY_HAVE_NEON], 1, [Define if compiler support NEON instructions])
-
-            OPUS_PATH_NE10()
-            AS_IF([test x"$HAVE_ARM_NE10" = x"1"],
-                  [intrinsics_support="$intrinsics_support NE10"],[])
-         ],
-         [
-            AC_MSG_WARN([Compiler does not support ARM intrinsics])
-            intrinsics_support=no
-         ])
-      ], [
-            AC_MSG_WARN([Currently only have ARM intrinsics for float])
-            intrinsics_support=no
+         AC_DEFINE([OPUS_ARM_MAY_HAVE_NEON_INTR], 1,
+                   [Compiler supports ARMv7 Neon Intrinsics])
+         intrinsics_support="$intrinsics_support (Neon_Intrinsics)"
+
+         AS_IF([test x"enable_rtcd" != x"" && test x"$OPUS_ARM_PRESUME_NEON_INTR" != x"1"],
+               [rtcd_support="$rtcd_support (ARMv7_Neon_Intrinsics)"],[])
+
+         AS_IF([test x"$OPUS_ARM_PRESUME_NEON_INTR" = x"1"],
+               [AC_DEFINE([OPUS_ARM_PRESUME_NEON_INTR], 1,
+                          [Define if binary requires NEON intrinsics support])])
+
+         AS_IF([test x"$rtcd_support" = x""],
+               [rtcd_support=no])
+
+         AS_IF([test x"$intrinsics_support" = x""],
+               [intrinsics_support=no],
+               [intrinsics_support="arm$intrinsics_support"])
+
+         OPUS_PATH_NE10()
+         AS_IF([test x"$HAVE_ARM_NE10" = x"1"],
+               [intrinsics_support="$intrinsics_support NE10"],[])
+      ],
+      [
+         AC_MSG_WARN([Compiler does not support ARM intrinsics])
+         intrinsics_support=no
       ])
    ],
    [i?86|x86_64],
@@ -663,8 +658,8 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
 ])
 
 AM_CONDITIONAL([CPU_ARM], [test "$cpu_arm" = "yes"])
-AM_CONDITIONAL([OPUS_ARM_NEON_INTR],
-    [test x"$OPUS_ARM_NEON_INTR" = x"1"])
+AM_CONDITIONAL([HAVE_ARM_NEON_INTR],
+    [test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"])
 AM_CONDITIONAL([HAVE_ARM_NE10],
     [test x"$HAVE_ARM_NE10" = x"1"])
 
-- 
1.9.1



More information about the opus mailing list