[opus] ASM runtime detection and optimizations

Aurélien Zanelli aurelien.zanelli at parrot.com
Thu May 23 05:38:01 PDT 2013


I wrote a proof of concept for runtime detection of CPU capabilities
and selection of optimized functions. It follows the design that was
discussed on IRC.

Also, I noticed a small drawback: we must propagate the arch index
through functions which don't take the codec state as an argument.

However, if it looks good, I will continue implementing it.

Best regards,
-- 
Aurélien Zanelli
Parrot SA
174, quai de Jemmapes
75010 Paris
France
-------------- next part --------------
diff --git a/Makefile.am b/Makefile.am
index f04e3bc..06d4283 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -5,7 +5,7 @@ lib_LTLIBRARIES = libopus.la
 
 DIST_SUBDIRS = doc
 
-INCLUDES = -I$(top_srcdir)/include -I$(top_srcdir)/celt -I$(top_srcdir)/silk -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed
+INCLUDES = -I$(top_srcdir)/include -I$(top_srcdir)/celt -I$(top_srcdir)/silk -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed -I$(top_srcdir)/src
 
 include celt_sources.mk
 include silk_sources.mk
diff --git a/celt/celt_decoder.c b/celt/celt_decoder.c
index d5d2c57..673ab4b 100644
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@@ -33,6 +33,7 @@
 
 #define CELT_DECODER_C
 
+#include "cpu_support.h"
 #include "os_support.h"
 #include "mdct.h"
 #include <math.h>
@@ -69,6 +70,7 @@ struct OpusCustomDecoder {
    int downsample;
    int start, end;
    int signalling;
+   int arch;
 
    /* Everything beyond this point gets cleared on a reset */
 #define DECODER_RESET_START rng
@@ -159,6 +161,7 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod
    st->signalling = 1;
 
    st->loss_count = 0;
+   st->arch = opus_select_arch();
 
    opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
 
@@ -430,7 +433,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
       {
          VARDECL( opus_val16, lp_pitch_buf );
          ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
-         pitch_downsample(decode_mem, lp_pitch_buf, DECODE_BUFFER_SIZE, C);
+         pitch_downsample(decode_mem, lp_pitch_buf, DECODE_BUFFER_SIZE, C, st->arch);
          pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
                DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
                PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index);
@@ -496,7 +499,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
                      ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
             }
             /* Compute the excitation for exc_length samples before the loss. */
-            celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
+            celt_fir[st->arch&OPUS_ARCHMASK](exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
                   exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);
          }
 
diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c
index 26e6ebb..08fddd0 100644
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -33,6 +33,7 @@
 
 #define CELT_ENCODER_C
 
+#include "cpu_support.h"
 #include "os_support.h"
 #include "mdct.h"
 #include <math.h>
@@ -75,6 +76,7 @@ struct OpusCustomEncoder {
    int lsb_depth;
    int variable_duration;
    int lfe;
+   int arch;
 
    /* Everything beyond this point gets cleared on a reset */
 #define ENCODER_RESET_START rng
@@ -196,6 +198,7 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_init(CELTEncoder *st, const CELTMod
    st->force_intra  = 0;
    st->complexity = 5;
    st->lsb_depth=24;
+   st->arch = opus_select_arch();
 
    opus_custom_encoder_ctl(st, OPUS_RESET_STATE);
 
@@ -1014,7 +1017,7 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
       VARDECL(opus_val16, pitch_buf);
       ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
 
-      pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
+      pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch);
       /* Don't search for the fir last 1.5 octave of the range because
          there's too many false-positives due to short-term correlation */
       pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
diff --git a/celt/celt_lpc.c b/celt/celt_lpc.c
index c75c25b..1eac65b 100644
--- a/celt/celt_lpc.c
+++ b/celt/celt_lpc.c
@@ -32,9 +32,21 @@
 #include "celt_lpc.h"
 #include "stack_alloc.h"
 #include "mathops.h"
+#include "cpu_support.h"
 
-#ifdef ARM_HAVE_NEON
+#ifdef ARM_ASM
 #include "celt_lpc_neon.h"
+void (* const celt_fir[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *,
+		opus_val16 *, int, int, opus_val16 *) = { 
+  celt_fir_c,    /* arch 0: C */
+  celt_fir_c,    /* arch 1: ARMv4 (uses the C version) */
+  celt_fir_c,    /* arch 2: ARMv5E (uses the C version) */
+  celt_fir_c,    /* arch 3: ARMv6 (uses the C version) */
+  celt_fir_neon  /* arch 4: NEON */
+};
+#else /* !ARM_ASM: the table only holds the C implementation */
+void (* const celt_fir[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *,
+		opus_val16 *, int, int, opus_val16 *) = {celt_fir_c};
 #endif
 
 void _celt_lpc(
@@ -91,8 +103,7 @@ int          p
 #endif
 }
 
-#ifndef OVERRIDE_CELT_FIR
-void celt_fir(const opus_val16 *x,
+void celt_fir_c(const opus_val16 *x,
          const opus_val16 *num,
          opus_val16 *y,
          int N,
@@ -116,7 +127,6 @@ void celt_fir(const opus_val16 *x,
       y[i] = ROUND16(sum, SIG_SHIFT);
    }
 }
-#endif
 
 void celt_iir(const opus_val32 *x,
          const opus_val16 *den,
@@ -142,7 +152,6 @@ void celt_iir(const opus_val32 *x,
    }
 }
 
-#ifndef OVERRIDE_CELT_AUTOCORR
 void _celt_autocorr(
                    const opus_val16 *x,   /*  in: [0...n-1] samples x   */
                    opus_val32       *ac,  /* out: [0...lag-1] ac values */
@@ -198,4 +207,3 @@ void _celt_autocorr(
 
    RESTORE_STACK;
 }
-#endif
diff --git a/celt/celt_lpc.h b/celt/celt_lpc.h
index 2baa77e..dcd9666 100644
--- a/celt/celt_lpc.h
+++ b/celt/celt_lpc.h
@@ -29,12 +29,16 @@
 #define PLC_H
 
 #include "arch.h"
+#include "cpu_support.h"
 
 #define LPC_ORDER 24
 
+/* Per-arch celt_fir implementations (defined in celt_lpc.c); index with arch&OPUS_ARCHMASK. */
+extern void (* const celt_fir[OPUS_ARCHMASK+1])(const opus_val16 *,
+      const opus_val16 *, opus_val16 *, int, int, opus_val16 *);
 void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
 
-void celt_fir(const opus_val16 *x,
+void celt_fir_c(const opus_val16 *x,
          const opus_val16 *num,
          opus_val16 *y,
          int N,
diff --git a/celt/celt_lpc_neon.h b/celt/celt_lpc_neon.h
index e9f76c6..029ae7b 100644
--- a/celt/celt_lpc_neon.h
+++ b/celt/celt_lpc_neon.h
@@ -28,9 +28,9 @@
 #define CELT_LPC_NEON_H
 
 #ifdef FIXED_POINT
+#include "stack_alloc.h"
+#include "mathops.h"
 
-#ifndef DISABLE_CELT_FIR_NEON
-#define OVERRIDE_CELT_FIR
 /* Optimized FIR filter for order 1 and 4 which are used by opus encoder
  * FIR calls in pitch.c are hard-coded with 1 and 4 order values
  *
@@ -240,7 +240,7 @@ static void celt_fir4(const opus_val16 *x, const opus_val16 *num, opus_val16 *y,
       );
 }
 
-void celt_fir(const opus_val16 *x, const opus_val16 *num, opus_val16 *y,
+void celt_fir_neon(const opus_val16 *x, const opus_val16 *num, opus_val16 *y,
     int N, int ord, opus_val16 *mem)
 {
   int i,j;
@@ -269,12 +269,9 @@ void celt_fir(const opus_val16 *x, const opus_val16 *num, opus_val16 *y,
     break;
   }
 }
-#endif /* CELT_FIR_NEON */
 
 
-#ifndef DISABLE_CELT_AUTOCORR_NEON
-#define OVERRIDE_CELT_AUTOCORR
-void _celt_autocorr(
+void _celt_autocorr_neon(
 		const opus_val16 *x,   /*  in: [0...n-1] samples x   */
 		opus_val32       *ac,  /* out: [0...lag-1] ac values */
 		const opus_val16       *window,
@@ -478,7 +475,6 @@ void _celt_autocorr(
 
 	RESTORE_STACK;
 }
-#endif /* CELT_AUTOCORR_NEON */
 
 #endif /* FIXED_POINT */
 
diff --git a/celt/pitch.c b/celt/pitch.c
index 800a52a..6850dee 100644
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -109,7 +109,7 @@ void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
 }
 
 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
-      int len, int C)
+      int len, int C, const int arch)
 {
    int i;
    opus_val32 ac[5];
@@ -167,11 +167,11 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
       tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
       lpc[i] = MULT16_16_Q15(lpc[i], tmp);
    }
-   celt_fir(x_lp, lpc, x_lp, len>>1, 4, mem);
+   celt_fir[arch&OPUS_ARCHMASK](x_lp, lpc, x_lp, len>>1, 4, mem);
 
    mem[0]=0;
    lpc[0]=QCONST16(.8f,12);
-   celt_fir(x_lp, lpc, x_lp, len>>1, 1, mem);
+   celt_fir[arch&OPUS_ARCHMASK](x_lp, lpc, x_lp, len>>1, 1, mem);
 
 }
 
diff --git a/celt/pitch.h b/celt/pitch.h
index 2757071..ad23aa9 100644
--- a/celt/pitch.h
+++ b/celt/pitch.h
@@ -37,7 +37,7 @@
 #include "modes.h"
 
 void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
-      int len, int C);
+      int len, int C, const int arch);
 
 void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
                   int len, int max_pitch, int *pitch);
diff --git a/configure.ac b/configure.ac
index ee6df9a..6b9612f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -167,6 +167,7 @@ if test "x${ac_enable_asm}" = xyes ; then
         AS_GCC_INLINE_ASSEMBLY([asm_optimization="ARM"],
             [asm_optimization="disabled"])
         if test "x${asm_optimization}" = "xARM" ; then
+            AC_DEFINE([ARM_ASM], 1, [Use generic ARM asm optimizations])
             AC_DEFINE([ARMv4_ASM], 1, [Use generic ARMv4 asm optimizations])
             AS_ASM_ARM_EDSP([ARMv5E_ASM=1],[ARMv5E_ASM=0])
             if test "x${ARMv5E_ASM}" = "x1" ; then
diff --git a/opus_headers.mk b/opus_headers.mk
index 43a978c..2c7c077 100644
--- a/opus_headers.mk
+++ b/opus_headers.mk
@@ -2,6 +2,7 @@ OPUS_HEAD = \
 include/opus.h \
 include/opus_multistream.h \
 src/opus_private.h \
+src/cpu_support.h \
 src/analysis.h \
 src/mlp.h \
 src/tansig_table.h
diff --git a/opus_sources.mk b/opus_sources.mk
index e4eeb91..1e9791b 100644
--- a/opus_sources.mk
+++ b/opus_sources.mk
@@ -4,7 +4,8 @@ src/opus_encoder.c \
 src/opus_multistream.c \
 src/opus_multistream_encoder.c \
 src/opus_multistream_decoder.c \
-src/repacketizer.c
+src/repacketizer.c \
+src/armcpu.c
 
 OPUS_SOURCES_FLOAT = \
 src/analysis.c \
diff --git a/src/armcpu.c b/src/armcpu.c
new file mode 100644
index 0000000..10a2905
--- /dev/null
+++ b/src/armcpu.c
@@ -0,0 +1,160 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from libtheora modified to suit to Opus */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "armcpu.h"
+
+#if !defined(ARM_ASM) || \
+    !defined(ARMv5E_ASM) && !defined(ARMv6_ASM) && \
+    !defined(ARM_HAVE_NEON)
+opus_uint32 opus_cpu_capa(void)
+{
+  return 0; /* ARM asm is off, or no ARMv5E/ARMv6/NEON variant is enabled: report no flags */
+}
+#elif defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_EXTRA_LEAN
+# include <windows.h>
+
+opus_uint32 opus_cpu_capa(void){
+  opus_uint32 flags;
+  flags=0;
+  /*MSVC has no inline __asm support for ARM, so each probe __emit-s the hex
+     encoding of one (essentially nop) instruction from the extension; a flag
+     is set only if it executes without an illegal-instruction exception.*/
+# if defined(ARMv5E_ASM)
+  __try{
+    /*PLD [r13]*/
+    __emit(0xF5DDF000);
+    flags|=OPUS_CPU_ARM_EDSP;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Instruction not supported: leave the EDSP flag clear.*/
+  }
+#  if defined(ARMv6_ASM)
+  __try{
+    /*SHADD8 r3,r3,r3*/
+    __emit(0xE6333F93);
+    flags|=OPUS_CPU_ARM_MEDIA;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Instruction not supported: leave the MEDIA flag clear.*/
+  }
+#   if defined(ARM_HAVE_NEON)
+  __try{
+    /*VORR q0,q0,q0*/
+    __emit(0xF2200150);
+    flags|=OPUS_CPU_ARM_NEON;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Instruction not supported: leave the NEON flag clear.*/
+  }
+#   endif
+#  endif
+# endif
+  return flags;
+}
+
+#elif defined(__linux__)
+/* Linux: build the OPUS_CPU_ARM_* flags by parsing /proc/cpuinfo; returns 0 if it cannot be opened. */
+opus_uint32 opus_cpu_capa(void)
+{
+  opus_uint32 flags = 0;
+  FILE *cpuinfo;
+
+  /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on
+   * Android */
+  cpuinfo = fopen("/proc/cpuinfo", "r");
+
+  if(cpuinfo != NULL)
+  {
+    /* 512 should be enough for anybody (it's even enough for all the flags that
+     * x86 has accumulated... so far). */
+    char buf[512];
+
+    while(fgets(buf, 512, cpuinfo) != NULL)
+    {
+      /* Search for the edsp and neon flags as whole words (ended by ' ' or '\n') */
+      if(strncmp(buf, "Features", 8) == 0)
+      {
+        char *p;
+        p = strstr(buf, " edsp");
+        if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
+          flags |= OPUS_CPU_ARM_EDSP;
+
+        p = strstr(buf, " neon");
+        if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
+          flags |= OPUS_CPU_ARM_NEON;
+      }
+
+      /* Derive ARMv4 baseline and media (>= ARMv6) support from the version */
+      if(strncmp(buf, "CPU architecture:", 17) == 0)
+      {
+        int version;
+        version = atoi(buf+17);
+
+        if(version >= 4) /* later architectures still run ARMv4 code */
+          flags |= OPUS_CPU_ARM_V4;
+
+        if(version >= 6)
+          flags |= OPUS_CPU_ARM_MEDIA;
+      }
+    }
+
+    fclose(cpuinfo);
+  }
+  return flags;
+}
+#else
+/* The feature registers which can tell us what the processor supports are
+ * accessible in priveleged modes only, so we can't have a general user-space
+ * detection method like on x86.*/
+# error "Configured to use ARM asm but no CPU detection method available for " \
+   "your platform.  Reconfigure with --disable-asm (or send patches)."
+#endif
+
+/* Map opus_cpu_capa() flags to a function-table index; best variant first, 0 = plain C. */
+int opus_select_arch(void)
+{
+  const opus_uint32 caps = opus_cpu_capa();
+
+  if(caps & OPUS_CPU_ARM_NEON)
+    return 4;   /* NEON */
+  if(caps & OPUS_CPU_ARM_MEDIA)
+    return 3;   /* ARMv6 */
+  if(caps & OPUS_CPU_ARM_EDSP)
+    return 2;   /* ARMv5E */
+  if(caps & OPUS_CPU_ARM_V4)
+    return 1;   /* ARMv4 */
+  return 0;     /* C */
+}
diff --git a/src/armcpu.h b/src/armcpu.h
new file mode 100644
index 0000000..358df84
--- /dev/null
+++ b/src/armcpu.h
@@ -0,0 +1,44 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from libtheora modified to suit to Opus */
+
+#ifndef ARMCPU_H
+#define ARMCPU_H
+
+#include "opus_types.h"
+#include "os_support.h"
+
+#define OPUS_CPU_ARM_V4    (1)    /* ARMv4 */
+#define OPUS_CPU_ARM_EDSP  (1<<1)    /* "edsp" feature (ARMv5E) */
+#define OPUS_CPU_ARM_MEDIA (1<<2) /* media instructions (ARMv6 and later) */
+#define OPUS_CPU_ARM_NEON  (1<<3) /* NEON */
+
+opus_uint32 opus_cpu_capa(void); /* returns an OR of the OPUS_CPU_ARM_* flags above */
+int opus_select_arch(void);      /* returns the arch index (0..4) derived from those flags */
+
+#endif
diff --git a/src/cpu_support.h b/src/cpu_support.h
new file mode 100644
index 0000000..bc3b4a3
--- /dev/null
+++ b/src/cpu_support.h
@@ -0,0 +1,57 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CPU_SUPPORT_H
+#define CPU_SUPPORT_H
+
+#include "opus_types.h" /* opus_uint32 for the stubs below, whatever the include order */
+
+#ifdef ARM_ASM
+#include "armcpu.h"
+
+/* We currently support C code and 4 ARM variants:
+ * arch[0] -> C, arch[1] -> ARMv4, arch[2] -> ARMv5E,
+ * arch[3] -> ARMv6, arch[4] -> NEON
+ * The mask must have the form 2^n-1 so that arch&OPUS_ARCHMASK keeps every
+ * index distinct; table slots 5..7 are unused padding. */
+#define OPUS_ARCHMASK 7
+
+#else
+/* No ARM asm: arch 0 (C) is the only choice and detection is a stub. */
+#define OPUS_ARCHMASK 0
+static inline opus_uint32 opus_cpu_capa(void)
+{
+  return 0;
+}
+
+static inline int opus_select_arch(void)
+{
+  return 0;
+}
+#endif
+
+#endif
diff --git a/src/opus_decoder.c b/src/opus_decoder.c
index f0b2b6f..6bc7091 100644
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -46,6 +46,7 @@
 #include "structs.h"
 #include "define.h"
 #include "mathops.h"
+#include "cpu_support.h"
 
 struct OpusDecoder {
    int          celt_dec_offset;
@@ -70,6 +71,7 @@ struct OpusDecoder {
 #endif
 
    opus_uint32  rangeFinal;
+   int arch;
 };
 
 #ifdef FIXED_POINT
@@ -119,6 +121,7 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels)
    st->Fs = Fs;
    st->DecControl.API_sampleRate = st->Fs;
    st->DecControl.nChannelsAPI      = st->channels;
+   st->arch = opus_select_arch();
 
    /* Reset decoder */
    ret = silk_InitDecoder( silk_dec );
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index b6424d6..305fad9 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -40,6 +40,7 @@
 #include "arch.h"
 #include "opus_private.h"
 #include "os_support.h"
+#include "cpu_support.h"
 #include "analysis.h"
 #include "mathops.h"
 #include "tuning_parameters.h"
@@ -103,6 +104,7 @@ struct OpusEncoder {
     int          analysis_offset;
 #endif
     opus_uint32  rangeFinal;
+    int arch;
 };
 
 /* Transition tables for the voice and music. First column is the
@@ -184,6 +186,8 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
 
     st->Fs = Fs;
 
+    st->arch = opus_select_arch();
+
     ret = silk_InitEncoder( silk_enc, &st->silk_mode );
     if(ret)return OPUS_INTERNAL_ERROR;
 


More information about the opus mailing list