[opus] ASM runtime detection and optimizations
Aurélien Zanelli
aurelien.zanelli at parrot.com
Thu May 23 05:38:01 PDT 2013
I wrote a proof of concept for runtime detection of CPU capabilities
and selection of the optimized functions. It follows the design that was
discussed on IRC.
I also noticed a small drawback: we must propagate the arch index
through functions which don't take the codec state as an argument.
However, if it looks good, I will continue implementing it.
Best regards,
--
Aurélien Zanelli
Parrot SA
174, quai de Jemmapes
75010 Paris
France
-------------- next part --------------
diff --git a/Makefile.am b/Makefile.am
index f04e3bc..06d4283 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -5,7 +5,7 @@ lib_LTLIBRARIES = libopus.la
DIST_SUBDIRS = doc
-INCLUDES = -I$(top_srcdir)/include -I$(top_srcdir)/celt -I$(top_srcdir)/silk -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed
+INCLUDES = -I$(top_srcdir)/include -I$(top_srcdir)/celt -I$(top_srcdir)/silk -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed -I$(top_srcdir)/src
include celt_sources.mk
include silk_sources.mk
diff --git a/celt/celt_decoder.c b/celt/celt_decoder.c
index d5d2c57..673ab4b 100644
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@@ -33,6 +33,7 @@
#define CELT_DECODER_C
+#include "cpu_support.h"
#include "os_support.h"
#include "mdct.h"
#include <math.h>
@@ -69,6 +70,7 @@ struct OpusCustomDecoder {
int downsample;
int start, end;
int signalling;
+ int arch;
/* Everything beyond this point gets cleared on a reset */
#define DECODER_RESET_START rng
@@ -159,6 +161,7 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod
st->signalling = 1;
st->loss_count = 0;
+ st->arch = opus_select_arch();
opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
@@ -430,7 +433,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
{
VARDECL( opus_val16, lp_pitch_buf );
ALLOC( lp_pitch_buf, DECODE_BUFFER_SIZE>>1, opus_val16 );
- pitch_downsample(decode_mem, lp_pitch_buf, DECODE_BUFFER_SIZE, C);
+ pitch_downsample(decode_mem, lp_pitch_buf, DECODE_BUFFER_SIZE, C, st->arch);
pitch_search(lp_pitch_buf+(PLC_PITCH_LAG_MAX>>1), lp_pitch_buf,
DECODE_BUFFER_SIZE-PLC_PITCH_LAG_MAX,
PLC_PITCH_LAG_MAX-PLC_PITCH_LAG_MIN, &pitch_index);
@@ -496,7 +499,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, opus_val16 * OPUS_R
ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
}
/* Compute the excitation for exc_length samples before the loss. */
- celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
+ celt_fir[st->arch&OPUS_ARCHMASK](exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem);
}
diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c
index 26e6ebb..08fddd0 100644
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -33,6 +33,7 @@
#define CELT_ENCODER_C
+#include "cpu_support.h"
#include "os_support.h"
#include "mdct.h"
#include <math.h>
@@ -75,6 +76,7 @@ struct OpusCustomEncoder {
int lsb_depth;
int variable_duration;
int lfe;
+ int arch;
/* Everything beyond this point gets cleared on a reset */
#define ENCODER_RESET_START rng
@@ -196,6 +198,7 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_init(CELTEncoder *st, const CELTMod
st->force_intra = 0;
st->complexity = 5;
st->lsb_depth=24;
+ st->arch = opus_select_arch();
opus_custom_encoder_ctl(st, OPUS_RESET_STATE);
@@ -1014,7 +1017,7 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
VARDECL(opus_val16, pitch_buf);
ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
- pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
+ pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC, st->arch);
/* Don't search for the fir last 1.5 octave of the range because
there's too many false-positives due to short-term correlation */
pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
diff --git a/celt/celt_lpc.c b/celt/celt_lpc.c
index c75c25b..1eac65b 100644
--- a/celt/celt_lpc.c
+++ b/celt/celt_lpc.c
@@ -32,9 +32,21 @@
#include "celt_lpc.h"
#include "stack_alloc.h"
#include "mathops.h"
+#include "cpu_support.h"
-#ifdef ARM_HAVE_NEON
+#ifdef ARM_ASM
#include "celt_lpc_neon.h"
+void (* const celt_fir[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *,
+ opus_val16 *, int, int, opus_val16 *) = { /* FIR dispatch table, indexed by arch&OPUS_ARCHMASK */
+ celt_fir_c, /* 0: C */
+ celt_fir_c, /* 1: ARMv4 */
+ celt_fir_c, /* 2: ARMv5E */
+ celt_fir_c, /* 3: ARMv6 */
+ celt_fir_neon /* 4: NEON */
+};
+#else
+void (* const celt_fir[OPUS_ARCHMASK+1])(const opus_val16 *, const opus_val16 *,
+ opus_val16 *, int, int, opus_val16 *) = {celt_fir_c}; /* no ARM asm: C implementation only */
#endif
void _celt_lpc(
@@ -91,8 +103,7 @@ int p
#endif
}
-#ifndef OVERRIDE_CELT_FIR
-void celt_fir(const opus_val16 *x,
+void celt_fir_c(const opus_val16 *x,
const opus_val16 *num,
opus_val16 *y,
int N,
@@ -116,7 +127,6 @@ void celt_fir(const opus_val16 *x,
y[i] = ROUND16(sum, SIG_SHIFT);
}
}
-#endif
void celt_iir(const opus_val32 *x,
const opus_val16 *den,
@@ -142,7 +152,6 @@ void celt_iir(const opus_val32 *x,
}
}
-#ifndef OVERRIDE_CELT_AUTOCORR
void _celt_autocorr(
const opus_val16 *x, /* in: [0...n-1] samples x */
opus_val32 *ac, /* out: [0...lag-1] ac values */
@@ -198,4 +207,3 @@ void _celt_autocorr(
RESTORE_STACK;
}
-#endif
diff --git a/celt/celt_lpc.h b/celt/celt_lpc.h
index 2baa77e..dcd9666 100644
--- a/celt/celt_lpc.h
+++ b/celt/celt_lpc.h
@@ -29,12 +29,16 @@
#define PLC_H
#include "arch.h"
+#include "cpu_support.h"
#define LPC_ORDER 24
+extern void (* const celt_fir[OPUS_ARCHMASK+1])(const opus_val16 *, /* FIR dispatch table, indexed by arch&OPUS_ARCHMASK */
+ const opus_val16 *, opus_val16 *, int, int, opus_val16 *); /* extern: the single definition lives in celt_lpc.c */
+
void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
-void celt_fir(const opus_val16 *x,
+void celt_fir_c(const opus_val16 *x,
const opus_val16 *num,
opus_val16 *y,
int N,
diff --git a/celt/celt_lpc_neon.h b/celt/celt_lpc_neon.h
index e9f76c6..029ae7b 100644
--- a/celt/celt_lpc_neon.h
+++ b/celt/celt_lpc_neon.h
@@ -28,9 +28,9 @@
#define CELT_LPC_NEON_H
#ifdef FIXED_POINT
+#include "stack_alloc.h"
+#include "mathops.h"
-#ifndef DISABLE_CELT_FIR_NEON
-#define OVERRIDE_CELT_FIR
/* Optimized FIR filter for order 1 and 4 which are used by opus encoder
* FIR calls in pitch.c are hard-coded with 1 and 4 order values
*
@@ -240,7 +240,7 @@ static void celt_fir4(const opus_val16 *x, const opus_val16 *num, opus_val16 *y,
);
}
-void celt_fir(const opus_val16 *x, const opus_val16 *num, opus_val16 *y,
+void celt_fir_neon(const opus_val16 *x, const opus_val16 *num, opus_val16 *y,
int N, int ord, opus_val16 *mem)
{
int i,j;
@@ -269,12 +269,9 @@ void celt_fir(const opus_val16 *x, const opus_val16 *num, opus_val16 *y,
break;
}
}
-#endif /* CELT_FIR_NEON */
-#ifndef DISABLE_CELT_AUTOCORR_NEON
-#define OVERRIDE_CELT_AUTOCORR
-void _celt_autocorr(
+void _celt_autocorr_neon(
const opus_val16 *x, /* in: [0...n-1] samples x */
opus_val32 *ac, /* out: [0...lag-1] ac values */
const opus_val16 *window,
@@ -478,7 +475,6 @@ void _celt_autocorr(
RESTORE_STACK;
}
-#endif /* CELT_AUTOCORR_NEON */
#endif /* FIXED_POINT */
diff --git a/celt/pitch.c b/celt/pitch.c
index 800a52a..6850dee 100644
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -109,7 +109,7 @@ void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len,
}
void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
- int len, int C)
+ int len, int C, const int arch)
{
int i;
opus_val32 ac[5];
@@ -167,11 +167,11 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp);
lpc[i] = MULT16_16_Q15(lpc[i], tmp);
}
- celt_fir(x_lp, lpc, x_lp, len>>1, 4, mem);
+ celt_fir[arch&OPUS_ARCHMASK](x_lp, lpc, x_lp, len>>1, 4, mem);
mem[0]=0;
lpc[0]=QCONST16(.8f,12);
- celt_fir(x_lp, lpc, x_lp, len>>1, 1, mem);
+ celt_fir[arch&OPUS_ARCHMASK](x_lp, lpc, x_lp, len>>1, 1, mem);
}
diff --git a/celt/pitch.h b/celt/pitch.h
index 2757071..ad23aa9 100644
--- a/celt/pitch.h
+++ b/celt/pitch.h
@@ -37,7 +37,7 @@
#include "modes.h"
void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp,
- int len, int C);
+ int len, int C, const int arch);
void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
int len, int max_pitch, int *pitch);
diff --git a/configure.ac b/configure.ac
index ee6df9a..6b9612f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -167,6 +167,7 @@ if test "x${ac_enable_asm}" = xyes ; then
AS_GCC_INLINE_ASSEMBLY([asm_optimization="ARM"],
[asm_optimization="disabled"])
if test "x${asm_optimization}" = "xARM" ; then
+ AC_DEFINE([ARM_ASM], 1, [Use generic ARM asm optimizations])
AC_DEFINE([ARMv4_ASM], 1, [Use generic ARMv4 asm optimizations])
AS_ASM_ARM_EDSP([ARMv5E_ASM=1],[ARMv5E_ASM=0])
if test "x${ARMv5E_ASM}" = "x1" ; then
diff --git a/opus_headers.mk b/opus_headers.mk
index 43a978c..2c7c077 100644
--- a/opus_headers.mk
+++ b/opus_headers.mk
@@ -2,6 +2,7 @@ OPUS_HEAD = \
include/opus.h \
include/opus_multistream.h \
src/opus_private.h \
+src/cpu_support.h \
src/analysis.h \
src/mlp.h \
src/tansig_table.h
diff --git a/opus_sources.mk b/opus_sources.mk
index e4eeb91..1e9791b 100644
--- a/opus_sources.mk
+++ b/opus_sources.mk
@@ -4,7 +4,8 @@ src/opus_encoder.c \
src/opus_multistream.c \
src/opus_multistream_encoder.c \
src/opus_multistream_decoder.c \
-src/repacketizer.c
+src/repacketizer.c \
+src/armcpu.c
OPUS_SOURCES_FLOAT = \
src/analysis.c \
diff --git a/src/armcpu.c b/src/armcpu.c
new file mode 100644
index 0000000..10a2905
--- /dev/null
+++ b/src/armcpu.c
@@ -0,0 +1,160 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from libtheora modified to suit to Opus */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "armcpu.h"
+
+#if !defined(ARM_ASM) || \
+ !defined(ARMv5E_ASM) && !defined(ARMv6_ASM) && \
+ !defined(ARM_HAVE_NEON)
+opus_uint32 opus_cpu_capa(void)
+{
+ return 0;
+}
+#elif defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_EXTRA_LEAN
+# include <windows.h>
+
+opus_uint32 opus_cpu_capa(void){
+ opus_uint32 flags;
+ flags=0;
+ /*Probe each ARM extension by executing one of its instructions and catching
+ * the illegal-instruction exception. MSVC has no inline __asm support for
+ * ARM, so the opcodes are __emit-ted as hex; all should be essentially nops.*/
+# if defined(ARMv5E_ASM)
+ __try{
+ /*PLD [r13]*/
+ __emit(0xF5DDF000);
+ flags|=OPUS_CPU_ARM_EDSP;
+ }
+ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+ /*Ignore exception.*/
+ }
+# if defined(ARMv6_ASM) /*Same macro name as the guard at the top of this file.*/
+ __try{
+ /*SHADD8 r3,r3,r3*/
+ __emit(0xE6333F93);
+ flags|=OPUS_CPU_ARM_MEDIA;
+ }
+ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+ /*Ignore exception.*/
+ }
+# if defined(ARM_HAVE_NEON)
+ __try{
+ /*VORR q0,q0,q0*/
+ __emit(0xF2200150);
+ flags|=OPUS_CPU_ARM_NEON;
+ }
+ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+ /*Ignore exception.*/
+ }
+# endif
+# endif
+# endif
+ return flags; /*Bitwise OR of the OPUS_CPU_ARM_* flags whose probe did not fault.*/
+}
+
+#elif defined(__linux__)
+/* Linux: build the OPUS_CPU_ARM_* flag set by parsing /proc/cpuinfo. */
+opus_uint32 opus_cpu_capa(void)
+{
+ opus_uint32 flags = 0;
+ FILE *cpuinfo;
+
+ /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on
+ * Android */
+ cpuinfo = fopen("/proc/cpuinfo", "r");
+
+ if(cpuinfo != NULL) /* if the file cannot be opened, flags stays 0 */
+ {
+ /* Line buffer. 512 should be enough for anybody (it's even enough for all
+ * the flags that x86 has accumulated... so far). */
+ char buf[512];
+
+ while(fgets(buf, 512, cpuinfo) != NULL)
+ {
+ /* "Features" line: accept " edsp" / " neon" only when followed by ' ' or '\n' */
+ if(memcmp(buf, "Features", 8) == 0)
+ {
+ char *p;
+ p = strstr(buf, " edsp");
+ if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
+ flags |= OPUS_CPU_ARM_EDSP;
+
+ p = strstr(buf, " neon");
+ if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
+ flags |= OPUS_CPU_ARM_NEON;
+ }
+
+ /* "CPU architecture:" line: version 4 sets V4, version >= 6 sets MEDIA */
+ if(memcmp(buf, "CPU architecture:", 17) == 0)
+ {
+ int version;
+ version = atoi(buf+17); /* parse the number that follows the colon */
+
+ if(version == 4)
+ flags |= OPUS_CPU_ARM_V4;
+
+ if(version >= 6)
+ flags |= OPUS_CPU_ARM_MEDIA;
+ }
+ }
+
+ fclose(cpuinfo);
+ }
+ return flags; /* bitwise OR of the OPUS_CPU_ARM_* flags found */
+}
+#else
+/* The feature registers which can tell us what the processor supports are
+ * accessible in privileged modes only, so we can't have a general user-space
+ * detection method like on x86.*/
+# error "Configured to use ARM asm but no CPU detection method available for " \
+ "your platform. Reconfigure with --disable-asm (or send patches)."
+#endif
+
+int opus_select_arch(void) /* Maps the detected capability flags to an arch table index (0..4). */
+{
+ opus_uint32 flags = opus_cpu_capa();
+
+ if(flags & OPUS_CPU_ARM_NEON) /* tested in priority order: NEON first, plain C last */
+ return 4; /* NEON */
+ else if(flags & OPUS_CPU_ARM_MEDIA)
+ return 3; /* ARMv6 */
+ else if(flags & OPUS_CPU_ARM_EDSP)
+ return 2; /* ARMv5E */
+ else if(flags & OPUS_CPU_ARM_V4)
+ return 1; /* ARMv4 */
+ else
+ return 0; /* C */
+}
diff --git a/src/armcpu.h b/src/armcpu.h
new file mode 100644
index 0000000..358df84
--- /dev/null
+++ b/src/armcpu.h
@@ -0,0 +1,44 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from libtheora modified to suit to Opus */
+
+#ifndef ARMCPU_H
+#define ARMCPU_H
+
+#include "opus_types.h"
+#include "os_support.h"
+
+#define OPUS_CPU_ARM_V4 (1)
+#define OPUS_CPU_ARM_EDSP (1<<1)
+#define OPUS_CPU_ARM_MEDIA (1<<2)
+#define OPUS_CPU_ARM_NEON (1<<3)
+
+opus_uint32 opus_cpu_capa(void);
+int opus_select_arch(void);
+
+#endif
diff --git a/src/cpu_support.h b/src/cpu_support.h
new file mode 100644
index 0000000..bc3b4a3
--- /dev/null
+++ b/src/cpu_support.h
@@ -0,0 +1,57 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CPU_SUPPORT_H
+#define CPU_SUPPORT_H
+
+#ifdef ARM_ASM
+
+#include "armcpu.h"
+
+/* We currently support C code and 4 ARM variants:
+ * arch[0] -> C
+ * arch[1] -> ARMv4
+ * arch[2] -> ARMv5E
+ * arch[3] -> ARMv6
+ * arch[4] -> NEON
+ * NOTE(review): callers use arch&OPUS_ARCHMASK; with 4, archs 1-3 map to 0 - confirm. */
+#define OPUS_ARCHMASK 4
+
+#else
+#define OPUS_ARCHMASK 0 /* without ARM asm only index 0 (C) exists */
+static inline opus_uint32 opus_cpu_capa(void) /* stub: reports no capabilities */
+{
+ return 0;
+}
+
+static inline int opus_select_arch(void) /* stub: always selects index 0 (C) */
+{
+ return 0;
+}
+#endif
+
+#endif
diff --git a/src/opus_decoder.c b/src/opus_decoder.c
index f0b2b6f..6bc7091 100644
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -46,6 +46,7 @@
#include "structs.h"
#include "define.h"
#include "mathops.h"
+#include "cpu_support.h"
struct OpusDecoder {
int celt_dec_offset;
@@ -70,6 +71,7 @@ struct OpusDecoder {
#endif
opus_uint32 rangeFinal;
+ int arch;
};
#ifdef FIXED_POINT
@@ -119,6 +121,7 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels)
st->Fs = Fs;
st->DecControl.API_sampleRate = st->Fs;
st->DecControl.nChannelsAPI = st->channels;
+ st->arch = opus_select_arch();
/* Reset decoder */
ret = silk_InitDecoder( silk_dec );
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index b6424d6..305fad9 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -40,6 +40,7 @@
#include "arch.h"
#include "opus_private.h"
#include "os_support.h"
+#include "cpu_support.h"
#include "analysis.h"
#include "mathops.h"
#include "tuning_parameters.h"
@@ -103,6 +104,7 @@ struct OpusEncoder {
int analysis_offset;
#endif
opus_uint32 rangeFinal;
+ int arch;
};
/* Transition tables for the voice and music. First column is the
@@ -184,6 +186,8 @@ int opus_encoder_init(OpusEncoder* st, opus_int32 Fs, int channels, int applicat
st->Fs = Fs;
+ st->arch = opus_select_arch();
+
ret = silk_InitEncoder( silk_enc, &st->silk_mode );
if(ret)return OPUS_INTERNAL_ERROR;
More information about the opus
mailing list