[opus] [RFC PATCH v1 2/2] armv7(float): Optimize encode usecase using NE10 library
Viswanath Puttagunta
viswanath.puttagunta at linaro.org
Fri Jan 23 06:53:59 PST 2015
On 20 January 2015 at 11:37, Viswanath Puttagunta
<viswanath.puttagunta at linaro.org> wrote:
>
> Optimize opus encode (float only) usecase using ARM NE10
> library. Mainly affects opus_fft and clt_mdct_forward
> and related functions.
>
> This optimization can be used for ARM CPUs that have a NEON
> VFP unit. This patch only enables optimizations for ARMv7.
>
> Official ARM NE10 library page available at
> http://projectne10.github.io/Ne10/
>
> To enable this optimization, use
> --enable-intrinsics --with-NE10=<install_prefix>
> or
> --enable-intrinsics --with-NE10-libraries=<NE10_lib_dir> --with-NE10-includes=<NE10_includes_dir>
>
> Compile-time checks are made during the configure process to make
> sure the optimization option is available only when the compiler
> supports NEON intrinsics.
>
> Runtime checks are made to make sure the optimized functions are
> only called on appropriate hardware.
> ---
> Makefile.am | 30 +--
> celt/arm/arm_celt_ne10_fft_map.c | 65 ++++++
> celt/arm/arm_celt_ne10_mdct_map.c | 53 +++++
> celt/arm/celt_ne10_fft.c | 101 ++++++++++
> celt/arm/celt_ne10_mdct.c | 159 +++++++++++++++
> celt/arm/fft_arm.h | 65 ++++++
> celt/arm/mdct_arm.h | 52 +++++
> celt/celt_encoder.c | 4 +-
> celt/dump_modes/Makefile | 21 +-
> celt/dump_modes/dump_mode_arm_ne10.c | 103 ++++++++++
> celt/dump_modes/dump_modes.c | 22 +-
> celt/dump_modes/dump_modes_arch.h | 14 ++
> celt/kiss_fft.c | 18 +-
> celt/kiss_fft.h | 44 +++-
> celt/mdct.c | 2 +-
> celt/mdct.h | 29 ++-
> celt/static_modes_float.h | 25 +++
> celt/static_modes_float_arm_ne10.h | 367 ++++++++++++++++++++++++++++++++++
> celt/tests/test_unit_dft.c | 14 +-
> celt/tests/test_unit_mdct.c | 19 +-
> celt_headers.mk | 3 +
> celt_sources.mk | 6 +
> configure.ac | 81 ++++++++
> src/analysis.c | 2 +-
> src/opus_multistream_encoder.c | 3 +-
> 25 files changed, 1278 insertions(+), 24 deletions(-)
> create mode 100644 celt/arm/arm_celt_ne10_fft_map.c
> create mode 100644 celt/arm/arm_celt_ne10_mdct_map.c
> create mode 100644 celt/arm/celt_ne10_fft.c
> create mode 100644 celt/arm/celt_ne10_mdct.c
> create mode 100644 celt/arm/fft_arm.h
> create mode 100644 celt/arm/mdct_arm.h
> create mode 100644 celt/dump_modes/dump_mode_arm_ne10.c
> create mode 100644 celt/dump_modes/dump_modes_arch.h
> create mode 100644 celt/static_modes_float_arm_ne10.h
>
> diff --git a/Makefile.am b/Makefile.am
> index 95323ca..5ad93aa 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -10,7 +10,7 @@ lib_LTLIBRARIES = libopus.la
> DIST_SUBDIRS = doc
>
> AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/celt -I$(top_srcdir)/silk \
> - -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed
> + -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed $(NE10_CFLAGS)
>
> include celt_sources.mk
> include silk_sources.mk
> @@ -47,6 +47,10 @@ CELT_SOURCES += $(CELT_SOURCES_ARM_NEON_INTR)
> OPUS_ARM_NEON_INTR_CPPFLAGS = -mfpu=neon
> endif
>
> +if HAVE_ARM_NE10
> +CELT_SOURCES += $(CELT_SOURCES_ARM_NE10)
> +endif
> +
> if OPUS_ARM_EXTERNAL_ASM
> nodist_libopus_la_SOURCES = $(CELT_SOURCES_ARM_ASM:.s=-gnu.S)
> BUILT_SOURCES = $(CELT_SOURCES_ARM_ASM:.s=-gnu.S) \
> @@ -64,7 +68,7 @@ include opus_headers.mk
>
> libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(OPUS_SOURCES)
> libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@
> -libopus_la_LIBADD = $(LIBM)
> +libopus_la_LIBADD = $(NE10_LIBS) $(LIBM)
>
> pkginclude_HEADERS = include/opus.h include/opus_multistream.h include/opus_types.h include/opus_defines.h
>
> @@ -77,32 +81,32 @@ TESTS = celt/tests/test_unit_types celt/tests/test_unit_mathops celt/tests/test_
>
> opus_demo_SOURCES = src/opus_demo.c
>
> -opus_demo_LDADD = libopus.la $(LIBM)
> +opus_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
> repacketizer_demo_SOURCES = src/repacketizer_demo.c
>
> -repacketizer_demo_LDADD = libopus.la $(LIBM)
> +repacketizer_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
> opus_compare_SOURCES = src/opus_compare.c
> opus_compare_LDADD = $(LIBM)
>
> tests_test_opus_api_SOURCES = tests/test_opus_api.c tests/test_opus_common.h
> -tests_test_opus_api_LDADD = libopus.la $(LIBM)
> +tests_test_opus_api_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
> tests_test_opus_encode_SOURCES = tests/test_opus_encode.c tests/test_opus_common.h
> -tests_test_opus_encode_LDADD = libopus.la $(LIBM)
> +tests_test_opus_encode_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
> tests_test_opus_decode_SOURCES = tests/test_opus_decode.c tests/test_opus_common.h
> -tests_test_opus_decode_LDADD = libopus.la $(LIBM)
> +tests_test_opus_decode_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
> tests_test_opus_padding_SOURCES = tests/test_opus_padding.c tests/test_opus_common.h
> -tests_test_opus_padding_LDADD = libopus.la $(LIBM)
> +tests_test_opus_padding_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
> celt_tests_test_unit_cwrs32_SOURCES = celt/tests/test_unit_cwrs32.c
> celt_tests_test_unit_cwrs32_LDADD = $(LIBM)
>
> celt_tests_test_unit_dft_SOURCES = celt/tests/test_unit_dft.c
> -celt_tests_test_unit_dft_LDADD = $(LIBM)
> +celt_tests_test_unit_dft_LDADD = $(NE10_LIBS) $(LIBM)
>
> celt_tests_test_unit_entropy_SOURCES = celt/tests/test_unit_entropy.c
> celt_tests_test_unit_entropy_LDADD = $(LIBM)
> @@ -119,7 +123,7 @@ endif
> endif
>
> celt_tests_test_unit_mdct_SOURCES = celt/tests/test_unit_mdct.c
> -celt_tests_test_unit_mdct_LDADD = $(LIBM)
> +celt_tests_test_unit_mdct_LDADD = $(NE10_LIBS) $(LIBM)
>
> celt_tests_test_unit_rotation_SOURCES = celt/tests/test_unit_rotation.c
> celt_tests_test_unit_rotation_LDADD = $(LIBM)
> @@ -269,6 +273,8 @@ endif
>
> if OPUS_ARM_NEON_INTR
> CELT_ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
> - %test_unit_rotation.o %test_unit_mathops.o
> -$(CELT_ARM_NEON_INTR_OBJ): CFLAGS += $(OPUS_ARM_NEON_INTR_CPPFLAGS)
> + $(CELT_SOURCES_ARM_NE10:.c=.lo) \
> + %test_unit_rotation.o %test_unit_mathops.o \
> + %test_unit_mdct.o %test_unit_dft.o
> +$(CELT_ARM_NEON_INTR_OBJ): CFLAGS += $(OPUS_ARM_NEON_INTR_CPPFLAGS) $(NE10_CFLAGS)
> endif
> diff --git a/celt/arm/arm_celt_ne10_fft_map.c b/celt/arm/arm_celt_ne10_fft_map.c
> new file mode 100644
> index 0000000..5bb7b5f
> --- /dev/null
> +++ b/celt/arm/arm_celt_ne10_fft_map.c
> @@ -0,0 +1,65 @@
> +/* Copyright (c) 2015-2016 Xiph.Org Foundation
> + Written by Viswanath Puttagunta */
> +/**
> + @file arm_celt_ne10_fft_map.c
> + @brief Map for ARM Neon optimizations for fft using NE10
> + */
> +
> +/*
> + Redistribution and use in source and binary forms, with or without
> + modification, are permitted provided that the following conditions
> + are met:
> +
> + - Redistributions of source code must retain the above copyright
> + notice, this list of conditions and the following disclaimer.
> +
> + - Redistributions in binary form must reproduce the above copyright
> + notice, this list of conditions and the following disclaimer in the
> + documentation and/or other materials provided with the distribution.
> +
> + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#ifdef HAVE_CONFIG_H
> +#include "config.h"
> +#endif
> +
> +#include "kiss_fft.h"
> +
> +#if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10)
> +#ifdef CUSTOM_MODES
> +int (*const OPUS_FFT_ALLOC_ARCH[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
> + opus_fft_alloc_arch_c, /* ARMv4 */
> + opus_fft_alloc_arch_c, /* EDSP */
> + opus_fft_alloc_arch_c, /* Media */
> + opus_fft_alloc_arm_float_neon /* Neon with NE10 library support */
> +};
> +
> +void (*const OPUS_FFT_FREE_ARCH[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
> + opus_fft_free_arch_c, /* ARMv4 */
> + opus_fft_free_arch_c, /* EDSP */
> + opus_fft_free_arch_c, /* Media */
> + opus_fft_free_arm_float_neon /* Neon with NE10 */
> +};
> +#endif /* CUSTOM_MODES */
> +
> +void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
> + const kiss_fft_cpx *fin,
> + kiss_fft_cpx *fout) = {
> + opus_fft_c, /* ARMv4 */
> + opus_fft_c, /* EDSP */
> + opus_fft_c, /* Media */
> + opus_fft_float_neon /* Neon with NE10 */
> +};
> +
> +#endif
> diff --git a/celt/arm/arm_celt_ne10_mdct_map.c b/celt/arm/arm_celt_ne10_mdct_map.c
> new file mode 100644
> index 0000000..6df7af3
> --- /dev/null
> +++ b/celt/arm/arm_celt_ne10_mdct_map.c
> @@ -0,0 +1,53 @@
> +/* Copyright (c) 2015-2016 Xiph.Org Foundation
> + Written by Viswanath Puttagunta */
> +/**
> + @file arm_celt_ne10_mdct_map.c
> + @brief Map for ARM Neon optimizations for mdct using NE10
> + */
> +
> +/*
> + Redistribution and use in source and binary forms, with or without
> + modification, are permitted provided that the following conditions
> + are met:
> +
> + - Redistributions of source code must retain the above copyright
> + notice, this list of conditions and the following disclaimer.
> +
> + - Redistributions in binary form must reproduce the above copyright
> + notice, this list of conditions and the following disclaimer in the
> + documentation and/or other materials provided with the distribution.
> +
> + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#ifdef HAVE_CONFIG_H
> +#include "config.h"
> +#endif
> +
> +#include "kiss_fft.h"
> +#include "mdct.h"
> +
> +#if defined(OPUS_HAVE_RTCD) && defined(HAVE_ARM_NE10)
> +
> +void (*const CLT_MDCT_FORWARD_ARCH[OPUS_ARCHMASK+1])(const mdct_lookup *l,
> + kiss_fft_scalar *in,
> + kiss_fft_scalar * OPUS_RESTRICT out,
> + const opus_val16 *window,
> + int overlap, int shift, int stride) = {
> + clt_mdct_forward_c, /* ARMv4 */
> + clt_mdct_forward_c, /* EDSP */
> + clt_mdct_forward_c, /* Media */
> + clt_mdct_forward_float_neon /* Neon with NE10 */
> +};
> +
> +#endif
> diff --git a/celt/arm/celt_ne10_fft.c b/celt/arm/celt_ne10_fft.c
> new file mode 100644
> index 0000000..fe00b25
> --- /dev/null
> +++ b/celt/arm/celt_ne10_fft.c
> @@ -0,0 +1,101 @@
> +/* Copyright (c) 2015-2016 Xiph.Org Foundation
> + Written by Viswanath Puttagunta */
> +/**
> + @file celt_ne10_fft.c
> + @brief ARM Neon optimizations for fft using NE10 library
> + */
> +
> +/*
> + Redistribution and use in source and binary forms, with or without
> + modification, are permitted provided that the following conditions
> + are met:
> +
> + - Redistributions of source code must retain the above copyright
> + notice, this list of conditions and the following disclaimer.
> +
> + - Redistributions in binary form must reproduce the above copyright
> + notice, this list of conditions and the following disclaimer in the
> + documentation and/or other materials provided with the distribution.
> +
> + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#ifndef SKIP_CONFIG_H
> +#ifdef HAVE_CONFIG_H
> +#include "config.h"
> +#endif
> +#endif
> +
> +#include <arm_neon.h>
> +#include <NE10_init.h>
> +#include <NE10_dsp.h>
> +#include "../kiss_fft.h"
> +#include "stack_alloc.h"
> +#include "os_support.h"
> +#include "stack_alloc.h"
> +
> +#ifdef CUSTOM_MODES
> +
> +int opus_fft_alloc_arm_float_neon(kiss_fft_state *st)
> +{
> + st->priv = (void *)ne10_fft_alloc_c2c_float32_neon(st->nfft);
> + if (st->priv == NULL) {
> + printf("Unable to ne10 alloc\n");
> + return -1;
> + }
> + return 0;
> +}
> +
> +void opus_fft_free_arm_float_neon(kiss_fft_state *st)
> +{
> + ne10_fft_cfg_float32_t cfg = (ne10_fft_cfg_float32_t)st->priv;
> +
> + if (cfg)
> + free((void *)cfg);
> +}
> +#endif
> +
> +void opus_fft_float_neon(const kiss_fft_state *st,
> + const kiss_fft_cpx *fin,
> + kiss_fft_cpx *fout)
> +{
> + ne10_fft_cfg_float32_t cfg = (ne10_fft_cfg_float32_t)st->priv;
> + VARDECL(ne10_fft_cpx_float32_t, temp);
> + VARDECL(ne10_fft_cpx_float32_t, tempin);
> + SAVE_STACK;
> + int N2 = st->nfft >> 1;
> + float32x4_t inq, outq;
> + float32x2_t scale;
> + float *in = (float *)fin;
> + float *out;
> + int i;
> + ALLOC(temp, st->nfft, ne10_fft_cpx_float32_t);
> + ALLOC(tempin, st->nfft, ne10_fft_cpx_float32_t);
> +
> + out = (float *)tempin;
> + scale = vld1_dup_f32(&st->scale);
> + for (i = 0; i < N2; i++) {
> + inq = vld1q_f32(in);
> + in += 4;
> + outq = vmulq_lane_f32(inq, scale, 0);
> + vst1q_f32(out, outq);
> + out += 4;
> + }
> +
> + cfg->buffer = (ne10_fft_cpx_float32_t *)&temp[0];
Speaking of thread safety: I don't think this is thread safe, because it
writes cfg->buffer on the state reached through the shared st->priv. I
think what will work here is:
ne10_fft_state_float32_t state;
memcpy((void *)&state, st->priv, sizeof(ne10_fft_state_float32_t));
state.buffer = (ne10_fft_cpx_float32_t *)&temp[0];
ne10_fft_c2c_1d_float32_neon((ne10_fft_cpx_float32_t *)fout,
(ne10_fft_cpx_float32_t *)tempin,
&state, 0);
I will correct this in RFC v2 once I have received a comprehensive review.
> +
> + ne10_fft_c2c_1d_float32_neon((ne10_fft_cpx_float32_t *)fout,
> + (ne10_fft_cpx_float32_t *)tempin,
> + cfg, 0);
> + RESTORE_STACK;
> +}
> diff --git a/celt/arm/celt_ne10_mdct.c b/celt/arm/celt_ne10_mdct.c
> new file mode 100644
> index 0000000..177bda2
> --- /dev/null
> +++ b/celt/arm/celt_ne10_mdct.c
> @@ -0,0 +1,159 @@
> +/* Copyright (c) 2015-2016 Xiph.Org Foundation
> + Written by Viswanath Puttagunta */
> +/**
> + @file celt_ne10_mdct.c
> + @brief ARM Neon optimizations for mdct using NE10 library
> + */
> +
> +/*
> + Redistribution and use in source and binary forms, with or without
> + modification, are permitted provided that the following conditions
> + are met:
> +
> + - Redistributions of source code must retain the above copyright
> + notice, this list of conditions and the following disclaimer.
> +
> + - Redistributions in binary form must reproduce the above copyright
> + notice, this list of conditions and the following disclaimer in the
> + documentation and/or other materials provided with the distribution.
> +
> + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#ifndef SKIP_CONFIG_H
> +#ifdef HAVE_CONFIG_H
> +#include "config.h"
> +#endif
> +#endif
> +
> +#include "../kiss_fft.h"
> +#include "_kiss_fft_guts.h"
> +#include "../mdct.h"
> +#include "stack_alloc.h"
> +#include "os_support.h"
> +#include "stack_alloc.h"
> +
> +void clt_mdct_forward_float_neon(const mdct_lookup *l,
> + kiss_fft_scalar *in,
> + kiss_fft_scalar * OPUS_RESTRICT out,
> + const opus_val16 *window,
> + int overlap, int shift, int stride)
> +{
> + int i;
> + int N, N2, N4;
> + VARDECL(kiss_fft_scalar, f);
> + VARDECL(kiss_fft_cpx, f2);
> + const kiss_fft_state *st = l->kfft[shift];
> + const kiss_twiddle_scalar *trig;
> +
> + SAVE_STACK;
> +
> + N = l->n;
> + trig = l->trig;
> + for (i=0;i<shift;i++)
> + {
> + N >>= 1;
> + trig += N;
> + }
> + N2 = N>>1;
> + N4 = N>>2;
> +
> + ALLOC(f, N2, kiss_fft_scalar);
> + ALLOC(f2, N4, kiss_fft_cpx);
> +
> + /* Consider the input to be composed of four blocks: [a, b, c, d] */
> + /* Window, shuffle, fold */
> + {
> + /* Temp pointers to make it really clear to the compiler what we're doing */
> + const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
> + const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
> + kiss_fft_scalar * OPUS_RESTRICT yp = f;
> + const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
> + const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
> + for(i=0;i<((overlap+3)>>2);i++)
> + {
> + /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
> + *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
> + *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
> + xp1+=2;
> + xp2-=2;
> + wp1+=2;
> + wp2-=2;
> + }
> + wp1 = window;
> + wp2 = window+overlap-1;
> + for(;i<N4-((overlap+3)>>2);i++)
> + {
> + /* Real part arranged as a-bR, Imag part arranged as -c-dR */
> + *yp++ = *xp2;
> + *yp++ = *xp1;
> + xp1+=2;
> + xp2-=2;
> + }
> + for(;i<N4;i++)
> + {
> + /* Real part arranged as a-bR, Imag part arranged as -c-dR */
> + *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
> + *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
> + xp1+=2;
> + xp2-=2;
> + wp1+=2;
> + wp2-=2;
> + }
> + }
> + /* Pre-rotation */
> + {
> + kiss_fft_scalar * OPUS_RESTRICT yp = f;
> + const kiss_twiddle_scalar *t = &trig[0];
> + for(i=0;i<N4;i++)
> + {
> + kiss_fft_cpx yc;
> + kiss_twiddle_scalar t0, t1;
> + kiss_fft_scalar re, im, yr, yi;
> + t0 = t[i];
> + t1 = t[N4+i];
> + re = *yp++;
> + im = *yp++;
> + yr = S_MUL(re,t0) - S_MUL(im,t1);
> + yi = S_MUL(im,t0) + S_MUL(re,t1);
> + yc.r = yr;
> + yc.i = yi;
> + f2[i] = yc;
> + }
> + }
> +
> + /* N/4 complex FFT, does not downscale anymore */
> + opus_fft(st, f2, (kiss_fft_cpx *)f, opus_select_arch());
> +
> + /* Post-rotate */
> + {
> + /* Temp pointers to make it really clear to the compiler what we're doing */
> + const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
> + kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
> + kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
> + const kiss_twiddle_scalar *t = &trig[0];
> + /* Temp pointers to make it really clear to the compiler what we're doing */
> + for(i=0;i<N4;i++)
> + {
> + kiss_fft_scalar yr, yi;
> + yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
> + yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
> + *yp1 = yr;
> + *yp2 = yi;
> + fp++;
> + yp1 += 2*stride;
> + yp2 -= 2*stride;
> + }
> + }
> + RESTORE_STACK;
> +}
> diff --git a/celt/arm/fft_arm.h b/celt/arm/fft_arm.h
> new file mode 100644
> index 0000000..16f008b
> --- /dev/null
> +++ b/celt/arm/fft_arm.h
> @@ -0,0 +1,65 @@
> +/* Copyright (c) 2015-2016 Xiph.Org Foundation
> + Written by Viswanath Puttagunta */
> +/**
> + @file fft_arm.h
> + @brief ARM Neon Intrinsic optimizations for fft using NE10 library
> + */
> +
> +/*
> + Redistribution and use in source and binary forms, with or without
> + modification, are permitted provided that the following conditions
> + are met:
> +
> + - Redistributions of source code must retain the above copyright
> + notice, this list of conditions and the following disclaimer.
> +
> + - Redistributions in binary form must reproduce the above copyright
> + notice, this list of conditions and the following disclaimer in the
> + documentation and/or other materials provided with the distribution.
> +
> + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +
> +#if !defined(FFT_ARM_H)
> +#define FFT_ARM_H
> +
> +#include "config.h"
> +#include "kiss_fft.h"
> +
> +#if !defined(FIXED_POINT)
> +#if defined(HAVE_ARM_NE10)
> +
> +int opus_fft_alloc_arm_float_neon(kiss_fft_state *st);
> +void opus_fft_free_arm_float_neon(kiss_fft_state *st);
> +
> +void opus_fft_float_neon(const kiss_fft_state *st,
> + const kiss_fft_cpx *fin,
> + kiss_fft_cpx *fout);
> +#if !defined(OPUS_HAVE_RTCD)
> +#define OVERRIDE_OPUS_FFT (1)
> +
> +#define opus_fft_alloc_arch(_st, arch) \
> + opus_fft_alloc_arm_float_neon(_st)
> +
> +#define opus_fft_free_arch(_st, arch) opus_fft_free_arm_float_neon(_st)
> +
> +#define opus_fft(_st, _fin, _fout, arch) \
> + opus_fft_float_neon(_st, _fin, _fout)
> +
> +#endif /* OPUS_HAVE_RTCD */
> +
> +#endif /* HAVE_ARM_NE10 */
> +#endif /* FIXED_POINT */
> +
> +#endif
> diff --git a/celt/arm/mdct_arm.h b/celt/arm/mdct_arm.h
> new file mode 100644
> index 0000000..d0a8a8c
> --- /dev/null
> +++ b/celt/arm/mdct_arm.h
> @@ -0,0 +1,52 @@
> +/* Copyright (c) 2015-2016 Xiph.Org Foundation
> + Written by Viswanath Puttagunta */
> +/**
> + @file arm_mdct.h
> + @brief ARM Neon Intrinsic optimizations for mdct using NE10 library
> + */
> +
> +/*
> + Redistribution and use in source and binary forms, with or without
> + modification, are permitted provided that the following conditions
> + are met:
> +
> + - Redistributions of source code must retain the above copyright
> + notice, this list of conditions and the following disclaimer.
> +
> + - Redistributions in binary form must reproduce the above copyright
> + notice, this list of conditions and the following disclaimer in the
> + documentation and/or other materials provided with the distribution.
> +
> + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#if !defined(MDCT_ARM_H)
> +#define MDCT_ARM_H
> +
> +#include "config.h"
> +#include "mdct.h"
> +
> +#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
> +/** Compute a forward MDCT and scale by 4/N, trashes the input array */
> +void clt_mdct_forward_float_neon(const mdct_lookup *l, kiss_fft_scalar *in,
> + kiss_fft_scalar * OPUS_RESTRICT out,
> + const opus_val16 *window, int overlap, int shift, int stride);
> +
> +#if !defined(OPUS_HAVE_RTCD)
> +#define OVERRIDE_OPUS_MDCT (1)
> +#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
> + clt_mdct_forward_float_neon((_l, _in, _out, _window, _int, _shift, _stride)
> +#endif /* OPUS_HAVE_RTCD */
> +#endif /* !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) */
> +
> +#endif
> diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c
> index 86a3fbb..9de9a92 100644
> --- a/celt/celt_encoder.c
> +++ b/celt/celt_encoder.c
> @@ -435,7 +435,9 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
> for (b=0;b<B;b++)
> {
> /* Interleaving the sub-frames while doing the MDCTs */
> - clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B);
> + clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N,
> + &out[b+c*N*B], mode->window, overlap, shift, B,
> + opus_select_arch());
> }
> } while (++c<CC);
> if (CC==2&&C==1)
> diff --git a/celt/dump_modes/Makefile b/celt/dump_modes/Makefile
> index 74d527e..8890706 100644
> --- a/celt/dump_modes/Makefile
> +++ b/celt/dump_modes/Makefile
> @@ -1,10 +1,29 @@
> +
> CFLAGS=-O2 -Wall -Wextra -DHAVE_CONFIG_H
> INCLUDES=-I. -I../ -I../.. -I../../include
>
> +SOURCES = dump_modes.c \
> + ../modes.c \
> + ../cwrs.c \
> + ../rate.c \
> + ../entenc.c \
> + ../entdec.c \
> + ../mathops.c \
> + ../mdct.c \
> + ../kiss_fft.c
> +
> +ifdef HAVE_ARM_NE10
> +CC = gcc
> +CFLAGS += -mfpu=neon
> +INCLUDES += -I$(NE10_INCDIR) -DHAVE_ARM_NE10 -DOPUS_ARM_NEON_INTR
> +LIBDIR = -l:$(NE10_LIBDIR)/libNE10.so
> +SOURCES += ../arm/celt_neon_intr.c dump_mode_arm_ne10.c
> +endif
> +
> all: dump_modes
>
> dump_modes:
> - $(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES dump_modes.c ../modes.c ../cwrs.c ../rate.c ../entenc.c ../entdec.c ../mathops.c ../mdct.c ../kiss_fft.c -o dump_modes -lm
> + $(PREFIX)$(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES $(SOURCES) -o $@ $(LIBDIR) -lm
>
> clean:
> rm -f dump_modes
> diff --git a/celt/dump_modes/dump_mode_arm_ne10.c b/celt/dump_modes/dump_mode_arm_ne10.c
> new file mode 100644
> index 0000000..30c7423
> --- /dev/null
> +++ b/celt/dump_modes/dump_mode_arm_ne10.c
> @@ -0,0 +1,103 @@
> +/* Copyright (c) 2015-2016 Xiph.Org Foundation
> + Written by Viswanath Puttagunta */
> +/*
> + Redistribution and use in source and binary forms, with or without
> + modification, are permitted provided that the following conditions
> + are met:
> +
> + - Redistributions of source code must retain the above copyright
> + notice, this list of conditions and the following disclaimer.
> +
> + - Redistributions in binary form must reproduce the above copyright
> + notice, this list of conditions and the following disclaimer in the
> + documentation and/or other materials provided with the distribution.
> +
> + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include "modes.h"
> +#include "dump_modes_arch.h"
> +#include <NE10_dsp.h>
> +
> +static FILE *file;
> +
> +void dump_modes_arch_init(CELTMode **modes, int nb_modes)
> +{
> + int i;
> +
> + file = fopen(ARM_NE10_ARCH_FILE_NAME, "w");
> + fprintf(file, "/* The contents of this file was automatically generated by\n");
> + fprintf(file, " * dump_mode_arm_ne10.c with arguments:");
> + for (i=0;i<nb_modes;i++)
> + {
> + CELTMode *mode = modes[i];
> + fprintf(file, " %d %d",mode->Fs,mode->shortMdctSize*mode->nbShortMdcts);
> + }
> + fprintf(file, "\n * It contains static definitions for some pre-defined modes. */\n");
> + fprintf(file, "#include <NE10_init.h>\n");
> +}
> +
> +void dump_modes_arch_finalize()
> +{
> + fclose(file);
> +}
> +
> +void dump_mode_arch(CELTMode *mode)
> +{
> + int k, j;
> + int mdctSize;
> +
> + mdctSize = mode->shortMdctSize*mode->nbShortMdcts;
> +
> + fprintf(file, "#ifndef NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
> + fprintf(file, "#define NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
> + printf("Printing ne10 values\n");
> + ne10_fft_cfg_float32_t cfg;
> + /* cfg->factors */
> + for(k=0;k<=mode->mdct.maxshift;k++) {
> + fprintf(file, "static const ne10_int32_t ne10_factors_%d[%d] = {\n",
> + mode->mdct.kfft[k]->nfft, (NE10_MAXFACTORS * 2));
> + for(j=0;j<(NE10_MAXFACTORS * 2);j++) {
> + cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->priv;
> + fprintf(file, "%d,%c", cfg->factors[j],(j+16)%15==0?'\n':' ');
> + }
> + fprintf (file, "};\n");
> + }
> +
> + /* cfg->twiddles */
> + for(k=0;k<=mode->mdct.maxshift;k++) {
> + fprintf(file, "static const ne10_fft_cpx_float32_t ne10_twiddles_%d[%d] = {\n",
> + mode->mdct.kfft[k]->nfft, mode->mdct.kfft[k]->nfft);
> + for(j=0;j<mode->mdct.kfft[k]->nfft;j++) {
> + cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->priv;
> + fprintf(file, "{%f,%f},%c", cfg->twiddles[j].r, cfg->twiddles[j].i,(j+4)%3==0?'\n':' ');
> + }
> + fprintf (file, "};\n");
> + }
> +
> + for(k=0;k<=mode->mdct.maxshift;k++) {
> + fprintf(file, "static const ne10_fft_state_float32_t cfg_arch_%d = {\n",
> + mode->mdct.kfft[k]->nfft);
> + cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->priv;
> + fprintf(file, "%d,\n", cfg->nfft);
> + fprintf(file, "(ne10_int32_t *)ne10_factors_%d,\n", mode->mdct.kfft[k]->nfft);
> + fprintf(file, "(ne10_fft_cpx_float32_t *)ne10_twiddles_%d,\n", mode->mdct.kfft[k]->nfft);
> + fprintf(file, "NULL,\n"); /* buffer */
> + fprintf(file, "(ne10_fft_cpx_float32_t *)&ne10_twiddles_%d[%d],\n",
> + mode->mdct.kfft[k]->nfft, cfg->nfft);
> + fprintf(file, "};\n");
> + }
> + fprintf(file, "#endif /*end NE10_FFT_PARAMS%d_%d*/\n", mode->Fs, mdctSize);
> +}
> diff --git a/celt/dump_modes/dump_modes.c b/celt/dump_modes/dump_modes.c
> index ae6a8c1..80947ec 100644
> --- a/celt/dump_modes/dump_modes.c
> +++ b/celt/dump_modes/dump_modes.c
> @@ -35,6 +35,7 @@
> #include "modes.h"
> #include "celt.h"
> #include "rate.h"
> +#include "dump_modes_arch.h"
>
> #define INT16 "%d"
> #define INT32 "%d"
> @@ -62,6 +63,10 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
> fprintf(file, "\n It contains static definitions for some pre-defined modes. */\n");
> fprintf(file, "#include \"modes.h\"\n");
> fprintf(file, "#include \"rate.h\"\n");
> + fprintf(file, "\n#ifdef HAVE_ARM_NE10\n");
> + fprintf(file, "#define OVERRIDE_FFT 1\n");
> + fprintf(file, "#include \"%s\"\n", ARM_NE10_ARCH_FILE_NAME);
> + fprintf(file, "#endif\n");
>
> fprintf(file, "\n");
>
> @@ -149,6 +154,9 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
> fprintf (file, "{" WORD16 ", " WORD16 "},%c", mode->mdct.kfft[0]->twiddles[j].r, mode->mdct.kfft[0]->twiddles[j].i,(j+3)%2==0?'\n':' ');
> fprintf (file, "};\n");
>
> +#ifdef OVERRIDE_FFT
> + dump_mode_arch(mode);
> +#endif
> /* FFT Bitrev tables */
> for (k=0;k<=mode->mdct.maxshift;k++)
> {
> @@ -183,6 +191,13 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
> fprintf (file, "}, /* factors */\n");
> fprintf (file, "fft_bitrev%d, /* bitrev */\n", mode->mdct.kfft[k]->nfft);
> fprintf (file, "fft_twiddles%d_%d, /* bitrev */\n", mode->Fs, mdctSize);
> +
> + fprintf (file, "#ifdef OVERRIDE_FFT\n");
> + fprintf (file, "(void *)&cfg_arch_%d,\n", mode->mdct.kfft[k]->nfft);
> + fprintf (file, "#else\n");
> + fprintf (file, "NULL,\n");
> + fprintf(file, "#endif\n");
> +
> fprintf (file, "};\n");
>
> fprintf(file, "#endif\n");
> @@ -205,7 +220,6 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
> fprintf(file, "#endif\n");
> fprintf(file, "\n");
>
> -
> /* Print the actual mode data */
> fprintf(file, "static const CELTMode mode%d_%d_%d = {\n", mode->Fs, mdctSize, mode->overlap);
> fprintf(file, INT32 ", /* Fs */\n", mode->Fs);
> @@ -323,8 +337,14 @@ int main(int argc, char **argv)
> }
> }
> file = fopen(BASENAME ".h", "w");
> +#ifdef OVERRIDE_FFT
> + dump_modes_arch_init(m, nb);
> +#endif
> dump_modes(file, m, nb);
> fclose(file);
> +#ifdef OVERRIDE_FFT
> + dump_modes_arch_finalize();
> +#endif
> for (i=0;i<nb;i++)
> opus_custom_mode_destroy(m[i]);
> free(m);
> diff --git a/celt/dump_modes/dump_modes_arch.h b/celt/dump_modes/dump_modes_arch.h
> new file mode 100644
> index 0000000..f7df55b
> --- /dev/null
> +++ b/celt/dump_modes/dump_modes_arch.h
> @@ -0,0 +1,14 @@
> +#ifndef DUMP_MODE_ARCH_H
> +#define DUMP_MODE_ARCH_H
> +
> +void dump_modes_arch_init();
> +void dump_mode_arch(CELTMode *mode);
> +void dump_modes_arch_finalize();
> +
> +#define ARM_NE10_ARCH_FILE_NAME "static_modes_float_arm_ne10.h"
> +
> +#if defined(HAVE_ARM_NE10)
> +#define OVERRIDE_FFT (1)
> +#endif
> +
> +#endif
> diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c
> index cc487fc..9a76206 100644
> --- a/celt/kiss_fft.c
> +++ b/celt/kiss_fft.c
> @@ -423,6 +423,11 @@ static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
> #endif
> }
>
> +int opus_fft_alloc_arch_c(kiss_fft_state *st) {
> + (void)st;
> + return 0;
> +}
> +
> /*
> *
> * Allocates all necessary storage space for the fft and ifft.
> @@ -478,6 +483,10 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co
> if (st->bitrev==NULL)
> goto fail;
> compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
> +
> + /* Initialize architecture specific fft parameters */
> + if (opus_fft_alloc_arch(st, opus_select_arch()))
> + goto fail;
> }
> return st;
> fail:
> @@ -485,15 +494,20 @@ fail:
> return NULL;
> }
>
> -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem )
> +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem)
> {
> return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
> }
>
> +void opus_fft_free_arch_c(kiss_fft_state *st) {
> + (void)st;
> +}
> +
> void opus_fft_free(const kiss_fft_state *cfg)
> {
> if (cfg)
> {
> + opus_fft_free_arch((kiss_fft_state *)cfg, opus_select_arch());
> opus_free((opus_int16*)cfg->bitrev);
> if (cfg->shift < 0)
> opus_free((kiss_twiddle_cpx*)cfg->twiddles);
> @@ -551,7 +565,7 @@ void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
> }
> }
>
> -void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
> +void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
> {
> int i;
> opus_val16 scale;
> diff --git a/celt/kiss_fft.h b/celt/kiss_fft.h
> index 390b54d..f9232f9 100644
> --- a/celt/kiss_fft.h
> +++ b/celt/kiss_fft.h
> @@ -32,6 +32,7 @@
> #include <stdlib.h>
> #include <math.h>
> #include "arch.h"
> +#include "cpu_support.h"
>
> #ifdef __cplusplus
> extern "C" {
> @@ -59,6 +60,7 @@ extern "C" {
> # define kiss_twiddle_scalar float
> # define KF_SUFFIX _celt_single
> # endif
> +
> #endif
>
> typedef struct {
> @@ -87,8 +89,13 @@ typedef struct kiss_fft_state{
> opus_int16 factors[2*MAXFACTORS];
> const opus_int16 *bitrev;
> const kiss_twiddle_cpx *twiddles;
> + void *priv; /* Used by arch specific optimizations */
> } kiss_fft_state;
>
> +#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
> +#include "arm/fft_arm.h"
> +#endif
> +
> /*typedef struct kiss_fft_state* kiss_fft_cfg;*/
>
> /**
> @@ -128,7 +135,7 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
> * Note that each element is complex and can be accessed like
> f[k].r and f[k].i
> * */
> -void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
> +void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
> void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
>
> void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
> @@ -136,6 +143,41 @@ void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
>
> void opus_fft_free(const kiss_fft_state *cfg);
>
> +
> +void opus_fft_free_arch_c(kiss_fft_state *st);
> +int opus_fft_alloc_arch_c(kiss_fft_state *st);
> +
> +#if !defined(OVERRIDE_OPUS_FFT)
> +/* Is run-time CPU detection enabled on this platform? */
> +#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
> +
> +int (*const OPUS_FFT_ALLOC_ARCH[OPUS_ARCHMASK+1])(kiss_fft_state *st);
> +
> +#define opus_fft_alloc_arch(_st, arch) \
> + ((*OPUS_FFT_ALLOC_ARCH[(arch)&OPUS_ARCHMASK])(_st))
> +
> +void (*const OPUS_FFT_FREE_ARCH[OPUS_ARCHMASK+1])(kiss_fft_state *st);
> +#define opus_fft_free_arch(_st, arch) \
> + ((*OPUS_FFT_FREE_ARCH[(arch)&OPUS_ARCHMASK])(_st))
> +
> +void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
> + const kiss_fft_cpx *fin,
> + kiss_fft_cpx *fout);
> +#define opus_fft(_cfg, _fin, _fout, arch) \
> + ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
> +#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
> +
> +#define opus_fft_alloc_arch(_st, arch) \
> + opus_fft_alloc_arch_c(_st)
> +
> +#define opus_fft_free_arch(_st, arch) \
> + opus_fft_free_arch_c(_st)
> +
> +#define opus_fft(_cfg, _fin, _fout, arch) \
> + opus_fft_c(_cfg, _fin, _fout)
> +#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
> +#endif /* end if !defined(OVERRIDE_OPUS_FFT) */
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/celt/mdct.c b/celt/mdct.c
> index 2795d90..7e55157 100644
> --- a/celt/mdct.c
> +++ b/celt/mdct.c
> @@ -116,7 +116,7 @@ void clt_mdct_clear(mdct_lookup *l)
>
> /* Forward MDCT trashes the input array */
> #ifndef OVERRIDE_clt_mdct_forward
> -void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
> +void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
> const opus_val16 *window, int overlap, int shift, int stride)
> {
> int i;
> diff --git a/celt/mdct.h b/celt/mdct.h
> index d721821..52d7cca 100644
> --- a/celt/mdct.h
> +++ b/celt/mdct.h
> @@ -53,11 +53,16 @@ typedef struct {
> const kiss_twiddle_scalar * OPUS_RESTRICT trig;
> } mdct_lookup;
>
> +#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
> +#include "arm/mdct_arm.h"
> +#endif
> +
> +
> int clt_mdct_init(mdct_lookup *l,int N, int maxshift);
> void clt_mdct_clear(mdct_lookup *l);
>
> /** Compute a forward MDCT and scale by 4/N, trashes the input array */
> -void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in,
> +void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in,
> kiss_fft_scalar * OPUS_RESTRICT out,
> const opus_val16 *window, int overlap, int shift, int stride);
>
> @@ -67,4 +72,26 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in,
> kiss_fft_scalar * OPUS_RESTRICT out,
> const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride);
>
> +#if !defined(OVERRIDE_OPUS_MDCT)
> +/* Is run-time CPU detection enabled on this platform? */
> +#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
> +
> +void (*const CLT_MDCT_FORWARD_ARCH[OPUS_ARCHMASK+1])(const mdct_lookup *l,
> + kiss_fft_scalar *in,
> + kiss_fft_scalar * OPUS_RESTRICT out,
> + const opus_val16 *window,
> + int overlap, int shift, int stride);
> +
> +
> +#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, arch) \
> + ((*CLT_MDCT_FORWARD_ARCH[(arch)&OPUS_ARCHMASK])(_l, _in, _out, \
> + _window, _overlap, _shift, _stride))
> +#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
> +
> +#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, arch) \
> + clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride)
> +
> +#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
> +#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */
> +
> #endif
> diff --git a/celt/static_modes_float.h b/celt/static_modes_float.h
> index 2fadb62..e115b79 100644
> --- a/celt/static_modes_float.h
> +++ b/celt/static_modes_float.h
> @@ -4,6 +4,11 @@
> #include "modes.h"
> #include "rate.h"
>
> +#ifdef HAVE_ARM_NE10
> +#define OVERRIDE_FFT 1
> +#include "static_modes_float_arm_ne10.h"
> +#endif
> +
> #ifndef DEF_WINDOW120
> #define DEF_WINDOW120
> static const opus_val16 window120[120] = {
> @@ -431,6 +436,11 @@ static const kiss_fft_state fft_state48000_960_0 = {
> {5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
> fft_bitrev480, /* bitrev */
> fft_twiddles48000_960, /* bitrev */
> +#ifdef OVERRIDE_FFT
> +(void *)&cfg_arch_480,
> +#else
> +NULL,
> +#endif
> };
> #endif
>
> @@ -443,6 +453,11 @@ static const kiss_fft_state fft_state48000_960_1 = {
> {5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
> fft_bitrev240, /* bitrev */
> fft_twiddles48000_960, /* bitrev */
> +#ifdef OVERRIDE_FFT
> +(void *)&cfg_arch_240,
> +#else
> +NULL,
> +#endif
> };
> #endif
>
> @@ -455,6 +470,11 @@ static const kiss_fft_state fft_state48000_960_2 = {
> {5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
> fft_bitrev120, /* bitrev */
> fft_twiddles48000_960, /* bitrev */
> +#ifdef OVERRIDE_FFT
> +(void *)&cfg_arch_120,
> +#else
> +NULL,
> +#endif
> };
> #endif
>
> @@ -467,6 +487,11 @@ static const kiss_fft_state fft_state48000_960_3 = {
> {5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
> fft_bitrev60, /* bitrev */
> fft_twiddles48000_960, /* bitrev */
> +#ifdef OVERRIDE_FFT
> +(void *)&cfg_arch_60,
> +#else
> +NULL,
> +#endif
> };
> #endif
>
> diff --git a/celt/static_modes_float_arm_ne10.h b/celt/static_modes_float_arm_ne10.h
> new file mode 100644
> index 0000000..4229048
> --- /dev/null
> +++ b/celt/static_modes_float_arm_ne10.h
> @@ -0,0 +1,367 @@
> +/* The contents of this file was automatically generated by
> + * dump_mode_arm_ne10.c with arguments: 48000 960
> + * It contains static definitions for some pre-defined modes. */
> +#include <NE10_init.h>
> +#ifndef NE10_FFT_PARAMS48000_960
> +#define NE10_FFT_PARAMS48000_960
> +static const ne10_int32_t ne10_factors_480[64] = {
> +4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, };
> +static const ne10_int32_t ne10_factors_240[64] = {
> +3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, };
> +static const ne10_int32_t ne10_factors_120[64] = {
> +3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, };
> +static const ne10_int32_t ne10_factors_60[64] = {
> +2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, };
> +static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = {
> +{1.000000,0.000000}, {1.000000,-0.000000}, {1.000000,-0.000000},
> +{1.000000,-0.000000}, {0.913545,-0.406737}, {0.669131,-0.743145},
> +{1.000000,-0.000000}, {0.669131,-0.743145}, {-0.104529,-0.994522},
> +{1.000000,-0.000000}, {0.309017,-0.951057}, {-0.809017,-0.587785},
> +{1.000000,-0.000000}, {-0.104529,-0.994522}, {-0.978148,0.207912},
> +{1.000000,-0.000000}, {0.978148,-0.207912}, {0.913545,-0.406737},
> +{0.809017,-0.587785}, {0.669131,-0.743145}, {0.500000,-0.866025},
> +{0.309017,-0.951057}, {0.104528,-0.994522}, {-0.104529,-0.994522},
> +{-0.309017,-0.951056}, {-0.500000,-0.866025}, {-0.669131,-0.743145},
> +{-0.809017,-0.587785}, {-0.913545,-0.406737}, {-0.978148,-0.207912},
> +{1.000000,-0.000000}, {0.998630,-0.052336}, {0.994522,-0.104528},
> +{0.987688,-0.156434}, {0.978148,-0.207912}, {0.965926,-0.258819},
> +{0.951057,-0.309017}, {0.933580,-0.358368}, {0.913545,-0.406737},
> +{0.891007,-0.453991}, {0.866025,-0.500000}, {0.838671,-0.544639},
> +{0.809017,-0.587785}, {0.777146,-0.629320}, {0.743145,-0.669131},
> +{0.707107,-0.707107}, {0.669131,-0.743145}, {0.629320,-0.777146},
> +{0.587785,-0.809017}, {0.544639,-0.838671}, {0.500000,-0.866025},
> +{0.453991,-0.891007}, {0.406737,-0.913545}, {0.358368,-0.933580},
> +{0.309017,-0.951057}, {0.258819,-0.965926}, {0.207912,-0.978148},
> +{0.156434,-0.987688}, {0.104528,-0.994522}, {0.052336,-0.998630},
> +{1.000000,-0.000000}, {0.994522,-0.104528}, {0.978148,-0.207912},
> +{0.951057,-0.309017}, {0.913545,-0.406737}, {0.866025,-0.500000},
> +{0.809017,-0.587785}, {0.743145,-0.669131}, {0.669131,-0.743145},
> +{0.587785,-0.809017}, {0.500000,-0.866025}, {0.406737,-0.913545},
> +{0.309017,-0.951057}, {0.207912,-0.978148}, {0.104528,-0.994522},
> +{-0.000000,-1.000000}, {-0.104529,-0.994522}, {-0.207912,-0.978148},
> +{-0.309017,-0.951056}, {-0.406737,-0.913545}, {-0.500000,-0.866025},
> +{-0.587785,-0.809017}, {-0.669131,-0.743145}, {-0.743145,-0.669130},
> +{-0.809017,-0.587785}, {-0.866025,-0.500000}, {-0.913545,-0.406737},
> +{-0.951057,-0.309017}, {-0.978148,-0.207912}, {-0.994522,-0.104528},
> +{1.000000,-0.000000}, {0.987688,-0.156434}, {0.951057,-0.309017},
> +{0.891007,-0.453991}, {0.809017,-0.587785}, {0.707107,-0.707107},
> +{0.587785,-0.809017}, {0.453991,-0.891007}, {0.309017,-0.951057},
> +{0.156434,-0.987688}, {-0.000000,-1.000000}, {-0.156434,-0.987688},
> +{-0.309017,-0.951056}, {-0.453991,-0.891006}, {-0.587785,-0.809017},
> +{-0.707107,-0.707107}, {-0.809017,-0.587785}, {-0.891007,-0.453990},
> +{-0.951057,-0.309017}, {-0.987688,-0.156434}, {-1.000000,0.000000},
> +{-0.987688,0.156435}, {-0.951057,0.309017}, {-0.891007,0.453991},
> +{-0.809017,0.587785}, {-0.707107,0.707107}, {-0.587785,0.809017},
> +{-0.453990,0.891007}, {-0.309017,0.951056}, {-0.156435,0.987688},
> +{1.000000,-0.000000}, {0.999914,-0.013090}, {0.999657,-0.026177},
> +{0.999229,-0.039260}, {0.998630,-0.052336}, {0.997859,-0.065403},
> +{0.996917,-0.078459}, {0.995805,-0.091502}, {0.994522,-0.104528},
> +{0.993068,-0.117537}, {0.991445,-0.130526}, {0.989651,-0.143493},
> +{0.987688,-0.156434}, {0.985556,-0.169350}, {0.983255,-0.182236},
> +{0.980785,-0.195090}, {0.978148,-0.207912}, {0.975342,-0.220697},
> +{0.972370,-0.233445}, {0.969231,-0.246153}, {0.965926,-0.258819},
> +{0.962455,-0.271440}, {0.958820,-0.284015}, {0.955020,-0.296542},
> +{0.951057,-0.309017}, {0.946930,-0.321439}, {0.942641,-0.333807},
> +{0.938191,-0.346117}, {0.933580,-0.358368}, {0.928810,-0.370557},
> +{0.923880,-0.382683}, {0.918791,-0.394744}, {0.913545,-0.406737},
> +{0.908143,-0.418660}, {0.902585,-0.430511}, {0.896873,-0.442289},
> +{0.891007,-0.453991}, {0.884988,-0.465615}, {0.878817,-0.477159},
> +{0.872496,-0.488621}, {0.866025,-0.500000}, {0.859406,-0.511293},
> +{0.852640,-0.522499}, {0.845728,-0.533615}, {0.838671,-0.544639},
> +{0.831470,-0.555570}, {0.824126,-0.566406}, {0.816642,-0.577145},
> +{0.809017,-0.587785}, {0.801254,-0.598325}, {0.793353,-0.608761},
> +{0.785317,-0.619094}, {0.777146,-0.629320}, {0.768842,-0.639439},
> +{0.760406,-0.649448}, {0.751840,-0.659346}, {0.743145,-0.669131},
> +{0.734322,-0.678801}, {0.725374,-0.688355}, {0.716302,-0.697791},
> +{0.707107,-0.707107}, {0.697790,-0.716302}, {0.688355,-0.725374},
> +{0.678801,-0.734323}, {0.669131,-0.743145}, {0.659346,-0.751840},
> +{0.649448,-0.760406}, {0.639439,-0.768842}, {0.629320,-0.777146},
> +{0.619094,-0.785317}, {0.608761,-0.793353}, {0.598325,-0.801254},
> +{0.587785,-0.809017}, {0.577145,-0.816642}, {0.566406,-0.824126},
> +{0.555570,-0.831470}, {0.544639,-0.838671}, {0.533615,-0.845728},
> +{0.522498,-0.852640}, {0.511293,-0.859406}, {0.500000,-0.866025},
> +{0.488621,-0.872496}, {0.477159,-0.878817}, {0.465614,-0.884988},
> +{0.453991,-0.891007}, {0.442289,-0.896873}, {0.430511,-0.902585},
> +{0.418660,-0.908143}, {0.406737,-0.913545}, {0.394744,-0.918791},
> +{0.382683,-0.923880}, {0.370557,-0.928810}, {0.358368,-0.933580},
> +{0.346117,-0.938191}, {0.333807,-0.942641}, {0.321439,-0.946930},
> +{0.309017,-0.951057}, {0.296542,-0.955020}, {0.284015,-0.958820},
> +{0.271440,-0.962455}, {0.258819,-0.965926}, {0.246153,-0.969231},
> +{0.233445,-0.972370}, {0.220697,-0.975342}, {0.207912,-0.978148},
> +{0.195090,-0.980785}, {0.182236,-0.983255}, {0.169349,-0.985556},
> +{0.156434,-0.987688}, {0.143493,-0.989651}, {0.130526,-0.991445},
> +{0.117537,-0.993068}, {0.104528,-0.994522}, {0.091502,-0.995805},
> +{0.078459,-0.996917}, {0.065403,-0.997859}, {0.052336,-0.998630},
> +{0.039260,-0.999229}, {0.026177,-0.999657}, {0.013090,-0.999914},
> +{1.000000,-0.000000}, {0.999657,-0.026177}, {0.998630,-0.052336},
> +{0.996917,-0.078459}, {0.994522,-0.104528}, {0.991445,-0.130526},
> +{0.987688,-0.156434}, {0.983255,-0.182236}, {0.978148,-0.207912},
> +{0.972370,-0.233445}, {0.965926,-0.258819}, {0.958820,-0.284015},
> +{0.951057,-0.309017}, {0.942641,-0.333807}, {0.933580,-0.358368},
> +{0.923880,-0.382683}, {0.913545,-0.406737}, {0.902585,-0.430511},
> +{0.891007,-0.453991}, {0.878817,-0.477159}, {0.866025,-0.500000},
> +{0.852640,-0.522499}, {0.838671,-0.544639}, {0.824126,-0.566406},
> +{0.809017,-0.587785}, {0.793353,-0.608761}, {0.777146,-0.629320},
> +{0.760406,-0.649448}, {0.743145,-0.669131}, {0.725374,-0.688355},
> +{0.707107,-0.707107}, {0.688355,-0.725374}, {0.669131,-0.743145},
> +{0.649448,-0.760406}, {0.629320,-0.777146}, {0.608761,-0.793353},
> +{0.587785,-0.809017}, {0.566406,-0.824126}, {0.544639,-0.838671},
> +{0.522498,-0.852640}, {0.500000,-0.866025}, {0.477159,-0.878817},
> +{0.453991,-0.891007}, {0.430511,-0.902585}, {0.406737,-0.913545},
> +{0.382683,-0.923880}, {0.358368,-0.933580}, {0.333807,-0.942641},
> +{0.309017,-0.951057}, {0.284015,-0.958820}, {0.258819,-0.965926},
> +{0.233445,-0.972370}, {0.207912,-0.978148}, {0.182236,-0.983255},
> +{0.156434,-0.987688}, {0.130526,-0.991445}, {0.104528,-0.994522},
> +{0.078459,-0.996917}, {0.052336,-0.998630}, {0.026177,-0.999657},
> +{-0.000000,-1.000000}, {-0.026177,-0.999657}, {-0.052336,-0.998630},
> +{-0.078459,-0.996917}, {-0.104529,-0.994522}, {-0.130526,-0.991445},
> +{-0.156434,-0.987688}, {-0.182236,-0.983255}, {-0.207912,-0.978148},
> +{-0.233445,-0.972370}, {-0.258819,-0.965926}, {-0.284015,-0.958820},
> +{-0.309017,-0.951056}, {-0.333807,-0.942641}, {-0.358368,-0.933580},
> +{-0.382684,-0.923880}, {-0.406737,-0.913545}, {-0.430511,-0.902585},
> +{-0.453991,-0.891006}, {-0.477159,-0.878817}, {-0.500000,-0.866025},
> +{-0.522499,-0.852640}, {-0.544639,-0.838671}, {-0.566406,-0.824126},
> +{-0.587785,-0.809017}, {-0.608761,-0.793353}, {-0.629321,-0.777146},
> +{-0.649448,-0.760406}, {-0.669131,-0.743145}, {-0.688355,-0.725374},
> +{-0.707107,-0.707107}, {-0.725374,-0.688354}, {-0.743145,-0.669130},
> +{-0.760406,-0.649448}, {-0.777146,-0.629320}, {-0.793353,-0.608761},
> +{-0.809017,-0.587785}, {-0.824126,-0.566406}, {-0.838671,-0.544639},
> +{-0.852640,-0.522498}, {-0.866025,-0.500000}, {-0.878817,-0.477159},
> +{-0.891007,-0.453990}, {-0.902585,-0.430511}, {-0.913545,-0.406737},
> +{-0.923880,-0.382683}, {-0.933580,-0.358368}, {-0.942642,-0.333807},
> +{-0.951057,-0.309017}, {-0.958820,-0.284015}, {-0.965926,-0.258819},
> +{-0.972370,-0.233445}, {-0.978148,-0.207912}, {-0.983255,-0.182235},
> +{-0.987688,-0.156434}, {-0.991445,-0.130526}, {-0.994522,-0.104528},
> +{-0.996917,-0.078459}, {-0.998630,-0.052336}, {-0.999657,-0.026177},
> +{1.000000,-0.000000}, {0.999229,-0.039260}, {0.996917,-0.078459},
> +{0.993068,-0.117537}, {0.987688,-0.156434}, {0.980785,-0.195090},
> +{0.972370,-0.233445}, {0.962455,-0.271440}, {0.951057,-0.309017},
> +{0.938191,-0.346117}, {0.923880,-0.382683}, {0.908143,-0.418660},
> +{0.891007,-0.453991}, {0.872496,-0.488621}, {0.852640,-0.522499},
> +{0.831470,-0.555570}, {0.809017,-0.587785}, {0.785317,-0.619094},
> +{0.760406,-0.649448}, {0.734322,-0.678801}, {0.707107,-0.707107},
> +{0.678801,-0.734323}, {0.649448,-0.760406}, {0.619094,-0.785317},
> +{0.587785,-0.809017}, {0.555570,-0.831470}, {0.522498,-0.852640},
> +{0.488621,-0.872496}, {0.453991,-0.891007}, {0.418660,-0.908143},
> +{0.382683,-0.923880}, {0.346117,-0.938191}, {0.309017,-0.951057},
> +{0.271440,-0.962455}, {0.233445,-0.972370}, {0.195090,-0.980785},
> +{0.156434,-0.987688}, {0.117537,-0.993068}, {0.078459,-0.996917},
> +{0.039260,-0.999229}, {-0.000000,-1.000000}, {-0.039260,-0.999229},
> +{-0.078459,-0.996917}, {-0.117537,-0.993068}, {-0.156434,-0.987688},
> +{-0.195090,-0.980785}, {-0.233445,-0.972370}, {-0.271440,-0.962455},
> +{-0.309017,-0.951056}, {-0.346117,-0.938191}, {-0.382684,-0.923880},
> +{-0.418660,-0.908143}, {-0.453991,-0.891006}, {-0.488621,-0.872496},
> +{-0.522499,-0.852640}, {-0.555570,-0.831470}, {-0.587785,-0.809017},
> +{-0.619094,-0.785317}, {-0.649448,-0.760406}, {-0.678801,-0.734322},
> +{-0.707107,-0.707107}, {-0.734323,-0.678801}, {-0.760406,-0.649448},
> +{-0.785317,-0.619094}, {-0.809017,-0.587785}, {-0.831470,-0.555570},
> +{-0.852640,-0.522498}, {-0.872496,-0.488621}, {-0.891007,-0.453990},
> +{-0.908143,-0.418660}, {-0.923880,-0.382683}, {-0.938191,-0.346117},
> +{-0.951057,-0.309017}, {-0.962455,-0.271440}, {-0.972370,-0.233445},
> +{-0.980785,-0.195090}, {-0.987688,-0.156434}, {-0.993068,-0.117537},
> +{-0.996917,-0.078459}, {-0.999229,-0.039260}, {-1.000000,0.000000},
> +{-0.999229,0.039260}, {-0.996917,0.078459}, {-0.993068,0.117538},
> +{-0.987688,0.156435}, {-0.980785,0.195090}, {-0.972370,0.233446},
> +{-0.962455,0.271441}, {-0.951057,0.309017}, {-0.938191,0.346117},
> +{-0.923880,0.382683}, {-0.908143,0.418660}, {-0.891007,0.453991},
> +{-0.872496,0.488621}, {-0.852640,0.522499}, {-0.831470,0.555570},
> +{-0.809017,0.587785}, {-0.785317,0.619094}, {-0.760406,0.649448},
> +{-0.734322,0.678801}, {-0.707107,0.707107}, {-0.678801,0.734323},
> +{-0.649448,0.760406}, {-0.619094,0.785317}, {-0.587785,0.809017},
> +{-0.555570,0.831470}, {-0.522498,0.852640}, {-0.488621,0.872496},
> +{-0.453990,0.891007}, {-0.418659,0.908143}, {-0.382683,0.923880},
> +{-0.346117,0.938191}, {-0.309017,0.951056}, {-0.271441,0.962455},
> +{-0.233445,0.972370}, {-0.195090,0.980785}, {-0.156435,0.987688},
> +{-0.117537,0.993068}, {-0.078459,0.996917}, {-0.039260,0.999229},
> +};
> +static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = {
> +{1.000000,0.000000}, {1.000000,-0.000000}, {1.000000,-0.000000},
> +{1.000000,-0.000000}, {0.913545,-0.406737}, {0.669131,-0.743145},
> +{1.000000,-0.000000}, {0.669131,-0.743145}, {-0.104529,-0.994522},
> +{1.000000,-0.000000}, {0.309017,-0.951057}, {-0.809017,-0.587785},
> +{1.000000,-0.000000}, {-0.104529,-0.994522}, {-0.978148,0.207912},
> +{1.000000,-0.000000}, {0.994522,-0.104528}, {0.978148,-0.207912},
> +{0.951057,-0.309017}, {0.913545,-0.406737}, {0.866025,-0.500000},
> +{0.809017,-0.587785}, {0.743145,-0.669131}, {0.669131,-0.743145},
> +{0.587785,-0.809017}, {0.500000,-0.866025}, {0.406737,-0.913545},
> +{0.309017,-0.951057}, {0.207912,-0.978148}, {0.104528,-0.994522},
> +{1.000000,-0.000000}, {0.978148,-0.207912}, {0.913545,-0.406737},
> +{0.809017,-0.587785}, {0.669131,-0.743145}, {0.500000,-0.866025},
> +{0.309017,-0.951057}, {0.104528,-0.994522}, {-0.104529,-0.994522},
> +{-0.309017,-0.951056}, {-0.500000,-0.866025}, {-0.669131,-0.743145},
> +{-0.809017,-0.587785}, {-0.913545,-0.406737}, {-0.978148,-0.207912},
> +{1.000000,-0.000000}, {0.951057,-0.309017}, {0.809017,-0.587785},
> +{0.587785,-0.809017}, {0.309017,-0.951057}, {-0.000000,-1.000000},
> +{-0.309017,-0.951056}, {-0.587785,-0.809017}, {-0.809017,-0.587785},
> +{-0.951057,-0.309017}, {-1.000000,0.000000}, {-0.951057,0.309017},
> +{-0.809017,0.587785}, {-0.587785,0.809017}, {-0.309017,0.951056},
> +{1.000000,-0.000000}, {0.999657,-0.026177}, {0.998630,-0.052336},
> +{0.996917,-0.078459}, {0.994522,-0.104528}, {0.991445,-0.130526},
> +{0.987688,-0.156434}, {0.983255,-0.182236}, {0.978148,-0.207912},
> +{0.972370,-0.233445}, {0.965926,-0.258819}, {0.958820,-0.284015},
> +{0.951057,-0.309017}, {0.942641,-0.333807}, {0.933580,-0.358368},
> +{0.923880,-0.382683}, {0.913545,-0.406737}, {0.902585,-0.430511},
> +{0.891007,-0.453991}, {0.878817,-0.477159}, {0.866025,-0.500000},
> +{0.852640,-0.522499}, {0.838671,-0.544639}, {0.824126,-0.566406},
> +{0.809017,-0.587785}, {0.793353,-0.608761}, {0.777146,-0.629320},
> +{0.760406,-0.649448}, {0.743145,-0.669131}, {0.725374,-0.688355},
> +{0.707107,-0.707107}, {0.688355,-0.725374}, {0.669131,-0.743145},
> +{0.649448,-0.760406}, {0.629320,-0.777146}, {0.608761,-0.793353},
> +{0.587785,-0.809017}, {0.566406,-0.824126}, {0.544639,-0.838671},
> +{0.522498,-0.852640}, {0.500000,-0.866025}, {0.477159,-0.878817},
> +{0.453991,-0.891007}, {0.430511,-0.902585}, {0.406737,-0.913545},
> +{0.382683,-0.923880}, {0.358368,-0.933580}, {0.333807,-0.942641},
> +{0.309017,-0.951057}, {0.284015,-0.958820}, {0.258819,-0.965926},
> +{0.233445,-0.972370}, {0.207912,-0.978148}, {0.182236,-0.983255},
> +{0.156434,-0.987688}, {0.130526,-0.991445}, {0.104528,-0.994522},
> +{0.078459,-0.996917}, {0.052336,-0.998630}, {0.026177,-0.999657},
> +{1.000000,-0.000000}, {0.998630,-0.052336}, {0.994522,-0.104528},
> +{0.987688,-0.156434}, {0.978148,-0.207912}, {0.965926,-0.258819},
> +{0.951057,-0.309017}, {0.933580,-0.358368}, {0.913545,-0.406737},
> +{0.891007,-0.453991}, {0.866025,-0.500000}, {0.838671,-0.544639},
> +{0.809017,-0.587785}, {0.777146,-0.629320}, {0.743145,-0.669131},
> +{0.707107,-0.707107}, {0.669131,-0.743145}, {0.629320,-0.777146},
> +{0.587785,-0.809017}, {0.544639,-0.838671}, {0.500000,-0.866025},
> +{0.453991,-0.891007}, {0.406737,-0.913545}, {0.358368,-0.933580},
> +{0.309017,-0.951057}, {0.258819,-0.965926}, {0.207912,-0.978148},
> +{0.156434,-0.987688}, {0.104528,-0.994522}, {0.052336,-0.998630},
> +{-0.000000,-1.000000}, {-0.052336,-0.998630}, {-0.104529,-0.994522},
> +{-0.156434,-0.987688}, {-0.207912,-0.978148}, {-0.258819,-0.965926},
> +{-0.309017,-0.951056}, {-0.358368,-0.933580}, {-0.406737,-0.913545},
> +{-0.453991,-0.891006}, {-0.500000,-0.866025}, {-0.544639,-0.838671},
> +{-0.587785,-0.809017}, {-0.629321,-0.777146}, {-0.669131,-0.743145},
> +{-0.707107,-0.707107}, {-0.743145,-0.669130}, {-0.777146,-0.629320},
> +{-0.809017,-0.587785}, {-0.838671,-0.544639}, {-0.866025,-0.500000},
> +{-0.891007,-0.453990}, {-0.913545,-0.406737}, {-0.933580,-0.358368},
> +{-0.951057,-0.309017}, {-0.965926,-0.258819}, {-0.978148,-0.207912},
> +{-0.987688,-0.156434}, {-0.994522,-0.104528}, {-0.998630,-0.052336},
> +{1.000000,-0.000000}, {0.996917,-0.078459}, {0.987688,-0.156434},
> +{0.972370,-0.233445}, {0.951057,-0.309017}, {0.923880,-0.382683},
> +{0.891007,-0.453991}, {0.852640,-0.522499}, {0.809017,-0.587785},
> +{0.760406,-0.649448}, {0.707107,-0.707107}, {0.649448,-0.760406},
> +{0.587785,-0.809017}, {0.522498,-0.852640}, {0.453991,-0.891007},
> +{0.382683,-0.923880}, {0.309017,-0.951057}, {0.233445,-0.972370},
> +{0.156434,-0.987688}, {0.078459,-0.996917}, {-0.000000,-1.000000},
> +{-0.078459,-0.996917}, {-0.156434,-0.987688}, {-0.233445,-0.972370},
> +{-0.309017,-0.951056}, {-0.382684,-0.923880}, {-0.453991,-0.891006},
> +{-0.522499,-0.852640}, {-0.587785,-0.809017}, {-0.649448,-0.760406},
> +{-0.707107,-0.707107}, {-0.760406,-0.649448}, {-0.809017,-0.587785},
> +{-0.852640,-0.522498}, {-0.891007,-0.453990}, {-0.923880,-0.382683},
> +{-0.951057,-0.309017}, {-0.972370,-0.233445}, {-0.987688,-0.156434},
> +{-0.996917,-0.078459}, {-1.000000,0.000000}, {-0.996917,0.078459},
> +{-0.987688,0.156435}, {-0.972370,0.233446}, {-0.951057,0.309017},
> +{-0.923880,0.382683}, {-0.891007,0.453991}, {-0.852640,0.522499},
> +{-0.809017,0.587785}, {-0.760406,0.649448}, {-0.707107,0.707107},
> +{-0.649448,0.760406}, {-0.587785,0.809017}, {-0.522498,0.852640},
> +{-0.453990,0.891007}, {-0.382683,0.923880}, {-0.309017,0.951056},
> +{-0.233445,0.972370}, {-0.156435,0.987688}, {-0.078459,0.996917},
> +};
> +static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = {
> +{1.000000,0.000000}, {1.000000,-0.000000}, {1.000000,-0.000000},
> +{1.000000,-0.000000}, {0.913545,-0.406737}, {0.669131,-0.743145},
> +{1.000000,-0.000000}, {0.669131,-0.743145}, {-0.104529,-0.994522},
> +{1.000000,-0.000000}, {0.309017,-0.951057}, {-0.809017,-0.587785},
> +{1.000000,-0.000000}, {-0.104529,-0.994522}, {-0.978148,0.207912},
> +{1.000000,-0.000000}, {0.978148,-0.207912}, {0.913545,-0.406737},
> +{0.809017,-0.587785}, {0.669131,-0.743145}, {0.500000,-0.866025},
> +{0.309017,-0.951057}, {0.104528,-0.994522}, {-0.104529,-0.994522},
> +{-0.309017,-0.951056}, {-0.500000,-0.866025}, {-0.669131,-0.743145},
> +{-0.809017,-0.587785}, {-0.913545,-0.406737}, {-0.978148,-0.207912},
> +{1.000000,-0.000000}, {0.998630,-0.052336}, {0.994522,-0.104528},
> +{0.987688,-0.156434}, {0.978148,-0.207912}, {0.965926,-0.258819},
> +{0.951057,-0.309017}, {0.933580,-0.358368}, {0.913545,-0.406737},
> +{0.891007,-0.453991}, {0.866025,-0.500000}, {0.838671,-0.544639},
> +{0.809017,-0.587785}, {0.777146,-0.629320}, {0.743145,-0.669131},
> +{0.707107,-0.707107}, {0.669131,-0.743145}, {0.629320,-0.777146},
> +{0.587785,-0.809017}, {0.544639,-0.838671}, {0.500000,-0.866025},
> +{0.453991,-0.891007}, {0.406737,-0.913545}, {0.358368,-0.933580},
> +{0.309017,-0.951057}, {0.258819,-0.965926}, {0.207912,-0.978148},
> +{0.156434,-0.987688}, {0.104528,-0.994522}, {0.052336,-0.998630},
> +{1.000000,-0.000000}, {0.994522,-0.104528}, {0.978148,-0.207912},
> +{0.951057,-0.309017}, {0.913545,-0.406737}, {0.866025,-0.500000},
> +{0.809017,-0.587785}, {0.743145,-0.669131}, {0.669131,-0.743145},
> +{0.587785,-0.809017}, {0.500000,-0.866025}, {0.406737,-0.913545},
> +{0.309017,-0.951057}, {0.207912,-0.978148}, {0.104528,-0.994522},
> +{-0.000000,-1.000000}, {-0.104529,-0.994522}, {-0.207912,-0.978148},
> +{-0.309017,-0.951056}, {-0.406737,-0.913545}, {-0.500000,-0.866025},
> +{-0.587785,-0.809017}, {-0.669131,-0.743145}, {-0.743145,-0.669130},
> +{-0.809017,-0.587785}, {-0.866025,-0.500000}, {-0.913545,-0.406737},
> +{-0.951057,-0.309017}, {-0.978148,-0.207912}, {-0.994522,-0.104528},
> +{1.000000,-0.000000}, {0.987688,-0.156434}, {0.951057,-0.309017},
> +{0.891007,-0.453991}, {0.809017,-0.587785}, {0.707107,-0.707107},
> +{0.587785,-0.809017}, {0.453991,-0.891007}, {0.309017,-0.951057},
> +{0.156434,-0.987688}, {-0.000000,-1.000000}, {-0.156434,-0.987688},
> +{-0.309017,-0.951056}, {-0.453991,-0.891006}, {-0.587785,-0.809017},
> +{-0.707107,-0.707107}, {-0.809017,-0.587785}, {-0.891007,-0.453990},
> +{-0.951057,-0.309017}, {-0.987688,-0.156434}, {-1.000000,0.000000},
> +{-0.987688,0.156435}, {-0.951057,0.309017}, {-0.891007,0.453991},
> +{-0.809017,0.587785}, {-0.707107,0.707107}, {-0.587785,0.809017},
> +{-0.453990,0.891007}, {-0.309017,0.951056}, {-0.156435,0.987688},
> +};
> +static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = {
> +{1.000000,0.000000}, {1.000000,-0.000000}, {1.000000,-0.000000},
> +{1.000000,-0.000000}, {0.913545,-0.406737}, {0.669131,-0.743145},
> +{1.000000,-0.000000}, {0.669131,-0.743145}, {-0.104529,-0.994522},
> +{1.000000,-0.000000}, {0.309017,-0.951057}, {-0.809017,-0.587785},
> +{1.000000,-0.000000}, {-0.104529,-0.994522}, {-0.978148,0.207912},
> +{1.000000,-0.000000}, {0.994522,-0.104528}, {0.978148,-0.207912},
> +{0.951057,-0.309017}, {0.913545,-0.406737}, {0.866025,-0.500000},
> +{0.809017,-0.587785}, {0.743145,-0.669131}, {0.669131,-0.743145},
> +{0.587785,-0.809017}, {0.500000,-0.866025}, {0.406737,-0.913545},
> +{0.309017,-0.951057}, {0.207912,-0.978148}, {0.104528,-0.994522},
> +{1.000000,-0.000000}, {0.978148,-0.207912}, {0.913545,-0.406737},
> +{0.809017,-0.587785}, {0.669131,-0.743145}, {0.500000,-0.866025},
> +{0.309017,-0.951057}, {0.104528,-0.994522}, {-0.104529,-0.994522},
> +{-0.309017,-0.951056}, {-0.500000,-0.866025}, {-0.669131,-0.743145},
> +{-0.809017,-0.587785}, {-0.913545,-0.406737}, {-0.978148,-0.207912},
> +{1.000000,-0.000000}, {0.951057,-0.309017}, {0.809017,-0.587785},
> +{0.587785,-0.809017}, {0.309017,-0.951057}, {-0.000000,-1.000000},
> +{-0.309017,-0.951056}, {-0.587785,-0.809017}, {-0.809017,-0.587785},
> +{-0.951057,-0.309017}, {-1.000000,0.000000}, {-0.951057,0.309017},
> +{-0.809017,0.587785}, {-0.587785,0.809017}, {-0.309017,0.951056},
> +};
> +static const ne10_fft_state_float32_t cfg_arch_480 = {
> +120,
> +(ne10_int32_t *)ne10_factors_480,
> +(ne10_fft_cpx_float32_t *)ne10_twiddles_480,
> +NULL,
> +(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120],
> +};
> +static const ne10_fft_state_float32_t cfg_arch_240 = {
> +60,
> +(ne10_int32_t *)ne10_factors_240,
> +(ne10_fft_cpx_float32_t *)ne10_twiddles_240,
> +NULL,
> +(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60],
> +};
> +static const ne10_fft_state_float32_t cfg_arch_120 = {
> +30,
> +(ne10_int32_t *)ne10_factors_120,
> +(ne10_fft_cpx_float32_t *)ne10_twiddles_120,
> +NULL,
> +(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30],
> +};
> +static const ne10_fft_state_float32_t cfg_arch_60 = {
> +15,
> +(ne10_int32_t *)ne10_factors_60,
> +(ne10_fft_cpx_float32_t *)ne10_twiddles_60,
> +NULL,
> +(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15],
> +};
> +#endif /*end NE10_FFT_PARAMS48000_960*/
> diff --git a/celt/tests/test_unit_dft.c b/celt/tests/test_unit_dft.c
> index 57db0e3..8996f17 100644
> --- a/celt/tests/test_unit_dft.c
> +++ b/celt/tests/test_unit_dft.c
> @@ -45,6 +45,16 @@
> #include "mathops.c"
> #include "entcode.c"
>
> +#if defined(OPUS_HAVE_RTCD) && \
> + (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_NEON_INTR))
> +#include "arm/armcpu.c"
> +#if defined(HAVE_ARM_NE10)
> +#include "arm/celt_ne10_fft.c"
> +#include "arm/arm_celt_ne10_fft_map.c"
> +#endif
> +#elif defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1)
> +#include "x86/x86cpu.c"
> +#endif
>
> #ifndef M_PI
> #define M_PI 3.141592653
> @@ -125,7 +135,7 @@ void test1d(int nfft,int isinverse)
> if (isinverse)
> opus_ifft(cfg,in,out);
> else
> - opus_fft(cfg,in,out);
> + opus_fft(cfg,in,out, opus_select_arch());
>
> /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
>
> @@ -153,10 +163,12 @@ int main(int argc,char ** argv)
> test1d(256,0);
> test1d(256,1);
> #ifndef RADIX_TWO_ONLY
> +#ifndef HAVE_ARM_NE10
> test1d(36,0);
> test1d(36,1);
> test1d(50,0);
> test1d(50,1);
> +#endif
> test1d(120,0);
> test1d(120,1);
> #endif
> diff --git a/celt/tests/test_unit_mdct.c b/celt/tests/test_unit_mdct.c
> index ac8957f..950d824 100644
> --- a/celt/tests/test_unit_mdct.c
> +++ b/celt/tests/test_unit_mdct.c
> @@ -46,6 +46,19 @@
> #include "mathops.c"
> #include "entcode.c"
>
> +#if defined(OPUS_HAVE_RTCD) && \
> + (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_NEON_INTR))
> +#include "arm/armcpu.c"
> +#if defined(HAVE_ARM_NE10)
> +#include "arm/celt_ne10_fft.c"
> +#include "arm/celt_ne10_mdct.c"
> +#include "arm/arm_celt_ne10_fft_map.c"
> +#include "arm/arm_celt_ne10_mdct_map.c"
> +#endif
> +#elif defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1)
> +#include "x86/x86cpu.c"
> +#endif
> +
> #ifndef M_PI
> #define M_PI 3.141592653
> #endif
> @@ -156,7 +169,7 @@ void test1d(int nfft,int isinverse)
> out[nfft-k-1] = out[nfft/2+k];
> check_inv(in,out,nfft,isinverse);
> } else {
> - clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1);
> + clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1, opus_select_arch());
> check(in_copy,out,nfft,isinverse);
> }
> /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
> @@ -188,6 +201,9 @@ int main(int argc,char ** argv)
> test1d(2048,0);
> test1d(2048,1);
> #ifndef RADIX_TWO_ONLY
> +
> +/* ARM NE10 library does not support below values */
> +#ifndef HAVE_ARM_NE10
> test1d(36,0);
> test1d(36,1);
> test1d(40,0);
> @@ -196,6 +212,7 @@ int main(int argc,char ** argv)
> test1d(60,1);
> test1d(120,0);
> test1d(120,1);
> +#endif
> test1d(240,0);
> test1d(240,1);
> test1d(480,0);
> diff --git a/celt_headers.mk b/celt_headers.mk
> index 5bb193e..c51c3ee 100644
> --- a/celt_headers.mk
> +++ b/celt_headers.mk
> @@ -31,11 +31,14 @@ celt/stack_alloc.h \
> celt/vq.h \
> celt/static_modes_float.h \
> celt/static_modes_fixed.h \
> +celt/static_modes_float_arm_ne10.h \
> celt/arm/armcpu.h \
> celt/arm/fixed_armv4.h \
> celt/arm/fixed_armv5e.h \
> celt/arm/kiss_fft_armv4.h \
> celt/arm/kiss_fft_armv5e.h \
> celt/arm/pitch_arm.h \
> +celt/arm/fft_arm.h \
> +celt/arm/mdct_arm.h \
> celt/x86/pitch_sse.h \
> celt/x86/x86cpu.h
> diff --git a/celt_sources.mk b/celt_sources.mk
> index 29ec937..28c7bae 100644
> --- a/celt_sources.mk
> +++ b/celt_sources.mk
> @@ -35,3 +35,9 @@ celt/arm/armopts.s.in
>
> CELT_SOURCES_ARM_NEON_INTR = \
> celt/arm/celt_neon_intr.c
> +
> +CELT_SOURCES_ARM_NE10= \
> +celt/arm/celt_ne10_fft.c \
> +celt/arm/celt_ne10_mdct.c \
> +celt/arm/arm_celt_ne10_fft_map.c \
> +celt/arm/arm_celt_ne10_mdct_map.c
> diff --git a/configure.ac b/configure.ac
> index 87cece9..baa3425 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -351,6 +351,80 @@ AM_CONDITIONAL([OPUS_ARM_EXTERNAL_ASM],
> AM_CONDITIONAL([HAVE_SSE4_1], [false])
> AM_CONDITIONAL([HAVE_SSE2], [false])
>
> +AC_DEFUN([OPUS_PATH_NE10],
> + [
> + AC_ARG_WITH(NE10,
> + AC_HELP_STRING([--with-NE10=PFX],[Prefix where libNE10 is installed (optional)]),
> + NE10_prefix="$withval", NE10_prefix="")
> + AC_ARG_WITH(NE10-libraries,
> + AC_HELP_STRING([--with-NE10-libraries=DIR],
> + [Directory where libNE10 library is installed (optional)]),
> + NE10_libraries="$withval", NE10_libraries="")
> + AC_ARG_WITH(NE10-includes,
> + AC_HELP_STRING([--with-NE10-includes=DIR],
> + [Directory where libNE10 header files are installed (optional)]),
> + NE10_includes="$withval", NE10_includes="")
> +
> + if test "x$NE10_libraries" != "x" ; then
> + NE10_LIBS="-L$NE10_libraries"
> + elif test "x$NE10_prefix" = "xno" || test "x$NE10_prefix" = "xyes" ; then
> + NE10_LIBS=""
> + elif test "x$NE10_prefix" != "x" ; then
> + NE10_LIBS="-L$NE10_prefix/lib"
> + elif test "x$prefix" != "xNONE" ; then
> + NE10_LIBS="-L$prefix/lib"
> + fi
> +
> + if test "x$NE10_prefix" != "xno" ; then
> + NE10_LIBS="$NE10_LIBS -lNE10"
> + fi
> +
> + if test "x$NE10_includes" != "x" ; then
> + NE10_CFLAGS="-I$NE10_includes"
> + elif test "x$NE10_prefix" = "xno" || test "x$NE10_prefix" = "xyes" ; then
> + NE10_CFLAGS=""
> + elif test "x$NE10_prefix" != "x" ; then
> + NE10_CFLAGS="-I$NE10_prefix/include"
> + elif test "x$prefix" != "xNONE"; then
> + NE10_CFLAGS="-I$prefix/include"
> + fi
> +
> + AC_MSG_CHECKING(for NE10)
> + save_CFLAGS="$CFLAGS"; CFLAGS="$CFLAGS $NE10_CFLAGS"
> + save_LIBS="$LIBS"; LIBS="$NE10_LIBS $LIBS"
> + AC_LINK_IFELSE(
> + [
> + AC_LANG_PROGRAM(
> + [[#include <NE10_init.h>
> + ]],
> + [[
> + ne10_fft_cfg_float32_t cfg;
> + cfg = ne10_fft_alloc_c2c_float32_neon(480);
> + ]]
> + )
> + ],[
> + HAVE_ARM_NE10=1
> + AC_MSG_RESULT([yes])
> + ],[
> + HAVE_ARM_NE10=0
> + AC_MSG_RESULT([no])
> + NE10_CFLAGS=""
> + NE10_LIBS=""
> + ]
> + )
> + CFLAGS="$save_CFLAGS"; LIBS="$save_LIBS"
> + #Now we know if libNE10 is installed or not
> + AS_IF([test x"$HAVE_ARM_NE10" = x"1"],
> + [
> + AC_DEFINE([HAVE_ARM_NE10], 1, [NE10 library is installed on host. Make sure it is on target!])
> + AC_SUBST(HAVE_ARM_NE10)
> + AC_SUBST(NE10_CFLAGS)
> + AC_SUBST(NE10_LIBS)
> + ],[]
> + )
> + ]
> +)
> +
> AS_IF([test x"$enable_intrinsics" = x"yes"],[
> case $host_cpu in
> arm*)
> @@ -391,6 +465,10 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
> AC_DEFINE([OPUS_ARM_MAY_HAVE_EDSP], 1, [Define if compiler support EDSP Instructions])
> AC_DEFINE([OPUS_ARM_MAY_HAVE_MEDIA], 1, [Define if compiler support MEDIA Instructions])
> AC_DEFINE([OPUS_ARM_MAY_HAVE_NEON], 1, [Define if compiler support NEON instructions])
> +
> + OPUS_PATH_NE10()
> + AS_IF([test x"$NE10_LIBS" != "x"],
> + [enable_intrinsics="$enable_intrinsics NE10"],[])
> ],
> [
> AC_MSG_WARN([Compiler does not support ARM intrinsics])
> @@ -516,6 +594,9 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
> AM_CONDITIONAL([CPU_ARM], [test "$cpu_arm" = "yes"])
> AM_CONDITIONAL([OPUS_ARM_NEON_INTR],
> [test x"$OPUS_ARM_NEON_INTR" = x"1"])
> +AM_CONDITIONAL([HAVE_ARM_NE10],
> + [test x"$HAVE_ARM_NE10" = x"1"])
> +
>
> AS_IF([test x"$enable_rtcd" = x"yes"],[
> AS_IF([test x"$rtcd_support" != x"no"],[
> diff --git a/src/analysis.c b/src/analysis.c
> index 2ee8533..0603643 100644
> --- a/src/analysis.c
> +++ b/src/analysis.c
> @@ -262,7 +262,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
> remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
> downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
> tonal->mem_fill = 240 + remaining;
> - opus_fft(kfft, in, out);
> + opus_fft(kfft, in, out, opus_select_arch());
> #ifndef FIXED_POINT
> /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */
> if (celt_isnan(out[0].r))
> diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c
> index 6e87337..05744bf 100644
> --- a/src/opus_multistream_encoder.c
> +++ b/src/opus_multistream_encoder.c
> @@ -257,7 +257,8 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b
> OPUS_COPY(in, mem+c*overlap, overlap);
> (*copy_channel_in)(x, 1, pcm, channels, c, len);
> celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0);
> - clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1);
> + clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window,
> + overlap, celt_mode->maxLM-LM, 1, opus_select_arch());
> if (upsample != 1)
> {
> int bound = len;
> --
> 1.7.9.5
>
More information about the opus
mailing list