[opus] [RFC PATCH v2] armv7(float): Optimize encode usecase using NE10 library

Wed Feb 4 14:04:29 PST 2015

On 4 February 2015 at 12:21, Viswanath Puttagunta
<viswanath.puttagunta at linaro.org> wrote:
> Optimize the opus encode (float only) use case using the ARM NE10
> library. Mainly affects opus_fft and clt_mdct_forward
> and related functions.
>
> This optimization can be used for ARM CPUs that have a NEON
> VFP unit. This patch only enables optimizations for ARMv7.
>
> Official ARM NE10 library page available at
> http://projectne10.github.io/Ne10/
>
> To enable this optimization, use
> --enable-intrinsics --with-NE10=<install_prefix>
> or
> --enable-intrinsics --with-NE10-libraries=<NE10_lib_dir> --with-NE10-includes=<NE10_includes_dir>
>
> Compile-time checks are made during the configure process to make sure
> the optimization option is available only when the compiler supports NEON
> intrinsics.
>
> Runtime checks are made to make sure the optimized functions are only
> called on appropriate hardware.
> ---
>  Makefile.am                           |   34 +--
>  celt/arm/arm_celt_map.c               |   47 +++-
>  celt/arm/celt_ne10_fft.c              |  120 ++++++++++
>  celt/arm/celt_ne10_mdct.c             |  158 +++++++++++++
>  celt/arm/fft_arm.h                    |   66 ++++++
>  celt/arm/mdct_arm.h                   |   53 +++++
>  celt/celt_encoder.c                   |   13 +-
>  celt/dump_modes/Makefile              |   23 +-
>  celt/dump_modes/dump_modes.c          |   21 ++
>  celt/dump_modes/dump_modes_arch.h     |   41 ++++
>  celt/dump_modes/dump_modes_arm_ne10.c |  125 ++++++++++
>  celt/kiss_fft.c                       |   27 ++-
>  celt/kiss_fft.h                       |   54 ++++-
>  celt/mdct.c                           |   15 +-
>  celt/mdct.h                           |   39 +++-
>  celt/modes.c                          |    8 +-
>  celt/static_modes_float.h             |   25 ++
>  celt/static_modes_float_arm_ne10.h    |  404 +++++++++++++++++++++++++++++++++
>  celt/tests/test_unit_dft.c            |   52 +++--
>  celt/tests/test_unit_mathops.c        |    6 +
>  celt/tests/test_unit_mdct.c           |   81 ++++---
>  celt/tests/test_unit_rotation.c       |    6 +
>  celt_headers.mk                       |    3 +
>  celt_sources.mk                       |    4 +
>  configure.ac                          |   81 +++++++
>  src/analysis.c                        |    8 +-
>  src/analysis.h                        |    2 +-
>  src/opus_encoder.c                    |    2 +-
>  src/opus_multistream_encoder.c        |    9 +-
>  29 files changed, 1422 insertions(+), 105 deletions(-)
>  create mode 100644 celt/arm/celt_ne10_fft.c
>  create mode 100644 celt/arm/celt_ne10_mdct.c
>  create mode 100644 celt/arm/fft_arm.h
>  create mode 100644 celt/arm/mdct_arm.h
>  create mode 100644 celt/dump_modes/dump_modes_arch.h
>  create mode 100644 celt/dump_modes/dump_modes_arm_ne10.c
>  create mode 100644 celt/static_modes_float_arm_ne10.h
>
> diff --git a/Makefile.am b/Makefile.am
> index 95323ca..c7d9533 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -10,7 +10,7 @@ lib_LTLIBRARIES = libopus.la
>  DIST_SUBDIRS = doc
>
>  AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/celt -I$(top_srcdir)/silk \
> -              -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed
> +              -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed $(NE10_CFLAGS)
>
>  include celt_sources.mk
>  include silk_sources.mk
> @@ -47,6 +47,10 @@ CELT_SOURCES += $(CELT_SOURCES_ARM_NEON_INTR)
>  OPUS_ARM_NEON_INTR_CPPFLAGS = -mfpu=neon
>  endif
>
> +if HAVE_ARM_NE10
> +CELT_SOURCES += $(CELT_SOURCES_ARM_NE10)
> +endif
> +
>  if OPUS_ARM_EXTERNAL_ASM
>  nodist_libopus_la_SOURCES = $(CELT_SOURCES_ARM_ASM:.s=-gnu.S)
>  BUILT_SOURCES = $(CELT_SOURCES_ARM_ASM:.s=-gnu.S) \
> @@ -64,7 +68,7 @@ include opus_headers.mk
>
>  libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(OPUS_SOURCES)
>  libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@
> -libopus_la_LIBADD = $(LIBM)
> +libopus_la_LIBADD = $(NE10_LIBS) $(LIBM)
>
>  pkginclude_HEADERS = include/opus.h include/opus_multistream.h include/opus_types.h include/opus_defines.h
>
> @@ -77,32 +81,32 @@ TESTS = celt/tests/test_unit_types celt/tests/test_unit_mathops celt/tests/test_
>
>  opus_demo_SOURCES = src/opus_demo.c
>
> -opus_demo_LDADD = libopus.la $(LIBM)
> +opus_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
>  repacketizer_demo_SOURCES = src/repacketizer_demo.c
>
> -repacketizer_demo_LDADD = libopus.la $(LIBM)
> +repacketizer_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
>  opus_compare_SOURCES = src/opus_compare.c
>  opus_compare_LDADD = $(LIBM)
>
>  tests_test_opus_api_SOURCES = tests/test_opus_api.c tests/test_opus_common.h
> -tests_test_opus_api_LDADD = libopus.la $(LIBM)
> +tests_test_opus_api_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
>  tests_test_opus_encode_SOURCES = tests/test_opus_encode.c tests/test_opus_common.h
> -tests_test_opus_encode_LDADD = libopus.la $(LIBM)
> +tests_test_opus_encode_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
>  tests_test_opus_decode_SOURCES = tests/test_opus_decode.c tests/test_opus_common.h
> -tests_test_opus_decode_LDADD = libopus.la $(LIBM)
> +tests_test_opus_decode_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
>  tests_test_opus_padding_SOURCES = tests/test_opus_padding.c tests/test_opus_common.h
> -tests_test_opus_padding_LDADD = libopus.la $(LIBM)
> +tests_test_opus_padding_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
>  celt_tests_test_unit_cwrs32_SOURCES = celt/tests/test_unit_cwrs32.c
>  celt_tests_test_unit_cwrs32_LDADD = $(LIBM)
>
>  celt_tests_test_unit_dft_SOURCES = celt/tests/test_unit_dft.c
> -celt_tests_test_unit_dft_LDADD = $(LIBM)
> +celt_tests_test_unit_dft_LDADD = $(NE10_LIBS) $(LIBM)
>
>  celt_tests_test_unit_entropy_SOURCES = celt/tests/test_unit_entropy.c
>  celt_tests_test_unit_entropy_LDADD = $(LIBM)
> @@ -111,7 +115,7 @@ celt_tests_test_unit_laplace_SOURCES = celt/tests/test_unit_laplace.c
>  celt_tests_test_unit_laplace_LDADD = $(LIBM)
>
>  celt_tests_test_unit_mathops_SOURCES = celt/tests/test_unit_mathops.c
> -celt_tests_test_unit_mathops_LDADD = $(LIBM)
> +celt_tests_test_unit_mathops_LDADD = $(NE10_LIBS) $(LIBM)
>  if CPU_ARM
>  if OPUS_ARM_EXTERNAL_ASM
>  celt_tests_test_unit_mathops_LDADD += libopus.la
> @@ -119,10 +123,10 @@ endif
>  endif
>
>  celt_tests_test_unit_mdct_SOURCES = celt/tests/test_unit_mdct.c
> -celt_tests_test_unit_mdct_LDADD = $(LIBM)
> +celt_tests_test_unit_mdct_LDADD = $(NE10_LIBS) $(LIBM)
>
>  celt_tests_test_unit_rotation_SOURCES = celt/tests/test_unit_rotation.c
> -celt_tests_test_unit_rotation_LDADD = $(LIBM)
> +celt_tests_test_unit_rotation_LDADD = $(NE10_LIBS) $(LIBM)
>  if CPU_ARM
>  if OPUS_ARM_EXTERNAL_ASM
>  celt_tests_test_unit_rotation_LDADD += libopus.la
> @@ -269,6 +273,8 @@ endif
>
>  if OPUS_ARM_NEON_INTR
>  CELT_ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
> -                       %test_unit_rotation.o %test_unit_mathops.o
> -$(CELT_ARM_NEON_INTR_OBJ): CFLAGS += $(OPUS_ARM_NEON_INTR_CPPFLAGS)
> +                         $(CELT_SOURCES_ARM_NE10:.c=.lo) \
> +                         %test_unit_rotation.o %test_unit_mathops.o \
> +                         %test_unit_mdct.o %test_unit_dft.o
> +$(CELT_ARM_NEON_INTR_OBJ): CFLAGS += $(OPUS_ARM_NEON_INTR_CPPFLAGS) $(NE10_CFLAGS)
>  endif
> diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c
> index 68c224d..3b49f90 100644
> --- a/celt/arm/arm_celt_map.c
> +++ b/celt/arm/arm_celt_map.c
> @@ -30,6 +30,8 @@
>  #endif
>
>  #include "pitch.h"
> +#include "kiss_fft.h"
> +#include "mdct.h"
>
>  #if defined(OPUS_HAVE_RTCD)
>
> @@ -50,7 +52,46 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
>    celt_pitch_xcorr_c,              /* Media */
>    celt_pitch_xcorr_float_neon      /* Neon */
>  };
> -#  endif
> -# endif
>
> -#endif
> +#if defined(HAVE_ARM_NE10)
> +#ifdef CUSTOM_MODES
> +int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
I named it this way because it is the implementation of opus_fft_alloc_arch().
> +   opus_fft_alloc_arch_c,        /* ARMv4 */
> +   opus_fft_alloc_arch_c,        /* EDSP */
> +   opus_fft_alloc_arch_c,        /* Media */
> +   opus_fft_alloc_arm_float_neon /* Neon with NE10 library support */
> +};
> +
> +void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
> +   opus_fft_free_arch_c,         /* ARMv4 */
> +   opus_fft_free_arch_c,         /* EDSP */
> +   opus_fft_free_arch_c,         /* Media */
> +   opus_fft_free_arm_float_neon  /* Neon with NE10 */
> +};
> +#endif /* CUSTOM_MODES */
> +
> +void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
I wanted to change the name of this to OPUS_FFT_IMPL, but thought it would
be misleading, as opus_fft_impl() in celt/kiss_fft.c does not strictly
implement opus_fft.
> +                                        const kiss_fft_cpx *fin,
> +                                        kiss_fft_cpx *fout) = {
> +   opus_fft_c,                   /* ARMv4 */
> +   opus_fft_c,                   /* EDSP */
> +   opus_fft_c,                   /* Media */
> +   opus_fft_float_neon           /* Neon with NE10 */
> +};
> +
> +void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
I did change this name, however, since it does not conflict with any other function.
> +                                                     kiss_fft_scalar *in,
> +                                                     kiss_fft_scalar * OPUS_RESTRICT out,
> +                                                     const opus_val16 *window,
> +                                                     int overlap, int shift,
> +                                                     int stride, int arch) = {
> +   clt_mdct_forward_c,           /* ARMv4 */
> +   clt_mdct_forward_c,           /* EDSP */
> +   clt_mdct_forward_c,           /* Media */
> +   clt_mdct_forward_float_neon   /* Neon with NE10 */
> +};
> +#endif /* HAVE_ARM_NE10 */
> +#  endif /* OPUS_ARM_NEON_INTR */
> +# endif /* FIXED_POINT */
> +
> +#endif /* OPUS_HAVE_RTCD */
> diff --git a/celt/arm/celt_ne10_fft.c b/celt/arm/celt_ne10_fft.c
> new file mode 100644
> index 0000000..b592f19
> --- /dev/null
> +++ b/celt/arm/celt_ne10_fft.c
> @@ -0,0 +1,120 @@
> +/* Copyright (c) 2015 Xiph.Org Foundation
> +   Written by Viswanath Puttagunta */
> +/**
> +   @file celt_ne10_fft.c
> +   @brief ARM Neon optimizations for fft using NE10 library
> + */
> +
> +/*
> +   Redistribution and use in source and binary forms, with or without
> +   modification, are permitted provided that the following conditions
> +   are met:
> +
> +   - Redistributions of source code must retain the above copyright
> +   notice, this list of conditions and the following disclaimer.
> +
> +   - Redistributions in binary form must reproduce the above copyright
> +   notice, this list of conditions and the following disclaimer in the
> +   documentation and/or other materials provided with the distribution.
> +
> +   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> +   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> +   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> +   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> +   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> +   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> +   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> +   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#ifndef SKIP_CONFIG_H
> +#ifdef HAVE_CONFIG_H
> +#include "config.h"
> +#endif
> +#endif
> +
> +#include <arm_neon.h>
> +#include <NE10_init.h>
> +#include <NE10_dsp.h>
> +#include "../kiss_fft.h"
> +#include "stack_alloc.h"
> +#include "os_support.h"
> +#include "stack_alloc.h"
> +
> +#ifdef CUSTOM_MODES
> +
> +/* nfft lengths in NE10 that support scaled fft */
> +#define NE10_FFTSCALED_SUPPORT_MAX 4
> +static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
> +   480, 240, 120, 60
> +};
> +
> +int opus_fft_alloc_arm_float_neon(kiss_fft_state *st)
> +{
> +   int i;
> +   size_t memneeded = sizeof(struct arch_fft_state);
> +
> +   st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
> +   if (!st->arch_fft)
> +      return -1;
> +
> +   for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
> +      if(st->nfft == ne10_fft_scaled_support[i])
> +         break;
> +   }
> +   if (i == NE10_FFTSCALED_SUPPORT_MAX) {
> +      /* This nfft length (scaled fft) is not supported in NE10 */
> +      st->arch_fft->is_supported = 0;
> +      st->arch_fft->priv = NULL;
> +   }
> +   else {
> +      st->arch_fft->is_supported = 1;
> +      st->arch_fft->priv = (void *)ne10_fft_alloc_c2c_float32_neon(st->nfft);
> +      if (st->arch_fft->priv == NULL) {
> +         return -1;
> +      }
> +   }
> +   return 0;
> +}
> +
> +void opus_fft_free_arm_float_neon(kiss_fft_state *st)
> +{
> +   ne10_fft_cfg_float32_t cfg;
> +
> +   if (!st->arch_fft)
> +      return;
> +
> +   cfg = (ne10_fft_cfg_float32_t)st->arch_fft->priv;
> +   if (cfg)
> +      ne10_fft_destroy_c2c_float32(cfg);
> +   opus_free(st->arch_fft);
> +}
> +#endif
> +void opus_fft_float_neon(const kiss_fft_state *st,
> +                         const kiss_fft_cpx *fin,
> +                         kiss_fft_cpx *fout)
> +{
> +   ne10_fft_state_float32_t state;
> +   ne10_fft_cfg_float32_t cfg = &state;
> +   VARDECL(ne10_fft_cpx_float32_t, buffer);
> +   SAVE_STACK;
> +   ALLOC(buffer, st->nfft, ne10_fft_cpx_float32_t);
> +
> +   if (!st->arch_fft->is_supported) {
> +      /* This nfft length (scaled fft) not supported in NE10 */
> +      opus_fft_c(st, fin, fout);
> +   }
> +   else {
> +      memcpy((void *)cfg, st->arch_fft->priv, sizeof(ne10_fft_state_float32_t));
> +      state.buffer = (ne10_fft_cpx_float32_t *)&buffer[0];
> +      state.is_forward_scaled = 1;
> +
> +      ne10_fft_c2c_1d_float32_neon((ne10_fft_cpx_float32_t *)fout,
> +                                   (ne10_fft_cpx_float32_t *)fin,
> +                                   cfg, 0);
> +   }
> +   RESTORE_STACK;
> +}
> diff --git a/celt/arm/celt_ne10_mdct.c b/celt/arm/celt_ne10_mdct.c
> new file mode 100644
> index 0000000..cf175cb
> --- /dev/null
> +++ b/celt/arm/celt_ne10_mdct.c
> @@ -0,0 +1,158 @@
> +/* Copyright (c) 2015 Xiph.Org Foundation
> +   Written by Viswanath Puttagunta */
> +/**
> +   @file celt_ne10_mdct.c
> +   @brief ARM Neon optimizations for mdct using NE10 library
> + */
> +
> +/*
> +   Redistribution and use in source and binary forms, with or without
> +   modification, are permitted provided that the following conditions
> +   are met:
> +
> +   - Redistributions of source code must retain the above copyright
> +   notice, this list of conditions and the following disclaimer.
> +
> +   - Redistributions in binary form must reproduce the above copyright
> +   notice, this list of conditions and the following disclaimer in the
> +   documentation and/or other materials provided with the distribution.
> +
> +   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> +   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> +   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> +   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> +   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> +   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> +   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> +   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#ifndef SKIP_CONFIG_H
> +#ifdef HAVE_CONFIG_H
> +#include "config.h"
> +#endif
> +#endif
> +
> +#include "../kiss_fft.h"
> +#include "_kiss_fft_guts.h"
> +#include "../mdct.h"
> +#include "stack_alloc.h"
> +#include "os_support.h"
> +#include "stack_alloc.h"
> +
> +void clt_mdct_forward_float_neon(const mdct_lookup *l,
> +                                 kiss_fft_scalar *in,
> +                                 kiss_fft_scalar * OPUS_RESTRICT out,
> +                                 const opus_val16 *window,
> +                                 int overlap, int shift, int stride, int arch)
> +{
> +   int i;
> +   int N, N2, N4;
> +   VARDECL(kiss_fft_scalar, f);
> +   VARDECL(kiss_fft_cpx, f2);
> +   const kiss_fft_state *st = l->kfft[shift];
> +   const kiss_twiddle_scalar *trig;
> +
> +   SAVE_STACK;
> +
> +   N = l->n;
> +   trig = l->trig;
> +   for (i=0;i<shift;i++)
> +   {
> +      N >>= 1;
> +      trig += N;
> +   }
> +   N2 = N>>1;
> +   N4 = N>>2;
> +
> +   ALLOC(f, N2, kiss_fft_scalar);
> +   ALLOC(f2, N4, kiss_fft_cpx);
> +
> +   /* Consider the input to be composed of four blocks: [a, b, c, d] */
> +   /* Window, shuffle, fold */
> +   {
> +      /* Temp pointers to make it really clear to the compiler what we're doing */
> +      const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
> +      const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
> +      kiss_fft_scalar * OPUS_RESTRICT yp = f;
> +      const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
> +      const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
> +      for(i=0;i<((overlap+3)>>2);i++)
> +      {
> +         /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
> +         *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
> +         *yp++ = MULT16_32_Q15(*wp1, *xp1)    - MULT16_32_Q15(*wp2, xp2[-N2]);
> +         xp1+=2;
> +         xp2-=2;
> +         wp1+=2;
> +         wp2-=2;
> +      }
> +      wp1 = window;
> +      wp2 = window+overlap-1;
> +      for(;i<N4-((overlap+3)>>2);i++)
> +      {
> +         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
> +         *yp++ = *xp2;
> +         *yp++ = *xp1;
> +         xp1+=2;
> +         xp2-=2;
> +      }
> +      for(;i<N4;i++)
> +      {
> +         /* Real part arranged as a-bR, Imag part arranged as -c-dR */
> +         *yp++ =  -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
> +         *yp++ = MULT16_32_Q15(*wp2, *xp1)     + MULT16_32_Q15(*wp1, xp2[N2]);
> +         xp1+=2;
> +         xp2-=2;
> +         wp1+=2;
> +         wp2-=2;
> +      }
> +   }
> +   /* Pre-rotation */
> +   {
> +      kiss_fft_scalar * OPUS_RESTRICT yp = f;
> +      const kiss_twiddle_scalar *t = &trig[0];
> +      for(i=0;i<N4;i++)
> +      {
> +         kiss_fft_cpx yc;
> +         kiss_twiddle_scalar t0, t1;
> +         kiss_fft_scalar re, im, yr, yi;
> +         t0 = t[i];
> +         t1 = t[N4+i];
> +         re = *yp++;
> +         im = *yp++;
> +         yr = S_MUL(re,t0)  -  S_MUL(im,t1);
> +         yi = S_MUL(im,t0)  +  S_MUL(re,t1);
> +         yc.r = yr;
> +         yc.i = yi;
> +         f2[i] = yc;
> +      }
> +   }
> +
> +   opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
> +
> +   /* Post-rotate */
> +   {
> +      /* Temp pointers to make it really clear to the compiler what we're doing */
> +      const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
> +      kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
> +      kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
> +      const kiss_twiddle_scalar *t = &trig[0];
> +      /* Temp pointers to make it really clear to the compiler what we're doing */
> +      for(i=0;i<N4;i++)
> +      {
> +         kiss_fft_scalar yr, yi;
> +         yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
> +         yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
> +         *yp1 = yr;
> +         *yp2 = yi;
> +         fp++;
> +         yp1 += 2*stride;
> +         yp2 -= 2*stride;
> +      }
> +   }
> +   RESTORE_STACK;
> +}
> diff --git a/celt/arm/fft_arm.h b/celt/arm/fft_arm.h
> new file mode 100644
> index 0000000..e7a30d6
> --- /dev/null
> +++ b/celt/arm/fft_arm.h
> @@ -0,0 +1,66 @@
> +/* Copyright (c) 2015 Xiph.Org Foundation
> +   Written by Viswanath Puttagunta */
> +/**
> +   @file fft_arm.h
> +   @brief ARM Neon Intrinsic optimizations for fft using NE10 library
> + */
> +
> +/*
> +   Redistribution and use in source and binary forms, with or without
> +   modification, are permitted provided that the following conditions
> +   are met:
> +
> +   - Redistributions of source code must retain the above copyright
> +   notice, this list of conditions and the following disclaimer.
> +
> +   - Redistributions in binary form must reproduce the above copyright
> +   notice, this list of conditions and the following disclaimer in the
> +   documentation and/or other materials provided with the distribution.
> +
> +   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> +   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> +   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> +   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> +   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> +   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> +   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> +   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +
> +#if !defined(FFT_ARM_H)
> +#define FFT_ARM_H
> +
> +#include "config.h"
> +#include "kiss_fft.h"
> +
> +#if !defined(FIXED_POINT)
> +#if defined(HAVE_ARM_NE10)
> +
> +int opus_fft_alloc_arm_float_neon(kiss_fft_state *st);
> +void opus_fft_free_arm_float_neon(kiss_fft_state *st);
> +
> +void opus_fft_float_neon(const kiss_fft_state *st,
> +                         const kiss_fft_cpx *fin,
> +                         kiss_fft_cpx *fout);
> +#if !defined(OPUS_HAVE_RTCD)
> +#define OVERRIDE_OPUS_FFT (1)
> +
> +#define opus_fft_alloc_arch(_st, arch) \
> +   ((void)(arch), opus_fft_alloc_arm_float_neon(_st))
> +
> +#define opus_fft_free_arch(_st, arch) \
> +   ((void)(arch), opus_fft_free_arm_float_neon(_st))
> +
> +#define opus_fft(_st, _fin, _fout, arch) \
> +   ((void)(arch), opus_fft_float_neon(_st, _fin, _fout))
> +
> +#endif /* OPUS_HAVE_RTCD */
> +
> +#endif /* HAVE_ARM_NE10 */
> +#endif /* FIXED_POINT */
> +
> +#endif
> diff --git a/celt/arm/mdct_arm.h b/celt/arm/mdct_arm.h
> new file mode 100644
> index 0000000..33f7bb6
> --- /dev/null
> +++ b/celt/arm/mdct_arm.h
> @@ -0,0 +1,53 @@
> +/* Copyright (c) 2015 Xiph.Org Foundation
> +   Written by Viswanath Puttagunta */
> +/**
> +   @file mdct_arm.h
> +   @brief ARM Neon Intrinsic optimizations for mdct using NE10 library
> + */
> +
> +/*
> +   Redistribution and use in source and binary forms, with or without
> +   modification, are permitted provided that the following conditions
> +   are met:
> +
> +   - Redistributions of source code must retain the above copyright
> +   notice, this list of conditions and the following disclaimer.
> +
> +   - Redistributions in binary form must reproduce the above copyright
> +   notice, this list of conditions and the following disclaimer in the
> +   documentation and/or other materials provided with the distribution.
> +
> +   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> +   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> +   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> +   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> +   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> +   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> +   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> +   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#if !defined(MDCT_ARM_H)
> +#define MDCT_ARM_H
> +
> +#include "config.h"
> +#include "mdct.h"
> +
> +#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
> +/** Compute a forward MDCT and scale by 4/N, trashes the input array */
> +void clt_mdct_forward_float_neon(const mdct_lookup *l, kiss_fft_scalar *in,
> +                                 kiss_fft_scalar * OPUS_RESTRICT out,
> +                                 const opus_val16 *window, int overlap,
> +                                 int shift, int stride, int arch);
> +
> +#if !defined(OPUS_HAVE_RTCD)
> +#define OVERRIDE_OPUS_MDCT (1)
> +#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
> +      clt_mdct_forward_float_neon((_l, _in, _out, _window, _int, _shift, _stride, _arch)
> +#endif /* OPUS_HAVE_RTCD */
> +#endif /* !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) */
> +
> +#endif
> diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c
> index 86a3fbb..7a2c71b 100644
> --- a/celt/celt_encoder.c
> +++ b/celt/celt_encoder.c
> @@ -414,7 +414,8 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
>  /** Apply window and compute the MDCT for all sub-frames and
>      all channels in a frame */
>  static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in,
> -                          celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample)
> +                          celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample,
> +                          int arch)
>  {
>     const int overlap = mode->overlap;
>     int N;
> @@ -435,7 +436,9 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
>        for (b=0;b<B;b++)
>        {
>           /* Interleaving the sub-frames while doing the MDCTs */
> -         clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B);
> +         clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N,
> +                          &out[b+c*N*B], mode->window, overlap, shift, B,
> +                          arch);
>        }
>     } while (++c<CC);
>     if (CC==2&&C==1)
> @@ -1603,14 +1606,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
>     ALLOC(bandLogE2, C*nbEBands, opus_val16);
>     if (secondMdct)
>     {
> -      compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample);
> +      compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch);
>        compute_band_energies(mode, freq, bandE, effEnd, C, LM);
>        amp2Log2(mode, effEnd, end, bandE, bandLogE2, C);
>        for (i=0;i<C*nbEBands;i++)
>           bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
>     }
>
> -   compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
> +   compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
>     if (CC==2&&C==1)
>        tf_chan = 0;
>     compute_band_energies(mode, freq, bandE, effEnd, C, LM);
> @@ -1736,7 +1739,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
>        {
>           isTransient = 1;
>           shortBlocks = M;
> -         compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
> +         compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
>           compute_band_energies(mode, freq, bandE, effEnd, C, LM);
>           amp2Log2(mode, effEnd, end, bandE, bandLogE, C);
>           /* Compensate for the scaling of short vs long mdcts */
> diff --git a/celt/dump_modes/Makefile b/celt/dump_modes/Makefile
> index 74d527e..10c3679 100644
> --- a/celt/dump_modes/Makefile
> +++ b/celt/dump_modes/Makefile
> @@ -1,10 +1,31 @@
> +
>  CFLAGS=-O2 -Wall -Wextra -DHAVE_CONFIG_H
>  INCLUDES=-I. -I../ -I../.. -I../../include
>
> +SOURCES = dump_modes.c \
> +          ../modes.c \
> +          ../cwrs.c \
> +          ../rate.c \
> +          ../entenc.c \
> +          ../entdec.c \
> +          ../mathops.c \
> +          ../mdct.c \
> +          ../kiss_fft.c
> +
> +ifdef HAVE_ARM_NE10
> +CC = gcc
> +CFLAGS += -mfpu=neon
> +INCLUDES += -I$(NE10_INCDIR) -DHAVE_ARM_NE10 -DOPUS_ARM_NEON_INTR
> +LIBDIR = -l:$(NE10_LIBDIR)/libNE10.so
> +SOURCES += ../arm/celt_ne10_fft.c \
> +           dump_modes_arm_ne10.c \
> +           ../arm/armcpu.c
> +endif
> +
>  all: dump_modes
>
>  dump_modes:
> -       $(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES dump_modes.c ../modes.c ../cwrs.c ../rate.c ../entenc.c ../entdec.c ../mathops.c ../mdct.c ../kiss_fft.c -o dump_modes -lm
> +       $(PREFIX)$(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES $(SOURCES) -o $@ $(LIBDIR) -lm
>
>  clean:
>         rm -f dump_modes
> diff --git a/celt/dump_modes/dump_modes.c b/celt/dump_modes/dump_modes.c
> index ae6a8c1..9105a53 100644
> --- a/celt/dump_modes/dump_modes.c
> +++ b/celt/dump_modes/dump_modes.c
> @@ -35,6 +35,7 @@
>  #include "modes.h"
>  #include "celt.h"
>  #include "rate.h"
> +#include "dump_modes_arch.h"
>
>  #define INT16 "%d"
>  #define INT32 "%d"
> @@ -62,6 +63,10 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
>     fprintf(file, "\n   It contains static definitions for some pre-defined modes. */\n");
>     fprintf(file, "#include \"modes.h\"\n");
>     fprintf(file, "#include \"rate.h\"\n");
> +   fprintf(file, "\n#ifdef HAVE_ARM_NE10\n");
> +   fprintf(file, "#define OVERRIDE_FFT 1\n");
> +   fprintf(file, "#include \"%s\"\n", ARM_NE10_ARCH_FILE_NAME);
> +   fprintf(file, "#endif\n");
>
>     fprintf(file, "\n");
>
> @@ -149,6 +154,9 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
>           fprintf (file, "{" WORD16 ", " WORD16 "},%c", mode->mdct.kfft[0]->twiddles[j].r, mode->mdct.kfft[0]->twiddles[j].i,(j+3)%2==0?'\n':' ');
>        fprintf (file, "};\n");
>
> +#ifdef OVERRIDE_FFT
> +      dump_mode_arch(mode);
> +#endif
>        /* FFT Bitrev tables */
>        for (k=0;k<=mode->mdct.maxshift;k++)
>        {
> @@ -183,6 +191,13 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
>           fprintf (file, "},    /* factors */\n");
>           fprintf (file, "fft_bitrev%d,    /* bitrev */\n", mode->mdct.kfft[k]->nfft);
>           fprintf (file, "fft_twiddles%d_%d,    /* bitrev */\n", mode->Fs, mdctSize);
> +
> +         fprintf (file, "#ifdef OVERRIDE_FFT\n");
> +         fprintf (file, "(arch_fft_state *)&cfg_arch_%d,\n", mode->mdct.kfft[k]->nfft);
> +         fprintf (file, "#else\n");
> +         fprintf (file, "NULL,\n");
> +         fprintf(file, "#endif\n");
> +
>           fprintf (file, "};\n");
>
>           fprintf(file, "#endif\n");
> @@ -323,8 +338,14 @@ int main(int argc, char **argv)
>        }
>     }
>     file = fopen(BASENAME ".h", "w");
> +#ifdef OVERRIDE_FFT
> +   dump_modes_arch_init(m, nb);
> +#endif
>     dump_modes(file, m, nb);
>     fclose(file);
> +#ifdef OVERRIDE_FFT
> +   dump_modes_arch_finalize();
> +#endif
>     for (i=0;i<nb;i++)
>        opus_custom_mode_destroy(m[i]);
>     free(m);
> diff --git a/celt/dump_modes/dump_modes_arch.h b/celt/dump_modes/dump_modes_arch.h
> new file mode 100644
> index 0000000..1436926
> --- /dev/null
> +++ b/celt/dump_modes/dump_modes_arch.h
> @@ -0,0 +1,41 @@
> +/* Copyright (c) 2015 Xiph.Org Foundation
> +   Written by Viswanath Puttagunta */
> +/*
> +   Redistribution and use in source and binary forms, with or without
> +   modification, are permitted provided that the following conditions
> +   are met:
> +
> +   - Redistributions of source code must retain the above copyright
> +   notice, this list of conditions and the following disclaimer.
> +
> +   - Redistributions in binary form must reproduce the above copyright
> +   notice, this list of conditions and the following disclaimer in the
> +   documentation and/or other materials provided with the distribution.
> +
> +   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> +   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> +   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> +   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> +   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> +   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> +   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> +   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#ifndef DUMP_MODE_ARCH_H
> +#define DUMP_MODE_ARCH_H
> +
> +void dump_modes_arch_init(CELTMode **modes, int nb_modes); /* open output, write banner */
> +void dump_mode_arch(CELTMode *mode);                       /* emit tables for one mode */
> +void dump_modes_arch_finalize(void);                       /* close output */
> +/* Name of the generated header; dump_modes() also emits it as an #include. */
> +#define ARM_NE10_ARCH_FILE_NAME "static_modes_float_arm_ne10.h"
> +
> +#if defined(HAVE_ARM_NE10)
> +#define OVERRIDE_FFT 1 /* same spelling as the generated "#define OVERRIDE_FFT 1" */
> +#endif
> +
> +#endif
> diff --git a/celt/dump_modes/dump_modes_arm_ne10.c b/celt/dump_modes/dump_modes_arm_ne10.c
> new file mode 100644
> index 0000000..aa53f17
> --- /dev/null
> +++ b/celt/dump_modes/dump_modes_arm_ne10.c
> @@ -0,0 +1,125 @@
> +/* Copyright (c) 2015 Xiph.Org Foundation
> +   Written by Viswanath Puttagunta */
> +/*
> +   Redistribution and use in source and binary forms, with or without
> +   modification, are permitted provided that the following conditions
> +   are met:
> +
> +   - Redistributions of source code must retain the above copyright
> +   notice, this list of conditions and the following disclaimer.
> +
> +   - Redistributions in binary form must reproduce the above copyright
> +   notice, this list of conditions and the following disclaimer in the
> +   documentation and/or other materials provided with the distribution.
> +
> +   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> +   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> +   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> +   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> +   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> +   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> +   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> +   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include "modes.h"
> +#include "dump_modes_arch.h"
> +#include <NE10_dsp.h>
> +
> +static FILE *file;
> +
> +void dump_modes_arch_init(CELTMode **modes, int nb_modes)
> +{
> +   int i;
> +   /* Open the generated header and write its banner; abort if it cannot be created. */
> +   file = fopen(ARM_NE10_ARCH_FILE_NAME, "w");
> +   if (file == NULL) { perror(ARM_NE10_ARCH_FILE_NAME); exit(EXIT_FAILURE); }
> +   fprintf(file, "/* The contents of this file was automatically generated by\n");
> +   fprintf(file, " * dump_mode_arm_ne10.c with arguments:");
> +   for (i=0;i<nb_modes;i++) {
> +      CELTMode *mode = modes[i];
> +      fprintf(file, " %d %d",mode->Fs,mode->shortMdctSize*mode->nbShortMdcts);
> +   }
> +   fprintf(file, "\n * It contains static definitions for some pre-defined modes. */\n");
> +   fprintf(file, "#include <NE10_init.h>\n\n");
> +}
> +
> +void dump_modes_arch_finalize(void)
> +{
> +   if (fclose(file) != 0) perror(ARM_NE10_ARCH_FILE_NAME); /* report a failed flush of the generated header */
> +}
> +
> +void dump_mode_arch(CELTMode *mode)
> +{
> +   int k, j;
> +   int mdctSize;
> +   ne10_fft_cfg_float32_t cfg;   /* declared with the other locals, ahead of any statement */
> +   /* Writes the NE10 factor and twiddle tables plus FFT state structs for each kfft[k] of mode. */
> +   mdctSize = mode->shortMdctSize*mode->nbShortMdcts;
> +
> +   fprintf(file, "#ifndef NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
> +   fprintf(file, "#define NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
> +   /* cfg->factors: one table per FFT size that has an NE10 config */
> +   for(k=0;k<=mode->mdct.maxshift;k++) {
> +      cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->arch_fft->priv;
> +      if (!cfg)
> +         continue;
> +      fprintf(file, "static const ne10_int32_t ne10_factors_%d[%d] = {\n",
> +              mode->mdct.kfft[k]->nfft, (NE10_MAXFACTORS * 2));
> +      for(j=0;j<(NE10_MAXFACTORS * 2);j++) {
> +         fprintf(file, "%d,%c", cfg->factors[j],(j+16)%15==0?'\n':' ');
> +      }
> +      fprintf (file, "};\n");
> +   }
> +
> +   /* cfg->twiddles: one table per FFT size that has an NE10 config */
> +   for(k=0;k<=mode->mdct.maxshift;k++) {
> +      cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->arch_fft->priv;
> +      if (!cfg)
> +         continue;
> +      fprintf(file, "static const ne10_fft_cpx_float32_t ne10_twiddles_%d[%d] = {\n",
> +              mode->mdct.kfft[k]->nfft, mode->mdct.kfft[k]->nfft);
> +      for(j=0;j<mode->mdct.kfft[k]->nfft;j++) {
> +         fprintf(file, "{%#0.8gf,%#0.8gf},%c", cfg->twiddles[j].r, cfg->twiddles[j].i,(j+4)%3==0?'\n':' ');
> +      }
> +      fprintf (file, "};\n");
> +   }
> +   /* State structs: sizes without an NE10 config get a stub with is_supported = 0. */
> +   for(k=0;k<=mode->mdct.maxshift;k++) {
> +      cfg = (ne10_fft_cfg_float32_t)mode->mdct.kfft[k]->arch_fft->priv;
> +      if (!cfg) {
> +         fprintf(file, "/* Ne10 does not support scaled FFT for length = %d */\n",
> +                 mode->mdct.kfft[k]->nfft);
> +         fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n", mode->mdct.kfft[k]->nfft);
> +         fprintf(file, "0,\n");
> +         fprintf(file, "NULL\n");
> +         fprintf(file, "};\n");
> +         continue;
> +      }
> +      fprintf(file, "static const ne10_fft_state_float32_t ne10_fft_state_float32_%d = {\n",
> +              mode->mdct.kfft[k]->nfft);
> +      fprintf(file, "%d,\n", cfg->nfft);
> +      fprintf(file, "(ne10_int32_t *)ne10_factors_%d,\n", mode->mdct.kfft[k]->nfft);
> +      fprintf(file, "(ne10_fft_cpx_float32_t *)ne10_twiddles_%d,\n", mode->mdct.kfft[k]->nfft);
> +      fprintf(file, "NULL,\n");  /* buffer */
> +      fprintf(file, "(ne10_fft_cpx_float32_t *)&ne10_twiddles_%d[%d],\n",
> +              mode->mdct.kfft[k]->nfft, cfg->nfft);
> +      fprintf(file, "/* is_forward_scaled = true */\n");
> +      fprintf(file, "(ne10_int32_t) 1,\n");
> +      fprintf(file, "/* is_backward_scaled = false */\n");
> +      fprintf(file, "(ne10_int32_t) 0,\n");
> +      fprintf(file, "};\n");
> +
> +      fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n",
> +              mode->mdct.kfft[k]->nfft);
> +      fprintf(file, "1,\n");
> +      fprintf(file, "(void *)&ne10_fft_state_float32_%d,\n", mode->mdct.kfft[k]->nfft);
> +      fprintf(file, "};\n\n");
> +   }
> +   fprintf(file, "#endif  /* end NE10_FFT_PARAMS%d_%d */\n", mode->Fs, mdctSize);
> +}
> diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c
> index cc487fc..38fd4fb 100644
> --- a/celt/kiss_fft.c
> +++ b/celt/kiss_fft.c
> @@ -423,13 +423,19 @@ static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
>  #endif
>  }
>
> +int opus_fft_alloc_arch_c(kiss_fft_state *st) { /* plain-C variant: sets up no arch-specific FFT state */
> +   (void)st;   /* parameter intentionally unused */
> +   return 0;   /* opus_fft_alloc_twiddles() jumps to its fail path on a non-zero result */
> +}
> +
>  /*
>   *
>   * Allocates all necessary storage space for the fft and ifft.
>   * The return value is a contiguous block of memory.  As such,
>   * It can be freed with free().
>   * */
> -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,  const kiss_fft_state *base)
> +kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
> +                                        const kiss_fft_state *base, int arch)
>  {
>      kiss_fft_state *st=NULL;
>      size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
> @@ -478,22 +484,31 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,  co
>          if (st->bitrev==NULL)
>              goto fail;
>          compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
> +
> +        /* Initialize architecture specific fft parameters */
> +        if (opus_fft_alloc_arch(st, arch))
> +            goto fail;
>      }
>      return st;
>  fail:
> -    opus_fft_free(st);
> +    opus_fft_free(st, arch);
>      return NULL;
>  }
>
> -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem )
> +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
>  {
> -   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
> +   return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
> +}
> +
> +void opus_fft_free_arch_c(kiss_fft_state *st) { /* plain-C variant: nothing arch-specific to release */
> +   (void)st;   /* parameter intentionally unused */
> +}
>
> -void opus_fft_free(const kiss_fft_state *cfg)
> +void opus_fft_free(const kiss_fft_state *cfg, int arch)
>  {
>     if (cfg)
>     {
> +      opus_fft_free_arch((kiss_fft_state *)cfg, arch);
>        opus_free((opus_int16*)cfg->bitrev);
>        if (cfg->shift < 0)
>           opus_free((kiss_twiddle_cpx*)cfg->twiddles);
> @@ -551,7 +566,7 @@ void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
>      }
>  }
>
> -void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
> +void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
>  {
>     int i;
>     opus_val16 scale;
> diff --git a/celt/kiss_fft.h b/celt/kiss_fft.h
> index 390b54d..11b14bb 100644
> --- a/celt/kiss_fft.h
> +++ b/celt/kiss_fft.h
> @@ -32,6 +32,7 @@
>  #include <stdlib.h>
>  #include <math.h>
>  #include "arch.h"
> +#include "cpu_support.h"
>
>  #ifdef __cplusplus
>  extern "C" {
> @@ -77,6 +78,11 @@ typedef struct {
>   4*4*4*2
>   */
>
> +typedef struct arch_fft_state{ /* optional per-FFT data for an architecture-specific backend */
> +   int is_supported;   /* the NE10 table generator writes 1 when it has a config for this size, else 0 */
> +   void *priv;         /* backend-owned pointer; the NE10 generator casts it to ne10_fft_cfg_float32_t */
> +} arch_fft_state;
> +
>  typedef struct kiss_fft_state{
>      int nfft;
>      opus_val16 scale;
> @@ -87,8 +93,13 @@ typedef struct kiss_fft_state{
>      opus_int16 factors[2*MAXFACTORS];
>      const opus_int16 *bitrev;
>      const kiss_twiddle_cpx *twiddles;
> +    arch_fft_state *arch_fft;
>  } kiss_fft_state;
>
> +#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
> +#include "arm/fft_arm.h"
> +#endif
> +
>  /*typedef struct kiss_fft_state* kiss_fft_cfg;*/
>
>  /**
> @@ -114,9 +125,9 @@ typedef struct kiss_fft_state{
>   *      buffer size in *lenmem.
>   * */
>
> -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base);
> +kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch);
>
> -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
> +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch);
>
>  /**
>   * opus_fft(cfg,in_out_buf)
> @@ -128,13 +139,48 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
>   * Note that each element is complex and can be accessed like
>      f[k].r and f[k].i
>   * */
> -void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
> +void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
>  void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
>
>  void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
>  void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
>
> -void opus_fft_free(const kiss_fft_state *cfg);
> +void opus_fft_free(const kiss_fft_state *cfg, int arch);
> +
> +
> +void opus_fft_free_arch_c(kiss_fft_state *st);
> +int opus_fft_alloc_arch_c(kiss_fft_state *st);
> +
> +#if !defined(OVERRIDE_OPUS_FFT)
> +/* Is run-time CPU detection enabled on this platform? */
> +#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
> +/* Per-arch dispatch tables: extern, so including this header defines no objects. */
> +extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st);
> +
> +#define opus_fft_alloc_arch(_st, arch) \
> +         ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
> +
> +extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st);
> +#define opus_fft_free_arch(_st, arch) \
> +         ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
> +
> +extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
> +                                               const kiss_fft_cpx *fin,
> +                                               kiss_fft_cpx *fout);
> +#define opus_fft(_cfg, _fin, _fout, arch) \
> +   ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
> +#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
> +/* No RTCD: the arch argument is dropped and the plain C versions are called directly. */
> +#define opus_fft_alloc_arch(_st, arch) \
> +         opus_fft_alloc_arch_c(_st)
> +
> +#define opus_fft_free_arch(_st, arch) \
> +         opus_fft_free_arch_c(_st)
> +
> +#define opus_fft(_cfg, _fin, _fout, arch) \
> +         opus_fft_c(_cfg, _fin, _fout)
> +#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
> +#endif /* end if !defined(OVERRIDE_OPUS_FFT) */
>
>  #ifdef __cplusplus
>  }
> diff --git a/celt/mdct.c b/celt/mdct.c
> index 2795d90..ee6d80e 100644
> --- a/celt/mdct.c
> +++ b/celt/mdct.c
> @@ -60,7 +60,7 @@
>
>  #ifdef CUSTOM_MODES
>
> -int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
> +int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch)
>  {
>     int i;
>     kiss_twiddle_scalar *trig;
> @@ -71,9 +71,9 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
>     for (i=0;i<=maxshift;i++)
>     {
>        if (i==0)
> -         l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
> +         l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch);
>        else
> -         l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
> +         l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch);
>  #ifndef ENABLE_TI_DSPLIB55
>        if (l->kfft[i]==NULL)
>           return 0;
> @@ -104,11 +104,11 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
>     return 1;
>  }
>
> -void clt_mdct_clear(mdct_lookup *l)
> +void clt_mdct_clear(mdct_lookup *l, int arch)
>  {
>     int i;
>     for (i=0;i<=l->maxshift;i++)
> -      opus_fft_free(l->kfft[i]);
> +      opus_fft_free(l->kfft[i], arch);
>     opus_free((kiss_twiddle_scalar*)l->trig);
>  }
>
> @@ -116,8 +116,8 @@ void clt_mdct_clear(mdct_lookup *l)
>
>  /* Forward MDCT trashes the input array */
>  #ifndef OVERRIDE_clt_mdct_forward
> -void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
> -      const opus_val16 *window, int overlap, int shift, int stride)
> +void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
> +      const opus_val16 *window, int overlap, int shift, int stride, int arch)
>  {
>     int i;
>     int N, N2, N4;
> @@ -132,6 +132,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
>     int scale_shift = st->scale_shift-1;
>  #endif
>     SAVE_STACK;
> +   (void)arch;
>     scale = st->scale;
>
>     N = l->n;
> diff --git a/celt/mdct.h b/celt/mdct.h
> index d721821..cbaf679 100644
> --- a/celt/mdct.h
> +++ b/celt/mdct.h
> @@ -53,13 +53,19 @@ typedef struct {
>     const kiss_twiddle_scalar * OPUS_RESTRICT trig;
>  } mdct_lookup;
>
> -int clt_mdct_init(mdct_lookup *l,int N, int maxshift);
> -void clt_mdct_clear(mdct_lookup *l);
> +#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
> +#include "arm/mdct_arm.h"
> +#endif
> +
> +
> +int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch);
> +void clt_mdct_clear(mdct_lookup *l, int arch);
>
>  /** Compute a forward MDCT and scale by 4/N, trashes the input array */
> -void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in,
> -      kiss_fft_scalar * OPUS_RESTRICT out,
> -      const opus_val16 *window, int overlap, int shift, int stride);
> +void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in,
> +                        kiss_fft_scalar * OPUS_RESTRICT out,
> +                        const opus_val16 *window, int overlap,
> +                        int shift, int stride, int arch);
>
>  /** Compute a backward MDCT (no scaling) and performs weighted overlap-add
>      (scales implicitly by 1/2) */
> @@ -67,4 +73,27 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in,
>        kiss_fft_scalar * OPUS_RESTRICT out,
>        const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride);
>
> +#if !defined(OVERRIDE_OPUS_MDCT)
> +/* Is run-time CPU detection enabled on this platform? */
> +#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
> +/* Dispatch table indexed by arch: extern, so including this header defines no objects. */
> +extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
> +                                                            kiss_fft_scalar *in,
> +                                                            kiss_fft_scalar * OPUS_RESTRICT out,
> +                                                            const opus_val16 *window,
> +                                                            int overlap, int shift,
> +                                                            int stride, int arch);
> +/* Index with the macro's own _arch argument, not whatever `arch` names in the caller's scope. */
> +#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
> +   ((*CLT_MDCT_FORWARD_IMPL[(_arch)&OPUS_ARCHMASK])(_l, _in, _out, \
> +                                                    _window, _overlap, _shift, \
> +                                                    _stride, _arch))
> +#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
> +
> +#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
> +   clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
> +
> +#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
> +#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */
> +
>  #endif
> diff --git a/celt/modes.c b/celt/modes.c
> index 42e68e1..4fe91ff 100644
> --- a/celt/modes.c
> +++ b/celt/modes.c
> @@ -37,6 +37,7 @@
>  #include "os_support.h"
>  #include "stack_alloc.h"
>  #include "quant_bands.h"
> +#include "cpu_support.h"
>
>  static const opus_int16 eband5ms[] = {
>  /*0  200 400 600 800  1k 1.2 1.4 1.6  2k 2.4 2.8 3.2  4k 4.8 5.6 6.8  8k 9.6 12k 15.6 */
> @@ -229,6 +230,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
>     opus_val16 *window;
>     opus_int16 *logN;
>     int LM;
> +   int arch = opus_select_arch();
>     ALLOC_STACK;
>  #if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA)
>     if (global_stack==NULL)
> @@ -389,7 +391,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
>     compute_pulse_cache(mode, mode->maxLM);
>
>     if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts,
> -           mode->maxLM) == 0)
> +                     mode->maxLM, arch) == 0)
>        goto failure;
>
>     if (error)
> @@ -408,6 +410,8 @@ failure:
>  #ifdef CUSTOM_MODES
>  void opus_custom_mode_destroy(CELTMode *mode)
>  {
> +   int arch = opus_select_arch();
> +
>     if (mode == NULL)
>        return;
>  #ifndef CUSTOM_MODES_ONLY
> @@ -431,7 +435,7 @@ void opus_custom_mode_destroy(CELTMode *mode)
>     opus_free((opus_int16*)mode->cache.index);
>     opus_free((unsigned char*)mode->cache.bits);
>     opus_free((unsigned char*)mode->cache.caps);
> -   clt_mdct_clear(&mode->mdct);
> +   clt_mdct_clear(&mode->mdct, arch);
>
>     opus_free((CELTMode *)mode);
>  }
> diff --git a/celt/static_modes_float.h b/celt/static_modes_float.h
> index 2fadb62..e102a38 100644
> --- a/celt/static_modes_float.h
> +++ b/celt/static_modes_float.h
> @@ -4,6 +4,11 @@
>  #include "modes.h"
>  #include "rate.h"
>
> +#ifdef HAVE_ARM_NE10
> +#define OVERRIDE_FFT 1
> +#include "static_modes_float_arm_ne10.h"
> +#endif
> +
>  #ifndef DEF_WINDOW120
>  #define DEF_WINDOW120
>  static const opus_val16 window120[120] = {
> @@ -431,6 +436,11 @@ static const kiss_fft_state fft_state48000_960_0 = {
>  {5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, },   /* factors */
>  fft_bitrev480,  /* bitrev */
>  fft_twiddles48000_960,  /* bitrev */
> +#ifdef OVERRIDE_FFT
> +(arch_fft_state *)&cfg_arch_480,
> +#else
> +NULL,
> +#endif
>  };
>  #endif
>
> @@ -443,6 +453,11 @@ static const kiss_fft_state fft_state48000_960_1 = {
>  {5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
>  fft_bitrev240,  /* bitrev */
>  fft_twiddles48000_960,  /* bitrev */
> +#ifdef OVERRIDE_FFT
> +(arch_fft_state *)&cfg_arch_240,
> +#else
> +NULL,
> +#endif
>  };
>  #endif
>
> @@ -455,6 +470,11 @@ static const kiss_fft_state fft_state48000_960_2 = {
>  {5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
>  fft_bitrev120,  /* bitrev */
>  fft_twiddles48000_960,  /* bitrev */
> +#ifdef OVERRIDE_FFT
> +(arch_fft_state *)&cfg_arch_120,
> +#else
> +NULL,
> +#endif
>  };
>  #endif
>
> @@ -467,6 +487,11 @@ static const kiss_fft_state fft_state48000_960_3 = {
>  {5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },    /* factors */
>  fft_bitrev60,   /* bitrev */
>  fft_twiddles48000_960,  /* bitrev */
> +#ifdef OVERRIDE_FFT
> +(arch_fft_state *)&cfg_arch_60,
> +#else
> +NULL,
> +#endif
>  };
>  #endif
>
> diff --git a/celt/static_modes_float_arm_ne10.h b/celt/static_modes_float_arm_ne10.h
> new file mode 100644
> index 0000000..5bcec70
> --- /dev/null
> +++ b/celt/static_modes_float_arm_ne10.h
> @@ -0,0 +1,404 @@
> +/* The contents of this file was automatically generated by
> + * dump_mode_arm_ne10.c with arguments: 48000 960
> + * It contains static definitions for some pre-defined modes. */
> +#include <NE10_init.h>
> +
> +#ifndef NE10_FFT_PARAMS48000_960
> +#define NE10_FFT_PARAMS48000_960
> +static const ne10_int32_t ne10_factors_480[64] = {
> +4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, };
> +static const ne10_int32_t ne10_factors_240[64] = {
> +3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, };
> +static const ne10_int32_t ne10_factors_120[64] = {
> +3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, };
> +static const ne10_int32_t ne10_factors_60[64] = {
> +2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, };
> +static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = {
> +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
> +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
> +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
> +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
> +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
> +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
> +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
> +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
> +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
> +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
> +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
> +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
> +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
> +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
> +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
> +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
> +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
> +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
> +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
> +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
> +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
> +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
> +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
> +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
> +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
> +{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
> +{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
> +{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
> +{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
> +{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
> +{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
> +{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
> +{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
> +{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
> +{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
> +{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
> +{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
> +{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
> +{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
> +{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
> +{1.0000000f,-0.0000000f}, {0.99991435f,-0.013089596f}, {0.99965733f,-0.026176950f},
> +{0.99922901f,-0.039259817f}, {0.99862951f,-0.052335959f}, {0.99785894f,-0.065403134f},
> +{0.99691731f,-0.078459099f}, {0.99580491f,-0.091501623f}, {0.99452192f,-0.10452846f},
> +{0.99306846f,-0.11753740f}, {0.99144489f,-0.13052620f}, {0.98965138f,-0.14349262f},
> +{0.98768836f,-0.15643448f}, {0.98555607f,-0.16934951f}, {0.98325491f,-0.18223552f},
> +{0.98078525f,-0.19509032f}, {0.97814763f,-0.20791170f}, {0.97534233f,-0.22069745f},
> +{0.97236991f,-0.23344538f}, {0.96923089f,-0.24615330f}, {0.96592581f,-0.25881904f},
> +{0.96245521f,-0.27144045f}, {0.95881975f,-0.28401536f}, {0.95501995f,-0.29654160f},
> +{0.95105648f,-0.30901700f}, {0.94693011f,-0.32143945f}, {0.94264150f,-0.33380687f},
> +{0.93819129f,-0.34611708f}, {0.93358040f,-0.35836795f}, {0.92880952f,-0.37055743f},
> +{0.92387956f,-0.38268346f}, {0.91879117f,-0.39474389f}, {0.91354543f,-0.40673664f},
> +{0.90814316f,-0.41865975f}, {0.90258527f,-0.43051112f}, {0.89687270f,-0.44228873f},
> +{0.89100653f,-0.45399052f}, {0.88498765f,-0.46561453f}, {0.87881708f,-0.47715878f},
> +{0.87249601f,-0.48862126f}, {0.86602545f,-0.50000000f}, {0.85940641f,-0.51129311f},
> +{0.85264015f,-0.52249855f}, {0.84572786f,-0.53361452f}, {0.83867055f,-0.54463905f},
> +{0.83146960f,-0.55557024f}, {0.82412618f,-0.56640625f}, {0.81664151f,-0.57714522f},
> +{0.80901700f,-0.58778524f}, {0.80125380f,-0.59832460f}, {0.79335332f,-0.60876143f},
> +{0.78531694f,-0.61909395f}, {0.77714598f,-0.62932038f}, {0.76884180f,-0.63943899f},
> +{0.76040596f,-0.64944810f}, {0.75183982f,-0.65934587f}, {0.74314475f,-0.66913062f},
> +{0.73432249f,-0.67880076f}, {0.72537434f,-0.68835455f}, {0.71630192f,-0.69779050f},
> +{0.70710677f,-0.70710683f}, {0.69779044f,-0.71630198f}, {0.68835455f,-0.72537440f},
> +{0.67880070f,-0.73432255f}, {0.66913056f,-0.74314487f}, {0.65934581f,-0.75183982f},
> +{0.64944804f,-0.76040596f}, {0.63943899f,-0.76884186f}, {0.62932038f,-0.77714598f},
> +{0.61909395f,-0.78531694f}, {0.60876137f,-0.79335338f}, {0.59832460f,-0.80125386f},
> +{0.58778524f,-0.80901700f}, {0.57714516f,-0.81664151f}, {0.56640625f,-0.82412618f},
> +{0.55557019f,-0.83146960f}, {0.54463899f,-0.83867055f}, {0.53361452f,-0.84572786f},
> +{0.52249849f,-0.85264015f}, {0.51129311f,-0.85940641f}, {0.49999997f,-0.86602545f},
> +{0.48862118f,-0.87249601f}, {0.47715876f,-0.87881708f}, {0.46561447f,-0.88498765f},
> +{0.45399052f,-0.89100653f}, {0.44228867f,-0.89687276f}, {0.43051103f,-0.90258533f},
> +{0.41865975f,-0.90814316f}, {0.40673661f,-0.91354549f}, {0.39474380f,-0.91879129f},
> +{0.38268343f,-0.92387956f}, {0.37055740f,-0.92880958f}, {0.35836786f,-0.93358046f},
> +{0.34611705f,-0.93819135f}, {0.33380681f,-0.94264150f}, {0.32143947f,-0.94693011f},
> +{0.30901697f,-0.95105654f}, {0.29654151f,-0.95501995f}, {0.28401533f,-0.95881975f},
> +{0.27144039f,-0.96245527f}, {0.25881907f,-0.96592581f}, {0.24615327f,-0.96923089f},
> +{0.23344530f,-0.97236991f}, {0.22069745f,-0.97534233f}, {0.20791166f,-0.97814763f},
> +{0.19509023f,-0.98078531f}, {0.18223552f,-0.98325491f}, {0.16934945f,-0.98555607f},
> +{0.15643437f,-0.98768836f}, {0.14349259f,-0.98965138f}, {0.13052613f,-0.99144489f},
> +{0.11753740f,-0.99306846f}, {0.10452842f,-0.99452192f}, {0.091501534f,-0.99580491f},
> +{0.078459084f,-0.99691731f}, {0.065403074f,-0.99785894f}, {0.052335974f,-0.99862951f},
> +{0.039259788f,-0.99922901f}, {0.026176875f,-0.99965733f}, {0.013089597f,-0.99991435f},
> +{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
> +{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
> +{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
> +{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
> +{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
> +{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
> +{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
> +{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
> +{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
> +{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
> +{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
> +{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
> +{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
> +{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
> +{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
> +{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
> +{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
> +{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
> +{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
> +{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
> +{-4.3711388e-08f,-1.0000000f}, {-0.026176963f,-0.99965733f}, {-0.052336060f,-0.99862951f},
> +{-0.078459173f,-0.99691731f}, {-0.10452851f,-0.99452192f}, {-0.13052621f,-0.99144489f},
> +{-0.15643445f,-0.98768836f}, {-0.18223560f,-0.98325491f}, {-0.20791174f,-0.97814757f},
> +{-0.23344538f,-0.97236991f}, {-0.25881916f,-0.96592581f}, {-0.28401542f,-0.95881969f},
> +{-0.30901703f,-0.95105648f}, {-0.33380687f,-0.94264150f}, {-0.35836795f,-0.93358040f},
> +{-0.38268352f,-0.92387950f}, {-0.40673670f,-0.91354543f}, {-0.43051112f,-0.90258527f},
> +{-0.45399061f,-0.89100647f}, {-0.47715873f,-0.87881708f}, {-0.50000006f,-0.86602533f},
> +{-0.52249867f,-0.85264009f}, {-0.54463905f,-0.83867055f}, {-0.56640631f,-0.82412612f},
> +{-0.58778518f,-0.80901700f}, {-0.60876143f,-0.79335332f}, {-0.62932050f,-0.77714586f},
> +{-0.64944804f,-0.76040596f}, {-0.66913068f,-0.74314475f}, {-0.68835467f,-0.72537428f},
> +{-0.70710677f,-0.70710677f}, {-0.72537446f,-0.68835449f}, {-0.74314493f,-0.66913044f},
> +{-0.76040596f,-0.64944804f}, {-0.77714604f,-0.62932026f}, {-0.79335332f,-0.60876143f},
> +{-0.80901700f,-0.58778518f}, {-0.82412624f,-0.56640613f}, {-0.83867055f,-0.54463899f},
> +{-0.85264021f,-0.52249849f}, {-0.86602539f,-0.50000006f}, {-0.87881714f,-0.47715873f},
> +{-0.89100659f,-0.45399037f}, {-0.90258527f,-0.43051112f}, {-0.91354549f,-0.40673658f},
> +{-0.92387956f,-0.38268328f}, {-0.93358040f,-0.35836792f}, {-0.94264150f,-0.33380675f},
> +{-0.95105654f,-0.30901679f}, {-0.95881975f,-0.28401530f}, {-0.96592587f,-0.25881892f},
> +{-0.97236991f,-0.23344538f}, {-0.97814763f,-0.20791161f}, {-0.98325491f,-0.18223536f},
> +{-0.98768836f,-0.15643445f}, {-0.99144489f,-0.13052608f}, {-0.99452192f,-0.10452849f},
> +{-0.99691737f,-0.078459039f}, {-0.99862957f,-0.052335810f}, {-0.99965733f,-0.026176952f},
> +{1.0000000f,-0.0000000f}, {0.99922901f,-0.039259817f}, {0.99691731f,-0.078459099f},
> +{0.99306846f,-0.11753740f}, {0.98768836f,-0.15643448f}, {0.98078525f,-0.19509032f},
> +{0.97236991f,-0.23344538f}, {0.96245521f,-0.27144045f}, {0.95105648f,-0.30901700f},
> +{0.93819129f,-0.34611708f}, {0.92387956f,-0.38268346f}, {0.90814316f,-0.41865975f},
> +{0.89100653f,-0.45399052f}, {0.87249601f,-0.48862126f}, {0.85264015f,-0.52249855f},
> +{0.83146960f,-0.55557024f}, {0.80901700f,-0.58778524f}, {0.78531694f,-0.61909395f},
> +{0.76040596f,-0.64944810f}, {0.73432249f,-0.67880076f}, {0.70710677f,-0.70710683f},
> +{0.67880070f,-0.73432255f}, {0.64944804f,-0.76040596f}, {0.61909395f,-0.78531694f},
> +{0.58778524f,-0.80901700f}, {0.55557019f,-0.83146960f}, {0.52249849f,-0.85264015f},
> +{0.48862118f,-0.87249601f}, {0.45399052f,-0.89100653f}, {0.41865975f,-0.90814316f},
> +{0.38268343f,-0.92387956f}, {0.34611705f,-0.93819135f}, {0.30901697f,-0.95105654f},
> +{0.27144039f,-0.96245527f}, {0.23344530f,-0.97236991f}, {0.19509023f,-0.98078531f},
> +{0.15643437f,-0.98768836f}, {0.11753740f,-0.99306846f}, {0.078459084f,-0.99691731f},
> +{0.039259788f,-0.99922901f}, {-4.3711388e-08f,-1.0000000f}, {-0.039259877f,-0.99922901f},
> +{-0.078459173f,-0.99691731f}, {-0.11753749f,-0.99306846f}, {-0.15643445f,-0.98768836f},
> +{-0.19509032f,-0.98078525f}, {-0.23344538f,-0.97236991f}, {-0.27144048f,-0.96245521f},
> +{-0.30901703f,-0.95105648f}, {-0.34611711f,-0.93819129f}, {-0.38268352f,-0.92387950f},
> +{-0.41865984f,-0.90814310f}, {-0.45399061f,-0.89100647f}, {-0.48862135f,-0.87249595f},
> +{-0.52249867f,-0.85264009f}, {-0.55557036f,-0.83146954f}, {-0.58778518f,-0.80901700f},
> +{-0.61909389f,-0.78531694f}, {-0.64944804f,-0.76040596f}, {-0.67880076f,-0.73432249f},
> +{-0.70710677f,-0.70710677f}, {-0.73432249f,-0.67880070f}, {-0.76040596f,-0.64944804f},
> +{-0.78531694f,-0.61909389f}, {-0.80901700f,-0.58778518f}, {-0.83146966f,-0.55557019f},
> +{-0.85264021f,-0.52249849f}, {-0.87249607f,-0.48862115f}, {-0.89100659f,-0.45399037f},
> +{-0.90814322f,-0.41865960f}, {-0.92387956f,-0.38268328f}, {-0.93819135f,-0.34611690f},
> +{-0.95105654f,-0.30901679f}, {-0.96245521f,-0.27144048f}, {-0.97236991f,-0.23344538f},
> +{-0.98078531f,-0.19509031f}, {-0.98768836f,-0.15643445f}, {-0.99306846f,-0.11753736f},
> +{-0.99691737f,-0.078459039f}, {-0.99922901f,-0.039259743f}, {-1.0000000f,8.7422777e-08f},
> +{-0.99922901f,0.039259918f}, {-0.99691731f,0.078459218f}, {-0.99306846f,0.11753753f},
> +{-0.98768830f,0.15643461f}, {-0.98078525f,0.19509049f}, {-0.97236985f,0.23344554f},
> +{-0.96245515f,0.27144065f}, {-0.95105654f,0.30901697f}, {-0.93819135f,0.34611705f},
> +{-0.92387956f,0.38268346f}, {-0.90814316f,0.41865975f}, {-0.89100653f,0.45399055f},
> +{-0.87249601f,0.48862129f}, {-0.85264015f,0.52249861f}, {-0.83146960f,0.55557030f},
> +{-0.80901694f,0.58778536f}, {-0.78531688f,0.61909401f}, {-0.76040590f,0.64944816f},
> +{-0.73432243f,0.67880082f}, {-0.70710665f,0.70710689f}, {-0.67880058f,0.73432261f},
> +{-0.64944792f,0.76040608f}, {-0.61909378f,0.78531706f}, {-0.58778507f,0.80901712f},
> +{-0.55557001f,0.83146977f}, {-0.52249837f,0.85264033f}, {-0.48862100f,0.87249613f},
> +{-0.45399022f,0.89100665f}, {-0.41865945f,0.90814328f}, {-0.38268313f,0.92387968f},
> +{-0.34611672f,0.93819147f}, {-0.30901709f,0.95105648f}, {-0.27144054f,0.96245521f},
> +{-0.23344545f,0.97236991f}, {-0.19509038f,0.98078525f}, {-0.15643452f,0.98768830f},
> +{-0.11753743f,0.99306846f}, {-0.078459114f,0.99691731f}, {-0.039259821f,0.99922901f},
> +};
> +static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = {
> +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
> +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
> +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
> +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
> +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
> +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
> +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
> +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
> +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
> +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
> +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
> +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
> +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
> +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
> +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
> +{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
> +{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
> +{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
> +{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
> +{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
> +{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
> +{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
> +{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
> +{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
> +{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
> +{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
> +{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
> +{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
> +{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
> +{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
> +{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
> +{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
> +{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
> +{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
> +{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
> +{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
> +{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
> +{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
> +{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
> +{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
> +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
> +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
> +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
> +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
> +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
> +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
> +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
> +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
> +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
> +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
> +{-4.3711388e-08f,-1.0000000f}, {-0.052336060f,-0.99862951f}, {-0.10452851f,-0.99452192f},
> +{-0.15643445f,-0.98768836f}, {-0.20791174f,-0.97814757f}, {-0.25881916f,-0.96592581f},
> +{-0.30901703f,-0.95105648f}, {-0.35836795f,-0.93358040f}, {-0.40673670f,-0.91354543f},
> +{-0.45399061f,-0.89100647f}, {-0.50000006f,-0.86602533f}, {-0.54463905f,-0.83867055f},
> +{-0.58778518f,-0.80901700f}, {-0.62932050f,-0.77714586f}, {-0.66913068f,-0.74314475f},
> +{-0.70710677f,-0.70710677f}, {-0.74314493f,-0.66913044f}, {-0.77714604f,-0.62932026f},
> +{-0.80901700f,-0.58778518f}, {-0.83867055f,-0.54463899f}, {-0.86602539f,-0.50000006f},
> +{-0.89100659f,-0.45399037f}, {-0.91354549f,-0.40673658f}, {-0.93358040f,-0.35836792f},
> +{-0.95105654f,-0.30901679f}, {-0.96592587f,-0.25881892f}, {-0.97814763f,-0.20791161f},
> +{-0.98768836f,-0.15643445f}, {-0.99452192f,-0.10452849f}, {-0.99862957f,-0.052335810f},
> +{1.0000000f,-0.0000000f}, {0.99691731f,-0.078459099f}, {0.98768836f,-0.15643448f},
> +{0.97236991f,-0.23344538f}, {0.95105648f,-0.30901700f}, {0.92387956f,-0.38268346f},
> +{0.89100653f,-0.45399052f}, {0.85264015f,-0.52249855f}, {0.80901700f,-0.58778524f},
> +{0.76040596f,-0.64944810f}, {0.70710677f,-0.70710683f}, {0.64944804f,-0.76040596f},
> +{0.58778524f,-0.80901700f}, {0.52249849f,-0.85264015f}, {0.45399052f,-0.89100653f},
> +{0.38268343f,-0.92387956f}, {0.30901697f,-0.95105654f}, {0.23344530f,-0.97236991f},
> +{0.15643437f,-0.98768836f}, {0.078459084f,-0.99691731f}, {-4.3711388e-08f,-1.0000000f},
> +{-0.078459173f,-0.99691731f}, {-0.15643445f,-0.98768836f}, {-0.23344538f,-0.97236991f},
> +{-0.30901703f,-0.95105648f}, {-0.38268352f,-0.92387950f}, {-0.45399061f,-0.89100647f},
> +{-0.52249867f,-0.85264009f}, {-0.58778518f,-0.80901700f}, {-0.64944804f,-0.76040596f},
> +{-0.70710677f,-0.70710677f}, {-0.76040596f,-0.64944804f}, {-0.80901700f,-0.58778518f},
> +{-0.85264021f,-0.52249849f}, {-0.89100659f,-0.45399037f}, {-0.92387956f,-0.38268328f},
> +{-0.95105654f,-0.30901679f}, {-0.97236991f,-0.23344538f}, {-0.98768836f,-0.15643445f},
> +{-0.99691737f,-0.078459039f}, {-1.0000000f,8.7422777e-08f}, {-0.99691731f,0.078459218f},
> +{-0.98768830f,0.15643461f}, {-0.97236985f,0.23344554f}, {-0.95105654f,0.30901697f},
> +{-0.92387956f,0.38268346f}, {-0.89100653f,0.45399055f}, {-0.85264015f,0.52249861f},
> +{-0.80901694f,0.58778536f}, {-0.76040590f,0.64944816f}, {-0.70710665f,0.70710689f},
> +{-0.64944792f,0.76040608f}, {-0.58778507f,0.80901712f}, {-0.52249837f,0.85264033f},
> +{-0.45399022f,0.89100665f}, {-0.38268313f,0.92387968f}, {-0.30901709f,0.95105648f},
> +{-0.23344545f,0.97236991f}, {-0.15643452f,0.98768830f}, {-0.078459114f,0.99691731f},
> +};
> +static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = {
> +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
> +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
> +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
> +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
> +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
> +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
> +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
> +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
> +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
> +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
> +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
> +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
> +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
> +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
> +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
> +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
> +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
> +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
> +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
> +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
> +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
> +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
> +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
> +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
> +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
> +{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
> +{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
> +{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
> +{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
> +{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
> +{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
> +{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
> +{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
> +{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
> +{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
> +{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
> +{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
> +{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
> +{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
> +{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
> +};
> +static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = {
> +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
> +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
> +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
> +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
> +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
> +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
> +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
> +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
> +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
> +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
> +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
> +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
> +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
> +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
> +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
> +{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
> +{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
> +{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
> +{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
> +{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
> +};
> +static const ne10_fft_state_float32_t ne10_fft_state_float32_480 = {  /* precomputed NE10 float FFT state built on the *_480 tables */
> +120,  /* equals 480/4, same value as the twiddle index used below. NOTE(review): presumably NE10's nfft field -- confirm */
> +(ne10_int32_t *)ne10_factors_480,  /* factor table defined outside this excerpt */
> +(ne10_fft_cpx_float32_t *)ne10_twiddles_480,  /* start of the twiddle table */
> +NULL,  /* no pointer stored here. NOTE(review): presumably a scratch buffer supplied at run time -- confirm */
> +(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120],  /* second pointer into the same table, 120 entries in */
> +/* is_forward_scaled = true */
> +(ne10_int32_t) 1,
> +/* is_backward_scaled = false */
> +(ne10_int32_t) 0,
> +};
> +static const arch_fft_state cfg_arch_480 = {  /* wrapper exposing the state above through an untyped pointer */
> +1,  /* NOTE(review): presumably a "supported" flag -- confirm against arch_fft_state */
> +(void *)&ne10_fft_state_float32_480,  /* the cast also drops const from the state object */
> +};
> +
> +static const ne10_fft_state_float32_t ne10_fft_state_float32_240 = {  /* precomputed NE10 float FFT state built on the *_240 tables */
> +60,  /* equals 240/4, same value as the twiddle index used below. NOTE(review): presumably NE10's nfft field -- confirm */
> +(ne10_int32_t *)ne10_factors_240,  /* factor table defined outside this excerpt */
> +(ne10_fft_cpx_float32_t *)ne10_twiddles_240,  /* start of the twiddle table; the cast drops its const qualifier */
> +NULL,  /* no pointer stored here. NOTE(review): presumably a scratch buffer supplied at run time -- confirm */
> +(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60],  /* second pointer into the same table, 60 entries in */
> +/* is_forward_scaled = true */
> +(ne10_int32_t) 1,
> +/* is_backward_scaled = false */
> +(ne10_int32_t) 0,
> +};
> +static const arch_fft_state cfg_arch_240 = {  /* wrapper exposing the state above through an untyped pointer */
> +1,  /* NOTE(review): presumably a "supported" flag -- confirm against arch_fft_state */
> +(void *)&ne10_fft_state_float32_240,  /* the cast also drops const from the state object */
> +};
> +
> +static const ne10_fft_state_float32_t ne10_fft_state_float32_120 = {  /* precomputed NE10 float FFT state built on the *_120 tables */
> +30,  /* equals 120/4, same value as the twiddle index used below. NOTE(review): presumably NE10's nfft field -- confirm */
> +(ne10_int32_t *)ne10_factors_120,  /* factor table defined outside this excerpt */
> +(ne10_fft_cpx_float32_t *)ne10_twiddles_120,  /* start of the twiddle table; the cast drops its const qualifier */
> +NULL,  /* no pointer stored here. NOTE(review): presumably a scratch buffer supplied at run time -- confirm */
> +(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30],  /* second pointer into the same table, 30 entries in */
> +/* is_forward_scaled = true */
> +(ne10_int32_t) 1,
> +/* is_backward_scaled = false */
> +(ne10_int32_t) 0,
> +};
> +static const arch_fft_state cfg_arch_120 = {  /* wrapper exposing the state above through an untyped pointer */
> +1,  /* NOTE(review): presumably a "supported" flag -- confirm against arch_fft_state */
> +(void *)&ne10_fft_state_float32_120,  /* the cast also drops const from the state object */
> +};
> +
> +static const ne10_fft_state_float32_t ne10_fft_state_float32_60 = {  /* precomputed NE10 float FFT state built on the *_60 tables */
> +15,  /* equals 60/4, same value as the twiddle index used below. NOTE(review): presumably NE10's nfft field -- confirm */
> +(ne10_int32_t *)ne10_factors_60,  /* factor table defined outside this excerpt */
> +(ne10_fft_cpx_float32_t *)ne10_twiddles_60,  /* start of the twiddle table; the cast drops its const qualifier */
> +NULL,  /* no pointer stored here. NOTE(review): presumably a scratch buffer supplied at run time -- confirm */
> +(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15],  /* second pointer into the same table, 15 entries in */
> +/* is_forward_scaled = true */
> +(ne10_int32_t) 1,
> +/* is_backward_scaled = false */
> +(ne10_int32_t) 0,
> +};
> +static const arch_fft_state cfg_arch_60 = {  /* wrapper exposing the state above through an untyped pointer */
> +1,  /* NOTE(review): presumably a "supported" flag -- confirm against arch_fft_state */
> +(void *)&ne10_fft_state_float32_60,  /* the cast also drops const from the state object */
> +};
> +
> +#endif  /* end NE10_FFT_PARAMS48000_960 */
> diff --git a/celt/tests/test_unit_dft.c b/celt/tests/test_unit_dft.c
> index 57db0e3..4a2f8af 100644
> --- a/celt/tests/test_unit_dft.c
> +++ b/celt/tests/test_unit_dft.c
> @@ -40,11 +40,27 @@
>  #define CELT_C
>  #define TEST_UNIT_DFT_C
>  #include "stack_alloc.h"
> +#include "pitch.h"
> +#include "celt_lpc.c"
> +#include "pitch.c"
>  #include "kiss_fft.h"
>  #include "kiss_fft.c"
> +#include "mdct.c"
>  #include "mathops.c"
>  #include "entcode.c"
>
> +#if defined(OPUS_HAVE_RTCD) && \
> +         (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_NEON_INTR))
> +#include "arm/armcpu.c"
> +#if defined(HAVE_ARM_NE10)
> +#include "arm/celt_ne10_fft.c"
> +#include "arm/celt_ne10_mdct.c"
> +#include "arm/celt_neon_intr.c"
> +#include "arm/arm_celt_map.c"
> +#endif
> +#elif defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1)
> +#include "x86/x86cpu.c"
> +#endif
>
>  #ifndef M_PI
>  #define M_PI 3.141592653
> @@ -93,13 +109,13 @@ void check(kiss_fft_cpx  * in,kiss_fft_cpx  * out,int nfft,int isinverse)
>      }
>  }
>
> -void test1d(int nfft,int isinverse)
> +void test1d(int nfft,int isinverse,int arch)
>  {
>      size_t buflen = sizeof(kiss_fft_cpx)*nfft;
>
>      kiss_fft_cpx  * in = (kiss_fft_cpx*)malloc(buflen);
>      kiss_fft_cpx  * out= (kiss_fft_cpx*)malloc(buflen);
> -    kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0);
> +    kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0,arch);
>      int k;
>
>      for (k=0;k<nfft;++k) {
> @@ -125,7 +141,7 @@ void test1d(int nfft,int isinverse)
>      if (isinverse)
>         opus_ifft(cfg,in,out);
>      else
> -       opus_fft(cfg,in,out);
> +       opus_fft(cfg,in,out, arch);
>
>      /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
>
> @@ -139,26 +155,28 @@ void test1d(int nfft,int isinverse)
>  int main(int argc,char ** argv)
>  {
>      ALLOC_STACK;
> +    int arch = opus_select_arch();
> +
>      if (argc>1) {
>          int k;
>          for (k=1;k<argc;++k) {
> -            test1d(atoi(argv[k]),0);
> -            test1d(atoi(argv[k]),1);
> +            test1d(atoi(argv[k]),0,arch);
> +            test1d(atoi(argv[k]),1,arch);
>          }
>      }else{
> -        test1d(32,0);
> -        test1d(32,1);
> -        test1d(128,0);
> -        test1d(128,1);
> -        test1d(256,0);
> -        test1d(256,1);
> +        test1d(32,0,arch);
> +        test1d(32,1,arch);
> +        test1d(128,0,arch);
> +        test1d(128,1,arch);
> +        test1d(256,0,arch);
> +        test1d(256,1,arch);
>  #ifndef RADIX_TWO_ONLY
> -        test1d(36,0);
> -        test1d(36,1);
> -        test1d(50,0);
> -        test1d(50,1);
> -        test1d(120,0);
> -        test1d(120,1);
> +        test1d(36,0,arch);
> +        test1d(36,1,arch);
> +        test1d(50,0,arch);
> +        test1d(50,1,arch);
> +        test1d(120,0,arch);
> +        test1d(120,1,arch);
>  #endif
>      }
>      return ret;
> diff --git a/celt/tests/test_unit_mathops.c b/celt/tests/test_unit_mathops.c
> index b9b1bcf..0f1e4f1 100644
> --- a/celt/tests/test_unit_mathops.c
> +++ b/celt/tests/test_unit_mathops.c
> @@ -49,6 +49,8 @@
>  #include "cwrs.c"
>  #include "pitch.c"
>  #include "celt_lpc.c"
> +#include "kiss_fft.c"
> +#include "mdct.c"
>
>  #if defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)
>  #include "x86/pitch_sse.c"
> @@ -60,6 +62,10 @@
>         || defined(OPUS_ARM_NEON_INTR))
>  #if defined(OPUS_ARM_NEON_INTR)
>  #include "arm/celt_neon_intr.c"
> +#if defined(HAVE_ARM_NE10)
> +#include "arm/celt_ne10_fft.c"
> +#include "arm/celt_ne10_mdct.c"
> +#endif
>  #endif
>  #include "arm/arm_celt_map.c"
>  #endif
> diff --git a/celt/tests/test_unit_mdct.c b/celt/tests/test_unit_mdct.c
> index ac8957f..4c27b4c 100644
> --- a/celt/tests/test_unit_mdct.c
> +++ b/celt/tests/test_unit_mdct.c
> @@ -43,9 +43,24 @@
>
>  #include "kiss_fft.c"
>  #include "mdct.c"
> +#include "pitch.c"
> +#include "celt_lpc.c"
>  #include "mathops.c"
>  #include "entcode.c"
>
> +#if defined(OPUS_HAVE_RTCD) && \
> +         (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_NEON_INTR))
> +#include "arm/armcpu.c"
> +#if defined(HAVE_ARM_NE10)
> +#include "arm/celt_ne10_fft.c"
> +#include "arm/celt_ne10_mdct.c"
> +#include "arm/celt_neon_intr.c"
> +#include "arm/arm_celt_map.c"
> +#endif
> +#elif defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1)
> +#include "x86/x86cpu.c"
> +#endif
> +
>  #ifndef M_PI
>  #define M_PI 3.141592653
>  #endif
> @@ -112,7 +127,7 @@ void check_inv(kiss_fft_scalar  * in,kiss_fft_scalar  * out,int nfft,int isinver
>  }
>
>
> -void test1d(int nfft,int isinverse)
> +void test1d(int nfft,int isinverse,int arch)
>  {
>      mdct_lookup cfg;
>      size_t buflen = sizeof(kiss_fft_scalar)*nfft;
> @@ -123,7 +138,7 @@ void test1d(int nfft,int isinverse)
>      opus_val16  * window= (opus_val16*)malloc(sizeof(opus_val16)*nfft/2);
>      int k;
>
> -    clt_mdct_init(&cfg, nfft, 0);
> +    clt_mdct_init(&cfg, nfft, 0, arch);
>      for (k=0;k<nfft;++k) {
>          in[k] = (rand() % 32768) - 16384;
>      }
> @@ -156,7 +171,7 @@ void test1d(int nfft,int isinverse)
>            out[nfft-k-1] = out[nfft/2+k];
>         check_inv(in,out,nfft,isinverse);
>      } else {
> -       clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1);
> +       clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1, arch);
>         check(in_copy,out,nfft,isinverse);
>      }
>      /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
> @@ -164,46 +179,48 @@ void test1d(int nfft,int isinverse)
>
>      free(in);
>      free(out);
> -    clt_mdct_clear(&cfg);
> +    clt_mdct_clear(&cfg, arch);
>  }
>
>  int main(int argc,char ** argv)
>  {
>      ALLOC_STACK;
> +    int arch = opus_select_arch();
> +
>      if (argc>1) {
>          int k;
>          for (k=1;k<argc;++k) {
> -            test1d(atoi(argv[k]),0);
> -            test1d(atoi(argv[k]),1);
> +            test1d(atoi(argv[k]),0,arch);
> +            test1d(atoi(argv[k]),1,arch);
>          }
>      }else{
> -        test1d(32,0);
> -        test1d(32,1);
> -        test1d(256,0);
> -        test1d(256,1);
> -        test1d(512,0);
> -        test1d(512,1);
> -        test1d(1024,0);
> -        test1d(1024,1);
> -        test1d(2048,0);
> -        test1d(2048,1);
> +        test1d(32,0,arch);
> +        test1d(32,1,arch);
> +        test1d(256,0,arch);
> +        test1d(256,1,arch);
> +        test1d(512,0,arch);
> +        test1d(512,1,arch);
> +        test1d(1024,0,arch);
> +        test1d(1024,1,arch);
> +        test1d(2048,0,arch);
> +        test1d(2048,1,arch);
>  #ifndef RADIX_TWO_ONLY
> -        test1d(36,0);
> -        test1d(36,1);
> -        test1d(40,0);
> -        test1d(40,1);
> -        test1d(60,0);
> -        test1d(60,1);
> -        test1d(120,0);
> -        test1d(120,1);
> -        test1d(240,0);
> -        test1d(240,1);
> -        test1d(480,0);
> -        test1d(480,1);
> -        test1d(960,0);
> -        test1d(960,1);
> -        test1d(1920,0);
> -        test1d(1920,1);
> +        test1d(36,0,arch);
> +        test1d(36,1,arch);
> +        test1d(40,0,arch);
> +        test1d(40,1,arch);
> +        test1d(60,0,arch);
> +        test1d(60,1,arch);
> +        test1d(120,0,arch);
> +        test1d(120,1,arch);
> +        test1d(240,0,arch);
> +        test1d(240,1,arch);
> +        test1d(480,0,arch);
> +        test1d(480,1,arch);
> +        test1d(960,0,arch);
> +        test1d(960,1,arch);
> +        test1d(1920,0,arch);
> +        test1d(1920,1,arch);
>  #endif
>      }
>      return ret;
> diff --git a/celt/tests/test_unit_rotation.c b/celt/tests/test_unit_rotation.c
> index 5507884..ce14936 100644
> --- a/celt/tests/test_unit_rotation.c
> +++ b/celt/tests/test_unit_rotation.c
> @@ -46,6 +46,8 @@
>  #include "bands.h"
>  #include "pitch.c"
>  #include "celt_lpc.c"
> +#include "kiss_fft.c"
> +#include "mdct.c"
>  #include <math.h>
>
>  #if defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)
> @@ -59,6 +61,10 @@
>  #if defined(OPUS_ARM_NEON_INTR)
>  #include "arm/celt_neon_intr.c"
>  #endif
> +#if defined(HAVE_ARM_NE10)
> +#include "arm/celt_ne10_fft.c"
> +#include "arm/celt_ne10_mdct.c"
> +#endif
>  #include "arm/arm_celt_map.c"
>  #endif
>
> diff --git a/celt_headers.mk b/celt_headers.mk
> index 5bb193e..c51c3ee 100644
> --- a/celt_headers.mk
> +++ b/celt_headers.mk
> @@ -31,11 +31,14 @@ celt/stack_alloc.h \
>  celt/vq.h \
>  celt/static_modes_float.h \
>  celt/static_modes_fixed.h \
> +celt/static_modes_float_arm_ne10.h \
>  celt/arm/armcpu.h \
>  celt/arm/fixed_armv4.h \
>  celt/arm/fixed_armv5e.h \
>  celt/arm/kiss_fft_armv4.h \
>  celt/arm/kiss_fft_armv5e.h \
>  celt/arm/pitch_arm.h \
> +celt/arm/fft_arm.h \
> +celt/arm/mdct_arm.h \
>  celt/x86/pitch_sse.h \
>  celt/x86/x86cpu.h
> diff --git a/celt_sources.mk b/celt_sources.mk
> index 29ec937..7121301 100644
> --- a/celt_sources.mk
> +++ b/celt_sources.mk
> @@ -35,3 +35,7 @@ celt/arm/armopts.s.in
>
>  CELT_SOURCES_ARM_NEON_INTR = \
>  celt/arm/celt_neon_intr.c
> +
> +CELT_SOURCES_ARM_NE10= \
> +celt/arm/celt_ne10_fft.c \
> +celt/arm/celt_ne10_mdct.c
> diff --git a/configure.ac b/configure.ac
> index 87cece9..baa3425 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -351,6 +351,80 @@ AM_CONDITIONAL([OPUS_ARM_EXTERNAL_ASM],
>  AM_CONDITIONAL([HAVE_SSE4_1], [false])
>  AM_CONDITIONAL([HAVE_SSE2], [false])
>
> +AC_DEFUN([OPUS_PATH_NE10],
> +   [
> +      AC_ARG_WITH(NE10,
> +                  AC_HELP_STRING([--with-NE10=PFX],[Prefix where libNE10 is installed (optional)]),
> +                  NE10_prefix="$withval", NE10_prefix="")
> +      AC_ARG_WITH(NE10-libraries,
> +                  AC_HELP_STRING([--with-NE10-libraries=DIR],
> +                        [Directory where libNE10 library is installed (optional)]),
> +                  NE10_libraries="$withval", NE10_libraries="")
> +      AC_ARG_WITH(NE10-includes,
> +                  AC_HELP_STRING([--with-NE10-includes=DIR],
> +                                 [Directory where libNE10 header files are installed (optional)]),
> +                  NE10_includes="$withval", NE10_includes="")
> +      # Library search path: --with-NE10-libraries wins, then --with-NE10, then the install prefix.
> +      if test "x$NE10_libraries" != "x" ; then
> +         NE10_LIBS="-L$NE10_libraries"
> +      elif test "x$NE10_prefix" = "xno" || test "x$NE10_prefix" = "xyes" ; then
> +         NE10_LIBS=""
> +      elif test "x$NE10_prefix" != "x" ; then
> +         NE10_LIBS="-L$NE10_prefix/lib"
> +      elif test "x$prefix" != "xNONE" ; then
> +         NE10_LIBS="-L$prefix/lib"
> +      fi
> +      # Link against libNE10 unless NE10 was explicitly disabled.
> +      if test "x$NE10_prefix" != "xno" ; then
> +         NE10_LIBS="$NE10_LIBS -lNE10"
> +      fi
> +      # Header search path: same precedence as for the library path above.
> +      if test "x$NE10_includes" != "x" ; then
> +         NE10_CFLAGS="-I$NE10_includes"
> +      elif test "x$NE10_prefix" = "xno" || test "x$NE10_prefix" = "xyes" ; then
> +         NE10_CFLAGS=""
> +      elif test "x$NE10_prefix" != "x" ; then
> +         NE10_CFLAGS="-I$NE10_prefix/include"
> +      elif test "x$prefix" != "xNONE"; then
> +         NE10_CFLAGS="-I$prefix/include"
> +      fi
> +      # Try to link a small NE10 program, keeping the user's own flags and adding the NE10 paths.
> +      AC_MSG_CHECKING(for NE10)
> +      save_CFLAGS="$CFLAGS"; CFLAGS="$CFLAGS $NE10_CFLAGS"
> +      save_LIBS="$LIBS"; LIBS="$LIBS $NE10_LIBS"
> +      AC_LINK_IFELSE(
> +         [
> +            AC_LANG_PROGRAM(
> +               [[#include <NE10_init.h>
> +               ]],
> +               [[
> +                  ne10_fft_cfg_float32_t cfg;
> +                  cfg = ne10_fft_alloc_c2c_float32_neon(480);
> +               ]]
> +            )
> +         ],[
> +            HAVE_ARM_NE10=1
> +            AC_MSG_RESULT([yes])
> +         ],[
> +            HAVE_ARM_NE10=0
> +            AC_MSG_RESULT([no])
> +            NE10_CFLAGS=""
> +            NE10_LIBS=""
> +         ]
> +      )
> +      CFLAGS="$save_CFLAGS"; LIBS="$save_LIBS"
> +      # Now we know whether libNE10 is usable; publish the define and flags if so.
> +      AS_IF([test x"$HAVE_ARM_NE10" = x"1"],
> +         [
> +            AC_DEFINE([HAVE_ARM_NE10], 1, [NE10 library is installed on host. Make sure it is on target!])
> +            AC_SUBST(HAVE_ARM_NE10)
> +            AC_SUBST(NE10_CFLAGS)
> +            AC_SUBST(NE10_LIBS)
> +         ],[]
> +      )
> +   ]
> +)
> +
>  AS_IF([test x"$enable_intrinsics" = x"yes"],[
>     case $host_cpu in
>     arm*)
> @@ -391,6 +465,10 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
>              AC_DEFINE([OPUS_ARM_MAY_HAVE_EDSP], 1, [Define if compiler support EDSP Instructions])
>              AC_DEFINE([OPUS_ARM_MAY_HAVE_MEDIA], 1, [Define if compiler support MEDIA Instructions])
>              AC_DEFINE([OPUS_ARM_MAY_HAVE_NEON], 1, [Define if compiler support NEON instructions])
> +
> +            OPUS_PATH_NE10()
> +            AS_IF([test x"$NE10_LIBS" != "x"],
> +                  [enable_intrinsics="$enable_intrinsics NE10"],[])
>           ],
>           [
>              AC_MSG_WARN([Compiler does not support ARM intrinsics])
> @@ -516,6 +594,9 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
>  AM_CONDITIONAL([CPU_ARM], [test "$cpu_arm" = "yes"])
>  AM_CONDITIONAL([OPUS_ARM_NEON_INTR],
>      [test x"$OPUS_ARM_NEON_INTR" = x"1"])
> +AM_CONDITIONAL([HAVE_ARM_NE10],
> +    [test x"$HAVE_ARM_NE10" = x"1"])
> +
>
>  AS_IF([test x"$enable_rtcd" = x"yes"],[
>      AS_IF([test x"$rtcd_support" != x"no"],[
> diff --git a/src/analysis.c b/src/analysis.c
> index 2ee8533..e04b282 100644
> --- a/src/analysis.c
> +++ b/src/analysis.c
> @@ -189,7 +189,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
>     info_out->music_prob = psum;
>  }
>
> -static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
> +static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix, int arch)
>  {
>      int i, b;
>      const kiss_fft_state *kfft;
> @@ -262,7 +262,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
>      remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
>      downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
>      tonal->mem_fill = 240 + remaining;
> -    opus_fft(kfft, in, out);
> +    opus_fft(kfft, in, out, arch);
>  #ifndef FIXED_POINT
>      /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */
>      if (celt_isnan(out[0].r))
> @@ -635,7 +635,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
>
>  void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
>                   int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
> -                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
> +                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info, int arch)
>  {
>     int offset;
>     int pcm_len;
> @@ -648,7 +648,7 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co
>        pcm_len = analysis_frame_size - analysis->analysis_offset;
>        offset = analysis->analysis_offset;
>        do {
> -         tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
> +         tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix, arch);
>           offset += 480;
>           pcm_len -= 480;
>        } while (pcm_len>0);
> diff --git a/src/analysis.h b/src/analysis.h
> index 85a73d7..9c328e8 100644
> --- a/src/analysis.h
> +++ b/src/analysis.h
> @@ -82,6 +82,6 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
>
>  void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
>                   int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
> -                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
> +                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info, int arch);
>
>  #endif
> diff --git a/src/opus_encoder.c b/src/opus_encoder.c
> index d94163f..4656da5 100644
> --- a/src/opus_encoder.c
> +++ b/src/opus_encoder.c
> @@ -1006,7 +1006,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
>         analysis_read_subframe_bak = st->analysis.read_subframe;
>         run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size,
>               c1, c2, analysis_channels, st->Fs,
> -             lsb_depth, downmix, &analysis_info);
> +             lsb_depth, downmix, &analysis_info, st->arch);
>      }
>  #else
>      (void)analysis_pcm;
> diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c
> index 6e87337..1281e85 100644
> --- a/src/opus_multistream_encoder.c
> +++ b/src/opus_multistream_encoder.c
> @@ -71,6 +71,7 @@ typedef void (*opus_copy_channel_in_func)(
>
>  struct OpusMSEncoder {
>     ChannelLayout layout;
> +   int arch;
>     int lfe_stream;
>     int application;
>     int variable_duration;
> @@ -218,7 +219,7 @@ opus_val16 logSum(opus_val16 a, opus_val16 b)
>  #endif
>
>  void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem,
> -      int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in
> +      int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in, int arch
>  )
>  {
>     int c;
> @@ -257,7 +258,8 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b
>        OPUS_COPY(in, mem+c*overlap, overlap);
>        (*copy_channel_in)(x, 1, pcm, channels, c, len);
>        celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0);
> -      clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1);
> +      clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window,
> +                       overlap, celt_mode->maxLM-LM, 1, arch);
>        if (upsample != 1)
>        {
>           int bound = len;
> @@ -411,6 +413,7 @@ static int opus_multistream_encoder_init_impl(
>         (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
>        return OPUS_BAD_ARG;
>
> +   st->arch = opus_select_arch();
>     st->layout.nb_channels = channels;
>     st->layout.nb_streams = streams;
>     st->layout.nb_coupled_streams = coupled_streams;
> @@ -767,7 +770,7 @@ static int opus_multistream_encode_native
>     ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);
>     if (st->surround)
>     {
> -      surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in);
> +      surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in, st->arch);
>     }
>
>     /* Compute bitrate allocation between streams (this could be a lot better) */
> --
> 1.7.9.5
>