[opus] [RFC PATCH v2] armv7(float): Optimize encode usecase using NE10 library
Viswanath Puttagunta
viswanath.puttagunta at linaro.org
Wed Feb 4 14:04:29 PST 2015
On 4 February 2015 at 12:21, Viswanath Puttagunta
<viswanath.puttagunta at linaro.org> wrote:
> Optimize the opus encode (float only) use case using the ARM NE10
> library. Mainly affects opus_fft and clt_mdct_forward
> and related functions.
>
> This optimization can be used for ARM CPUs that have a NEON
> VFP unit. This patch only enables optimizations for ARMv7.
>
> Official ARM NE10 library page available at
> http://projectne10.github.io/Ne10/
>
> To enable this optimization, use
> --enable-intrinsics --with-NE10=<install_prefix>
> or
> --enable-intrinsics --with-NE10-libraries=<NE10_lib_dir> --with-NE10-includes=<NE10_includes_dir>
>
> Compile-time checks are made during the configure process to make sure
> the optimization option is available only when the compiler supports NEON
> intrinsics.
>
> Runtime checks are made to make sure optimized functions are only called
> on appropriate hardware.
> ---
> Makefile.am | 34 +--
> celt/arm/arm_celt_map.c | 47 +++-
> celt/arm/celt_ne10_fft.c | 120 ++++++++++
> celt/arm/celt_ne10_mdct.c | 158 +++++++++++++
> celt/arm/fft_arm.h | 66 ++++++
> celt/arm/mdct_arm.h | 53 +++++
> celt/celt_encoder.c | 13 +-
> celt/dump_modes/Makefile | 23 +-
> celt/dump_modes/dump_modes.c | 21 ++
> celt/dump_modes/dump_modes_arch.h | 41 ++++
> celt/dump_modes/dump_modes_arm_ne10.c | 125 ++++++++++
> celt/kiss_fft.c | 27 ++-
> celt/kiss_fft.h | 54 ++++-
> celt/mdct.c | 15 +-
> celt/mdct.h | 39 +++-
> celt/modes.c | 8 +-
> celt/static_modes_float.h | 25 ++
> celt/static_modes_float_arm_ne10.h | 404 +++++++++++++++++++++++++++++++++
> celt/tests/test_unit_dft.c | 52 +++--
> celt/tests/test_unit_mathops.c | 6 +
> celt/tests/test_unit_mdct.c | 81 ++++---
> celt/tests/test_unit_rotation.c | 6 +
> celt_headers.mk | 3 +
> celt_sources.mk | 4 +
> configure.ac | 81 +++++++
> src/analysis.c | 8 +-
> src/analysis.h | 2 +-
> src/opus_encoder.c | 2 +-
> src/opus_multistream_encoder.c | 9 +-
> 29 files changed, 1422 insertions(+), 105 deletions(-)
> create mode 100644 celt/arm/celt_ne10_fft.c
> create mode 100644 celt/arm/celt_ne10_mdct.c
> create mode 100644 celt/arm/fft_arm.h
> create mode 100644 celt/arm/mdct_arm.h
> create mode 100644 celt/dump_modes/dump_modes_arch.h
> create mode 100644 celt/dump_modes/dump_modes_arm_ne10.c
> create mode 100644 celt/static_modes_float_arm_ne10.h
>
> diff --git a/Makefile.am b/Makefile.am
> index 95323ca..c7d9533 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -10,7 +10,7 @@ lib_LTLIBRARIES = libopus.la
> DIST_SUBDIRS = doc
>
> AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/celt -I$(top_srcdir)/silk \
> - -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed
> + -I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed $(NE10_CFLAGS)
>
> include celt_sources.mk
> include silk_sources.mk
> @@ -47,6 +47,10 @@ CELT_SOURCES += $(CELT_SOURCES_ARM_NEON_INTR)
> OPUS_ARM_NEON_INTR_CPPFLAGS = -mfpu=neon
> endif
>
> +if HAVE_ARM_NE10
> +CELT_SOURCES += $(CELT_SOURCES_ARM_NE10)
> +endif
> +
> if OPUS_ARM_EXTERNAL_ASM
> nodist_libopus_la_SOURCES = $(CELT_SOURCES_ARM_ASM:.s=-gnu.S)
> BUILT_SOURCES = $(CELT_SOURCES_ARM_ASM:.s=-gnu.S) \
> @@ -64,7 +68,7 @@ include opus_headers.mk
>
> libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(OPUS_SOURCES)
> libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@
> -libopus_la_LIBADD = $(LIBM)
> +libopus_la_LIBADD = $(NE10_LIBS) $(LIBM)
>
> pkginclude_HEADERS = include/opus.h include/opus_multistream.h include/opus_types.h include/opus_defines.h
>
> @@ -77,32 +81,32 @@ TESTS = celt/tests/test_unit_types celt/tests/test_unit_mathops celt/tests/test_
>
> opus_demo_SOURCES = src/opus_demo.c
>
> -opus_demo_LDADD = libopus.la $(LIBM)
> +opus_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
> repacketizer_demo_SOURCES = src/repacketizer_demo.c
>
> -repacketizer_demo_LDADD = libopus.la $(LIBM)
> +repacketizer_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
> opus_compare_SOURCES = src/opus_compare.c
> opus_compare_LDADD = $(LIBM)
>
> tests_test_opus_api_SOURCES = tests/test_opus_api.c tests/test_opus_common.h
> -tests_test_opus_api_LDADD = libopus.la $(LIBM)
> +tests_test_opus_api_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
> tests_test_opus_encode_SOURCES = tests/test_opus_encode.c tests/test_opus_common.h
> -tests_test_opus_encode_LDADD = libopus.la $(LIBM)
> +tests_test_opus_encode_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
> tests_test_opus_decode_SOURCES = tests/test_opus_decode.c tests/test_opus_common.h
> -tests_test_opus_decode_LDADD = libopus.la $(LIBM)
> +tests_test_opus_decode_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
> tests_test_opus_padding_SOURCES = tests/test_opus_padding.c tests/test_opus_common.h
> -tests_test_opus_padding_LDADD = libopus.la $(LIBM)
> +tests_test_opus_padding_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
>
> celt_tests_test_unit_cwrs32_SOURCES = celt/tests/test_unit_cwrs32.c
> celt_tests_test_unit_cwrs32_LDADD = $(LIBM)
>
> celt_tests_test_unit_dft_SOURCES = celt/tests/test_unit_dft.c
> -celt_tests_test_unit_dft_LDADD = $(LIBM)
> +celt_tests_test_unit_dft_LDADD = $(NE10_LIBS) $(LIBM)
>
> celt_tests_test_unit_entropy_SOURCES = celt/tests/test_unit_entropy.c
> celt_tests_test_unit_entropy_LDADD = $(LIBM)
> @@ -111,7 +115,7 @@ celt_tests_test_unit_laplace_SOURCES = celt/tests/test_unit_laplace.c
> celt_tests_test_unit_laplace_LDADD = $(LIBM)
>
> celt_tests_test_unit_mathops_SOURCES = celt/tests/test_unit_mathops.c
> -celt_tests_test_unit_mathops_LDADD = $(LIBM)
> +celt_tests_test_unit_mathops_LDADD = $(NE10_LIBS) $(LIBM)
> if CPU_ARM
> if OPUS_ARM_EXTERNAL_ASM
> celt_tests_test_unit_mathops_LDADD += libopus.la
> @@ -119,10 +123,10 @@ endif
> endif
>
> celt_tests_test_unit_mdct_SOURCES = celt/tests/test_unit_mdct.c
> -celt_tests_test_unit_mdct_LDADD = $(LIBM)
> +celt_tests_test_unit_mdct_LDADD = $(NE10_LIBS) $(LIBM)
>
> celt_tests_test_unit_rotation_SOURCES = celt/tests/test_unit_rotation.c
> -celt_tests_test_unit_rotation_LDADD = $(LIBM)
> +celt_tests_test_unit_rotation_LDADD = $(NE10_LIBS) $(LIBM)
> if CPU_ARM
> if OPUS_ARM_EXTERNAL_ASM
> celt_tests_test_unit_rotation_LDADD += libopus.la
> @@ -269,6 +273,8 @@ endif
>
> if OPUS_ARM_NEON_INTR
> CELT_ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
> - %test_unit_rotation.o %test_unit_mathops.o
> -$(CELT_ARM_NEON_INTR_OBJ): CFLAGS += $(OPUS_ARM_NEON_INTR_CPPFLAGS)
> + $(CELT_SOURCES_ARM_NE10:.c=.lo) \
> + %test_unit_rotation.o %test_unit_mathops.o \
> + %test_unit_mdct.o %test_unit_dft.o
> +$(CELT_ARM_NEON_INTR_OBJ): CFLAGS += $(OPUS_ARM_NEON_INTR_CPPFLAGS) $(NE10_CFLAGS)
> endif
> diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c
> index 68c224d..3b49f90 100644
> --- a/celt/arm/arm_celt_map.c
> +++ b/celt/arm/arm_celt_map.c
> @@ -30,6 +30,8 @@
> #endif
>
> #include "pitch.h"
> +#include "kiss_fft.h"
> +#include "mdct.h"
>
> #if defined(OPUS_HAVE_RTCD)
>
> @@ -50,7 +52,46 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
> celt_pitch_xcorr_c, /* Media */
> celt_pitch_xcorr_float_neon /* Neon */
> };
> -# endif
> -# endif
>
> -#endif
> +#if defined(HAVE_ARM_NE10)
> +#ifdef CUSTOM_MODES
> +int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
I named it this way because it is the implementation of opus_fft_alloc_arch().
> + opus_fft_alloc_arch_c, /* ARMv4 */
> + opus_fft_alloc_arch_c, /* EDSP */
> + opus_fft_alloc_arch_c, /* Media */
> + opus_fft_alloc_arm_float_neon /* Neon with NE10 library support */
> +};
> +
> +void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
> + opus_fft_free_arch_c, /* ARMv4 */
> + opus_fft_free_arch_c, /* EDSP */
> + opus_fft_free_arch_c, /* Media */
> + opus_fft_free_arm_float_neon /* Neon with NE10 */
> +};
> +#endif /* CUSTOM_MODES */
> +
> +void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
I wanted to change the name of this to OPUS_FFT_IMPL, but thought it would
be misleading, as opus_fft_impl() in celt/kiss_fft.c does not strictly
implement opus_fft().
> + const kiss_fft_cpx *fin,
> + kiss_fft_cpx *fout) = {
> + opus_fft_c, /* ARMv4 */
> + opus_fft_c, /* EDSP */
> + opus_fft_c, /* Media */
> + opus_fft_float_neon /* Neon with NE10 */
> +};
> +
> +void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
I did change this name, however, since it did not conflict with any other function.
> + kiss_fft_scalar *in,
> + kiss_fft_scalar * OPUS_RESTRICT out,
> + const opus_val16 *window,
> + int overlap, int shift,
> + int stride, int arch) = {
> + clt_mdct_forward_c, /* ARMv4 */
> + clt_mdct_forward_c, /* EDSP */
> + clt_mdct_forward_c, /* Media */
> + clt_mdct_forward_float_neon /* Neon with NE10 */
> +};
> +#endif /* HAVE_ARM_NE10 */
> +# endif /* OPUS_ARM_NEON_INTR */
> +# endif /* FIXED_POINT */
> +
> +#endif /* OPUS_HAVE_RTCD */
> diff --git a/celt/arm/celt_ne10_fft.c b/celt/arm/celt_ne10_fft.c
> new file mode 100644
> index 0000000..b592f19
> --- /dev/null
> +++ b/celt/arm/celt_ne10_fft.c
> @@ -0,0 +1,120 @@
> +/* Copyright (c) 2015 Xiph.Org Foundation
> + Written by Viswanath Puttagunta */
> +/**
> + @file celt_ne10_fft.c
> + @brief ARM Neon optimizations for fft using NE10 library
> + */
> +
> +/*
> + Redistribution and use in source and binary forms, with or without
> + modification, are permitted provided that the following conditions
> + are met:
> +
> + - Redistributions of source code must retain the above copyright
> + notice, this list of conditions and the following disclaimer.
> +
> + - Redistributions in binary form must reproduce the above copyright
> + notice, this list of conditions and the following disclaimer in the
> + documentation and/or other materials provided with the distribution.
> +
> + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#ifndef SKIP_CONFIG_H
> +#ifdef HAVE_CONFIG_H
> +#include "config.h"
> +#endif
> +#endif
> +
> +#include <arm_neon.h>
> +#include <NE10_init.h>
> +#include <NE10_dsp.h>
> +#include "../kiss_fft.h"
> +#include "stack_alloc.h"
> +#include "os_support.h"
> +#include "stack_alloc.h"
> +
> +#ifdef CUSTOM_MODES
> +
> +/* nfft lengths in NE10 that support scaled fft */
> +#define NE10_FFTSCALED_SUPPORT_MAX 4
> +static const int ne10_fft_scaled_support[NE10_FFTSCALED_SUPPORT_MAX] = {
> + 480, 240, 120, 60
> +};
> +
> +int opus_fft_alloc_arm_float_neon(kiss_fft_state *st)
> +{
> + int i;
> + size_t memneeded = sizeof(struct arch_fft_state);
> +
> + st->arch_fft = (arch_fft_state *)opus_alloc(memneeded);
> + if (!st->arch_fft)
> + return -1;
> +
> + for (i = 0; i < NE10_FFTSCALED_SUPPORT_MAX; i++) {
> + if(st->nfft == ne10_fft_scaled_support[i])
> + break;
> + }
> + if (i == NE10_FFTSCALED_SUPPORT_MAX) {
> + /* This nfft length (scaled fft) is not supported in NE10 */
> + st->arch_fft->is_supported = 0;
> + st->arch_fft->priv = NULL;
> + }
> + else {
> + st->arch_fft->is_supported = 1;
> + st->arch_fft->priv = (void *)ne10_fft_alloc_c2c_float32_neon(st->nfft);
> + if (st->arch_fft->priv == NULL) {
> + return -1;
> + }
> + }
> + return 0;
> +}
> +
> +void opus_fft_free_arm_float_neon(kiss_fft_state *st)
> +{
> + ne10_fft_cfg_float32_t cfg;
> +
> + if (!st->arch_fft)
> + return;
> +
> + cfg = (ne10_fft_cfg_float32_t)st->arch_fft->priv;
> + if (cfg)
> + ne10_fft_destroy_c2c_float32(cfg);
> + opus_free(st->arch_fft);
> +}
> +#endif
> +void opus_fft_float_neon(const kiss_fft_state *st,
> + const kiss_fft_cpx *fin,
> + kiss_fft_cpx *fout)
> +{
> + ne10_fft_state_float32_t state;
> + ne10_fft_cfg_float32_t cfg = &state;
> + VARDECL(ne10_fft_cpx_float32_t, buffer);
> + SAVE_STACK;
> + ALLOC(buffer, st->nfft, ne10_fft_cpx_float32_t);
> +
> + if (!st->arch_fft->is_supported) {
> + /* This nfft length (scaled fft) not supported in NE10 */
> + opus_fft_c(st, fin, fout);
> + }
> + else {
> + memcpy((void *)cfg, st->arch_fft->priv, sizeof(ne10_fft_state_float32_t));
> + state.buffer = (ne10_fft_cpx_float32_t *)&buffer[0];
> + state.is_forward_scaled = 1;
> +
> + ne10_fft_c2c_1d_float32_neon((ne10_fft_cpx_float32_t *)fout,
> + (ne10_fft_cpx_float32_t *)fin,
> + cfg, 0);
> + }
> + RESTORE_STACK;
> +}
> diff --git a/celt/arm/celt_ne10_mdct.c b/celt/arm/celt_ne10_mdct.c
> new file mode 100644
> index 0000000..cf175cb
> --- /dev/null
> +++ b/celt/arm/celt_ne10_mdct.c
> @@ -0,0 +1,158 @@
> +/* Copyright (c) 2015 Xiph.Org Foundation
> + Written by Viswanath Puttagunta */
> +/**
> + @file celt_ne10_mdct.c
> + @brief ARM Neon optimizations for mdct using NE10 library
> + */
> +
> +/*
> + Redistribution and use in source and binary forms, with or without
> + modification, are permitted provided that the following conditions
> + are met:
> +
> + - Redistributions of source code must retain the above copyright
> + notice, this list of conditions and the following disclaimer.
> +
> + - Redistributions in binary form must reproduce the above copyright
> + notice, this list of conditions and the following disclaimer in the
> + documentation and/or other materials provided with the distribution.
> +
> + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#ifndef SKIP_CONFIG_H
> +#ifdef HAVE_CONFIG_H
> +#include "config.h"
> +#endif
> +#endif
> +
> +#include "../kiss_fft.h"
> +#include "_kiss_fft_guts.h"
> +#include "../mdct.h"
> +#include "stack_alloc.h"
> +#include "os_support.h"
> +#include "stack_alloc.h"
> +
> +void clt_mdct_forward_float_neon(const mdct_lookup *l,
> + kiss_fft_scalar *in,
> + kiss_fft_scalar * OPUS_RESTRICT out,
> + const opus_val16 *window,
> + int overlap, int shift, int stride, int arch)
> +{
> + int i;
> + int N, N2, N4;
> + VARDECL(kiss_fft_scalar, f);
> + VARDECL(kiss_fft_cpx, f2);
> + const kiss_fft_state *st = l->kfft[shift];
> + const kiss_twiddle_scalar *trig;
> +
> + SAVE_STACK;
> +
> + N = l->n;
> + trig = l->trig;
> + for (i=0;i<shift;i++)
> + {
> + N >>= 1;
> + trig += N;
> + }
> + N2 = N>>1;
> + N4 = N>>2;
> +
> + ALLOC(f, N2, kiss_fft_scalar);
> + ALLOC(f2, N4, kiss_fft_cpx);
> +
> + /* Consider the input to be composed of four blocks: [a, b, c, d] */
> + /* Window, shuffle, fold */
> + {
> + /* Temp pointers to make it really clear to the compiler what we're doing */
> + const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
> + const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
> + kiss_fft_scalar * OPUS_RESTRICT yp = f;
> + const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
> + const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
> + for(i=0;i<((overlap+3)>>2);i++)
> + {
> + /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
> + *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
> + *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
> + xp1+=2;
> + xp2-=2;
> + wp1+=2;
> + wp2-=2;
> + }
> + wp1 = window;
> + wp2 = window+overlap-1;
> + for(;i<N4-((overlap+3)>>2);i++)
> + {
> + /* Real part arranged as a-bR, Imag part arranged as -c-dR */
> + *yp++ = *xp2;
> + *yp++ = *xp1;
> + xp1+=2;
> + xp2-=2;
> + }
> + for(;i<N4;i++)
> + {
> + /* Real part arranged as a-bR, Imag part arranged as -c-dR */
> + *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
> + *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
> + xp1+=2;
> + xp2-=2;
> + wp1+=2;
> + wp2-=2;
> + }
> + }
> + /* Pre-rotation */
> + {
> + kiss_fft_scalar * OPUS_RESTRICT yp = f;
> + const kiss_twiddle_scalar *t = &trig[0];
> + for(i=0;i<N4;i++)
> + {
> + kiss_fft_cpx yc;
> + kiss_twiddle_scalar t0, t1;
> + kiss_fft_scalar re, im, yr, yi;
> + t0 = t[i];
> + t1 = t[N4+i];
> + re = *yp++;
> + im = *yp++;
> + yr = S_MUL(re,t0) - S_MUL(im,t1);
> + yi = S_MUL(im,t0) + S_MUL(re,t1);
> + yc.r = yr;
> + yc.i = yi;
> + f2[i] = yc;
> + }
> + }
> +
> + opus_fft(st, f2, (kiss_fft_cpx *)f, arch);
> +
> + /* Post-rotate */
> + {
> + /* Temp pointers to make it really clear to the compiler what we're doing */
> + const kiss_fft_cpx * OPUS_RESTRICT fp = (kiss_fft_cpx *)f;
> + kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
> + kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
> + const kiss_twiddle_scalar *t = &trig[0];
> + /* Temp pointers to make it really clear to the compiler what we're doing */
> + for(i=0;i<N4;i++)
> + {
> + kiss_fft_scalar yr, yi;
> + yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
> + yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
> + *yp1 = yr;
> + *yp2 = yi;
> + fp++;
> + yp1 += 2*stride;
> + yp2 -= 2*stride;
> + }
> + }
> + RESTORE_STACK;
> +}
> diff --git a/celt/arm/fft_arm.h b/celt/arm/fft_arm.h
> new file mode 100644
> index 0000000..e7a30d6
> --- /dev/null
> +++ b/celt/arm/fft_arm.h
> @@ -0,0 +1,66 @@
> +/* Copyright (c) 2015 Xiph.Org Foundation
> + Written by Viswanath Puttagunta */
> +/**
> + @file fft_arm.h
> + @brief ARM Neon Intrinsic optimizations for fft using NE10 library
> + */
> +
> +/*
> + Redistribution and use in source and binary forms, with or without
> + modification, are permitted provided that the following conditions
> + are met:
> +
> + - Redistributions of source code must retain the above copyright
> + notice, this list of conditions and the following disclaimer.
> +
> + - Redistributions in binary form must reproduce the above copyright
> + notice, this list of conditions and the following disclaimer in the
> + documentation and/or other materials provided with the distribution.
> +
> + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +
> +#if !defined(FFT_ARM_H)
> +#define FFT_ARM_H
> +
> +#include "config.h"
> +#include "kiss_fft.h"
> +
> +#if !defined(FIXED_POINT)
> +#if defined(HAVE_ARM_NE10)
> +
> +int opus_fft_alloc_arm_float_neon(kiss_fft_state *st);
> +void opus_fft_free_arm_float_neon(kiss_fft_state *st);
> +
> +void opus_fft_float_neon(const kiss_fft_state *st,
> + const kiss_fft_cpx *fin,
> + kiss_fft_cpx *fout);
> +#if !defined(OPUS_HAVE_RTCD)
> +#define OVERRIDE_OPUS_FFT (1)
> +
> +#define opus_fft_alloc_arch(_st, arch) \
> + ((void)(arch), opus_fft_alloc_arm_float_neon(_st))
> +
> +#define opus_fft_free_arch(_st, arch) \
> + ((void)(arch), opus_fft_free_arm_float_neon(_st))
> +
> +#define opus_fft(_st, _fin, _fout, arch) \
> + ((void)(arch), opus_fft_float_neon(_st, _fin, _fout))
> +
> +#endif /* OPUS_HAVE_RTCD */
> +
> +#endif /* HAVE_ARM_NE10 */
> +#endif /* FIXED_POINT */
> +
> +#endif
> diff --git a/celt/arm/mdct_arm.h b/celt/arm/mdct_arm.h
> new file mode 100644
> index 0000000..33f7bb6
> --- /dev/null
> +++ b/celt/arm/mdct_arm.h
> @@ -0,0 +1,53 @@
> +/* Copyright (c) 2015 Xiph.Org Foundation
> + Written by Viswanath Puttagunta */
> +/**
> + @file mdct_arm.h
> + @brief ARM Neon Intrinsic optimizations for mdct using NE10 library
> + */
> +
> +/*
> + Redistribution and use in source and binary forms, with or without
> + modification, are permitted provided that the following conditions
> + are met:
> +
> + - Redistributions of source code must retain the above copyright
> + notice, this list of conditions and the following disclaimer.
> +
> + - Redistributions in binary form must reproduce the above copyright
> + notice, this list of conditions and the following disclaimer in the
> + documentation and/or other materials provided with the distribution.
> +
> + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#if !defined(MDCT_ARM_H)
> +#define MDCT_ARM_H
> +
> +#include "config.h"
> +#include "mdct.h"
> +
> +#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
> +/** Compute a forward MDCT and scale by 4/N, trashes the input array */
> +void clt_mdct_forward_float_neon(const mdct_lookup *l, kiss_fft_scalar *in,
> + kiss_fft_scalar * OPUS_RESTRICT out,
> + const opus_val16 *window, int overlap,
> + int shift, int stride, int arch);
> +
> +#if !defined(OPUS_HAVE_RTCD)
> +#define OVERRIDE_OPUS_MDCT (1)
> +#define clt_mdct_forward(_l, _in, _out, _window, _int, _shift, _stride, _arch) \
> + clt_mdct_forward_float_neon((_l, _in, _out, _window, _int, _shift, _stride, _arch)
> +#endif /* OPUS_HAVE_RTCD */
> +#endif /* !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) */
> +
> +#endif
> diff --git a/celt/celt_encoder.c b/celt/celt_encoder.c
> index 86a3fbb..7a2c71b 100644
> --- a/celt/celt_encoder.c
> +++ b/celt/celt_encoder.c
> @@ -414,7 +414,8 @@ int patch_transient_decision(opus_val16 *newE, opus_val16 *oldE, int nbEBands,
> /** Apply window and compute the MDCT for all sub-frames and
> all channels in a frame */
> static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS_RESTRICT in,
> - celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample)
> + celt_sig * OPUS_RESTRICT out, int C, int CC, int LM, int upsample,
> + int arch)
> {
> const int overlap = mode->overlap;
> int N;
> @@ -435,7 +436,9 @@ static void compute_mdcts(const CELTMode *mode, int shortBlocks, celt_sig * OPUS
> for (b=0;b<B;b++)
> {
> /* Interleaving the sub-frames while doing the MDCTs */
> - clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N, &out[b+c*N*B], mode->window, overlap, shift, B);
> + clt_mdct_forward(&mode->mdct, in+c*(B*N+overlap)+b*N,
> + &out[b+c*N*B], mode->window, overlap, shift, B,
> + arch);
> }
> } while (++c<CC);
> if (CC==2&&C==1)
> @@ -1603,14 +1606,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
> ALLOC(bandLogE2, C*nbEBands, opus_val16);
> if (secondMdct)
> {
> - compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample);
> + compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch);
> compute_band_energies(mode, freq, bandE, effEnd, C, LM);
> amp2Log2(mode, effEnd, end, bandE, bandLogE2, C);
> for (i=0;i<C*nbEBands;i++)
> bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
> }
>
> - compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
> + compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
> if (CC==2&&C==1)
> tf_chan = 0;
> compute_band_energies(mode, freq, bandE, effEnd, C, LM);
> @@ -1736,7 +1739,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
> {
> isTransient = 1;
> shortBlocks = M;
> - compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample);
> + compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch);
> compute_band_energies(mode, freq, bandE, effEnd, C, LM);
> amp2Log2(mode, effEnd, end, bandE, bandLogE, C);
> /* Compensate for the scaling of short vs long mdcts */
> diff --git a/celt/dump_modes/Makefile b/celt/dump_modes/Makefile
> index 74d527e..10c3679 100644
> --- a/celt/dump_modes/Makefile
> +++ b/celt/dump_modes/Makefile
> @@ -1,10 +1,31 @@
> +
> CFLAGS=-O2 -Wall -Wextra -DHAVE_CONFIG_H
> INCLUDES=-I. -I../ -I../.. -I../../include
>
> +SOURCES = dump_modes.c \
> + ../modes.c \
> + ../cwrs.c \
> + ../rate.c \
> + ../entenc.c \
> + ../entdec.c \
> + ../mathops.c \
> + ../mdct.c \
> + ../kiss_fft.c
> +
> +ifdef HAVE_ARM_NE10
> +CC = gcc
> +CFLAGS += -mfpu=neon
> +INCLUDES += -I$(NE10_INCDIR) -DHAVE_ARM_NE10 -DOPUS_ARM_NEON_INTR
> +LIBDIR = -l:$(NE10_LIBDIR)/libNE10.so
> +SOURCES += ../arm/celt_ne10_fft.c \
> + dump_modes_arm_ne10.c \
> + ../arm/armcpu.c
> +endif
> +
> all: dump_modes
>
> dump_modes:
> - $(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES dump_modes.c ../modes.c ../cwrs.c ../rate.c ../entenc.c ../entdec.c ../mathops.c ../mdct.c ../kiss_fft.c -o dump_modes -lm
> + $(PREFIX)$(CC) $(CFLAGS) $(INCLUDES) -DCUSTOM_MODES_ONLY -DCUSTOM_MODES $(SOURCES) -o $@ $(LIBDIR) -lm
>
> clean:
> rm -f dump_modes
> diff --git a/celt/dump_modes/dump_modes.c b/celt/dump_modes/dump_modes.c
> index ae6a8c1..9105a53 100644
> --- a/celt/dump_modes/dump_modes.c
> +++ b/celt/dump_modes/dump_modes.c
> @@ -35,6 +35,7 @@
> #include "modes.h"
> #include "celt.h"
> #include "rate.h"
> +#include "dump_modes_arch.h"
>
> #define INT16 "%d"
> #define INT32 "%d"
> @@ -62,6 +63,10 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
> fprintf(file, "\n It contains static definitions for some pre-defined modes. */\n");
> fprintf(file, "#include \"modes.h\"\n");
> fprintf(file, "#include \"rate.h\"\n");
> + fprintf(file, "\n#ifdef HAVE_ARM_NE10\n");
> + fprintf(file, "#define OVERRIDE_FFT 1\n");
> + fprintf(file, "#include \"%s\"\n", ARM_NE10_ARCH_FILE_NAME);
> + fprintf(file, "#endif\n");
>
> fprintf(file, "\n");
>
> @@ -149,6 +154,9 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
> fprintf (file, "{" WORD16 ", " WORD16 "},%c", mode->mdct.kfft[0]->twiddles[j].r, mode->mdct.kfft[0]->twiddles[j].i,(j+3)%2==0?'\n':' ');
> fprintf (file, "};\n");
>
> +#ifdef OVERRIDE_FFT
> + dump_mode_arch(mode);
> +#endif
> /* FFT Bitrev tables */
> for (k=0;k<=mode->mdct.maxshift;k++)
> {
> @@ -183,6 +191,13 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
> fprintf (file, "}, /* factors */\n");
> fprintf (file, "fft_bitrev%d, /* bitrev */\n", mode->mdct.kfft[k]->nfft);
> fprintf (file, "fft_twiddles%d_%d, /* bitrev */\n", mode->Fs, mdctSize);
> +
> + fprintf (file, "#ifdef OVERRIDE_FFT\n");
> + fprintf (file, "(arch_fft_state *)&cfg_arch_%d,\n", mode->mdct.kfft[k]->nfft);
> + fprintf (file, "#else\n");
> + fprintf (file, "NULL,\n");
> + fprintf(file, "#endif\n");
> +
> fprintf (file, "};\n");
>
> fprintf(file, "#endif\n");
> @@ -323,8 +338,14 @@ int main(int argc, char **argv)
> }
> }
> file = fopen(BASENAME ".h", "w");
> +#ifdef OVERRIDE_FFT
> + dump_modes_arch_init(m, nb);
> +#endif
> dump_modes(file, m, nb);
> fclose(file);
> +#ifdef OVERRIDE_FFT
> + dump_modes_arch_finalize();
> +#endif
> for (i=0;i<nb;i++)
> opus_custom_mode_destroy(m[i]);
> free(m);
> diff --git a/celt/dump_modes/dump_modes_arch.h b/celt/dump_modes/dump_modes_arch.h
> new file mode 100644
> index 0000000..1436926
> --- /dev/null
> +++ b/celt/dump_modes/dump_modes_arch.h
> @@ -0,0 +1,41 @@
> +/* Copyright (c) 2015 Xiph.Org Foundation
> + Written by Viswanath Puttagunta */
> +/*
> + Redistribution and use in source and binary forms, with or without
> + modification, are permitted provided that the following conditions
> + are met:
> +
> + - Redistributions of source code must retain the above copyright
> + notice, this list of conditions and the following disclaimer.
> +
> + - Redistributions in binary form must reproduce the above copyright
> + notice, this list of conditions and the following disclaimer in the
> + documentation and/or other materials provided with the distribution.
> +
> + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#ifndef DUMP_MODE_ARCH_H
> +#define DUMP_MODE_ARCH_H
> +
> +void dump_modes_arch_init();
> +void dump_mode_arch(CELTMode *mode);
> +void dump_modes_arch_finalize();
> +
> +#define ARM_NE10_ARCH_FILE_NAME "static_modes_float_arm_ne10.h"
> +
> +#if defined(HAVE_ARM_NE10)
> +#define OVERRIDE_FFT (1)
> +#endif
> +
> +#endif
> diff --git a/celt/dump_modes/dump_modes_arm_ne10.c b/celt/dump_modes/dump_modes_arm_ne10.c
> new file mode 100644
> index 0000000..aa53f17
> --- /dev/null
> +++ b/celt/dump_modes/dump_modes_arm_ne10.c
> @@ -0,0 +1,125 @@
> +/* Copyright (c) 2015 Xiph.Org Foundation
> + Written by Viswanath Puttagunta */
> +/*
> + Redistribution and use in source and binary forms, with or without
> + modification, are permitted provided that the following conditions
> + are met:
> +
> + - Redistributions of source code must retain the above copyright
> + notice, this list of conditions and the following disclaimer.
> +
> + - Redistributions in binary form must reproduce the above copyright
> + notice, this list of conditions and the following disclaimer in the
> + documentation and/or other materials provided with the distribution.
> +
> + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
> + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
> + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
> + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
> + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
> + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
> + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
> + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +*/
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include "modes.h"
> +#include "dump_modes_arch.h"
> +#include <NE10_dsp.h>
> +
> +static FILE *file;
> +
> +/* Opens ARM_NE10_ARCH_FILE_NAME for writing and emits the file header: a
> +   comment listing, for each of the nb_modes entries in modes[], its Fs and
> +   shortMdctSize*nbShortMdcts, followed by the NE10_init.h include.
> +   Exits with a diagnostic if the file cannot be opened, because every later
> +   dump call writes through the same stream. */
> +void dump_modes_arch_init(CELTMode **modes, int nb_modes)
> +{
> +   int i;
> +
> +   file = fopen(ARM_NE10_ARCH_FILE_NAME, "w");
> +   if (file == NULL)
> +   {
> +      perror(ARM_NE10_ARCH_FILE_NAME);
> +      exit(EXIT_FAILURE);
> +   }
> +   fprintf(file, "/* The contents of this file was automatically generated by\n");
> +   fprintf(file, " * dump_mode_arm_ne10.c with arguments:");
> +   for (i=0;i<nb_modes;i++)
> +   {
> +      CELTMode *mode = modes[i];
> +      fprintf(file, " %d %d",mode->Fs,mode->shortMdctSize*mode->nbShortMdcts);
> +   }
> +   fprintf(file, "\n * It contains static definitions for some pre-defined modes. */\n");
> +   fprintf(file, "#include <NE10_init.h>\n\n");
> +}
> +
> +/* Closes the stream opened by dump_modes_arch_init(). A failed close means
> +   buffered output may not have reached the generated header, so it is
> +   reported. Calling this without an open stream is a no-op. */
> +void dump_modes_arch_finalize(void)
> +{
> +   if (file == NULL)
> +      return;
> +   if (fclose(file) != 0)
> +      perror(ARM_NE10_ARCH_FILE_NAME);
> +   /* Drop the stale handle so a second call cannot close it twice. */
> +   file = NULL;
> +}
> +
> +/* Returns the NE10 configuration attached to an FFT state, or NULL when the
> +   state has no architecture-specific data attached. */
> +static ne10_fft_cfg_float32_t ne10_cfg_of(const kiss_fft_state *st)
> +{
> +   if (st->arch_fft == NULL)
> +      return NULL;
> +   return (ne10_fft_cfg_float32_t)st->arch_fft->priv;
> +}
> +
> +/* Writes the NE10 FFT tables of one mode to the output file, wrapped in an
> +   NE10_FFT_PARAMS<Fs>_<mdctSize> include guard. For every shift from 0 to
> +   mode->mdct.maxshift it emits, in three passes, the factor tables, the
> +   twiddle tables, and finally the ne10_fft_state_float32_t and
> +   arch_fft_state initializers. An FFT size without an NE10 configuration
> +   gets no factor or twiddle table and an arch_fft_state marked as
> +   unsupported (0, NULL). */
> +void dump_mode_arch(CELTMode *mode)
> +{
> +   int k, j;
> +   int mdctSize;
> +   ne10_fft_cfg_float32_t cfg;
> +   const kiss_fft_state *st;
> +
> +   mdctSize = mode->shortMdctSize*mode->nbShortMdcts;
> +
> +   fprintf(file, "#ifndef NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
> +   fprintf(file, "#define NE10_FFT_PARAMS%d_%d\n", mode->Fs, mdctSize);
> +
> +   /* cfg->factors */
> +   for(k=0;k<=mode->mdct.maxshift;k++) {
> +      st = mode->mdct.kfft[k];
> +      cfg = ne10_cfg_of(st);
> +      if (!cfg)
> +         continue;
> +      fprintf(file, "static const ne10_int32_t ne10_factors_%d[%d] = {\n",
> +              st->nfft, (NE10_MAXFACTORS * 2));
> +      /* 15 values per output line */
> +      for(j=0;j<(NE10_MAXFACTORS * 2);j++) {
> +         fprintf(file, "%d,%c", cfg->factors[j],(j+16)%15==0?'\n':' ');
> +      }
> +      fprintf (file, "};\n");
> +   }
> +
> +   /* cfg->twiddles */
> +   for(k=0;k<=mode->mdct.maxshift;k++) {
> +      st = mode->mdct.kfft[k];
> +      cfg = ne10_cfg_of(st);
> +      if (!cfg)
> +         continue;
> +      fprintf(file, "static const ne10_fft_cpx_float32_t ne10_twiddles_%d[%d] = {\n",
> +              st->nfft, st->nfft);
> +      /* 3 complex values per output line */
> +      for(j=0;j<st->nfft;j++) {
> +         fprintf(file, "{%#0.8gf,%#0.8gf},%c", cfg->twiddles[j].r, cfg->twiddles[j].i,(j+4)%3==0?'\n':' ');
> +      }
> +      fprintf (file, "};\n");
> +   }
> +
> +   /* FFT state and arch_fft_state initializers */
> +   for(k=0;k<=mode->mdct.maxshift;k++) {
> +      st = mode->mdct.kfft[k];
> +      cfg = ne10_cfg_of(st);
> +      if (!cfg) {
> +         fprintf(file, "/* Ne10 does not support scaled FFT for length = %d */\n",
> +                 st->nfft);
> +         fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n", st->nfft);
> +         fprintf(file, "0,\n");
> +         fprintf(file, "NULL\n");
> +         fprintf(file, "};\n");
> +         continue;
> +      }
> +      fprintf(file, "static const ne10_fft_state_float32_t ne10_fft_state_float32_%d = {\n",
> +              st->nfft);
> +      fprintf(file, "%d,\n", cfg->nfft);
> +      fprintf(file, "(ne10_int32_t *)ne10_factors_%d,\n", st->nfft);
> +      fprintf(file, "(ne10_fft_cpx_float32_t *)ne10_twiddles_%d,\n", st->nfft);
> +      fprintf(file, "NULL,\n"); /* buffer */
> +      fprintf(file, "(ne10_fft_cpx_float32_t *)&ne10_twiddles_%d[%d],\n",
> +              st->nfft, cfg->nfft);
> +      fprintf(file, "/* is_forward_scaled = true */\n");
> +      fprintf(file, "(ne10_int32_t) 1,\n");
> +      fprintf(file, "/* is_backward_scaled = false */\n");
> +      fprintf(file, "(ne10_int32_t) 0,\n");
> +      fprintf(file, "};\n");
> +
> +      fprintf(file, "static const arch_fft_state cfg_arch_%d = {\n",
> +              st->nfft);
> +      fprintf(file, "1,\n");
> +      fprintf(file, "(void *)&ne10_fft_state_float32_%d,\n", st->nfft);
> +      fprintf(file, "};\n\n");
> +   }
> +   fprintf(file, "#endif /* end NE10_FFT_PARAMS%d_%d */\n", mode->Fs, mdctSize);
> +}
> diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c
> index cc487fc..38fd4fb 100644
> --- a/celt/kiss_fft.c
> +++ b/celt/kiss_fft.c
> @@ -423,13 +423,19 @@ static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
> #endif
> }
>
> +/* Portable fallback for the architecture-specific FFT setup hook: it has
> +   nothing to allocate and always returns 0, which the caller treats as
> +   success. */
> +int opus_fft_alloc_arch_c(kiss_fft_state *st)
> +{
> +   (void)st;
> +   return 0;
> +}
> +
> /*
> *
> * Allocates all necessary storage space for the fft and ifft.
> * The return value is a contiguous block of memory. As such,
> * It can be freed with free().
> * */
> -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base)
> +kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
> + const kiss_fft_state *base, int arch)
> {
> kiss_fft_state *st=NULL;
> size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
> @@ -478,22 +484,31 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co
> if (st->bitrev==NULL)
> goto fail;
> compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
> +
> + /* Initialize architecture specific fft parameters */
> + if (opus_fft_alloc_arch(st, arch))
> + goto fail;
> }
> return st;
> fail:
> - opus_fft_free(st);
> + opus_fft_free(st, arch);
> return NULL;
> }
>
> -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem )
> +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
> {
> - return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL);
> + return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
> +}
> +
> +/* Portable fallback for the architecture-specific FFT teardown hook:
> +   it ignores its argument and does nothing. */
> +void opus_fft_free_arch_c(kiss_fft_state *st) {
> + (void)st;
> }
>
> -void opus_fft_free(const kiss_fft_state *cfg)
> +void opus_fft_free(const kiss_fft_state *cfg, int arch)
> {
> if (cfg)
> {
> + opus_fft_free_arch((kiss_fft_state *)cfg, arch);
> opus_free((opus_int16*)cfg->bitrev);
> if (cfg->shift < 0)
> opus_free((kiss_twiddle_cpx*)cfg->twiddles);
> @@ -551,7 +566,7 @@ void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
> }
> }
>
> -void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
> +void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
> {
> int i;
> opus_val16 scale;
> diff --git a/celt/kiss_fft.h b/celt/kiss_fft.h
> index 390b54d..11b14bb 100644
> --- a/celt/kiss_fft.h
> +++ b/celt/kiss_fft.h
> @@ -32,6 +32,7 @@
> #include <stdlib.h>
> #include <math.h>
> #include "arch.h"
> +#include "cpu_support.h"
>
> #ifdef __cplusplus
> extern "C" {
> @@ -77,6 +78,11 @@ typedef struct {
> 4*4*4*2
> */
>
> +/* Architecture-specific companion data referenced from kiss_fft_state. */
> +typedef struct arch_fft_state{
> + /* The generated NE10 tables store 1 here when an NE10 FFT state is
> +    attached and 0 when priv is NULL. */
> + int is_supported;
> + /* Opaque pointer to the architecture's own state; the NE10 mode dumper
> +    casts it to ne10_fft_cfg_float32_t. */
> + void *priv;
> +} arch_fft_state;
> +
> typedef struct kiss_fft_state{
> int nfft;
> opus_val16 scale;
> @@ -87,8 +93,13 @@ typedef struct kiss_fft_state{
> opus_int16 factors[2*MAXFACTORS];
> const opus_int16 *bitrev;
> const kiss_twiddle_cpx *twiddles;
> + arch_fft_state *arch_fft;
> } kiss_fft_state;
>
> +#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
> +#include "arm/fft_arm.h"
> +#endif
> +
> /*typedef struct kiss_fft_state* kiss_fft_cfg;*/
>
> /**
> @@ -114,9 +125,9 @@ typedef struct kiss_fft_state{
> * buffer size in *lenmem.
> * */
>
> -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base);
> +kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch);
>
> -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
> +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch);
>
> /**
> * opus_fft(cfg,in_out_buf)
> @@ -128,13 +139,48 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem);
> * Note that each element is complex and can be accessed like
> f[k].r and f[k].i
> * */
> -void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
> +void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
> void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
>
> void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
> void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
>
> -void opus_fft_free(const kiss_fft_state *cfg);
> +void opus_fft_free(const kiss_fft_state *cfg, int arch);
> +
> +
> +void opus_fft_free_arch_c(kiss_fft_state *st);
> +int opus_fft_alloc_arch_c(kiss_fft_state *st);
> +
> +#if !defined(OVERRIDE_OPUS_FFT)
> +/* Is run-time CPU detection enabled on this platform? */
> +#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
> +
> +/* Per-architecture dispatch tables, indexed by (arch & OPUS_ARCHMASK).
> +   They are declared extern because this header is included from many
> +   translation units: without extern each of them would carry its own
> +   tentative definition of a const array that is never initialized.
> +   The tables must be defined exactly once in a source file
> +   (presumably celt/arm/arm_celt_map.c -- confirm). */
> +extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st);
> +
> +#define opus_fft_alloc_arch(_st, arch) \
> +   ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
> +
> +extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st);
> +#define opus_fft_free_arch(_st, arch) \
> +   ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st))
> +
> +extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
> +                                               const kiss_fft_cpx *fin,
> +                                               kiss_fft_cpx *fout);
> +#define opus_fft(_cfg, _fin, _fout, arch) \
> +   ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout))
> +#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
> +
> +/* No run-time dispatch: the arch argument is ignored and the portable C
> +   implementations are called directly. */
> +#define opus_fft_alloc_arch(_st, arch) \
> +   opus_fft_alloc_arch_c(_st)
> +
> +#define opus_fft_free_arch(_st, arch) \
> +   opus_fft_free_arch_c(_st)
> +
> +#define opus_fft(_cfg, _fin, _fout, arch) \
> +   opus_fft_c(_cfg, _fin, _fout)
> +#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
> +#endif /* end if !defined(OVERRIDE_OPUS_FFT) */
>
> #ifdef __cplusplus
> }
> diff --git a/celt/mdct.c b/celt/mdct.c
> index 2795d90..ee6d80e 100644
> --- a/celt/mdct.c
> +++ b/celt/mdct.c
> @@ -60,7 +60,7 @@
>
> #ifdef CUSTOM_MODES
>
> -int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
> +int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch)
> {
> int i;
> kiss_twiddle_scalar *trig;
> @@ -71,9 +71,9 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
> for (i=0;i<=maxshift;i++)
> {
> if (i==0)
> - l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
> + l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0, arch);
> else
> - l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
> + l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0], arch);
> #ifndef ENABLE_TI_DSPLIB55
> if (l->kfft[i]==NULL)
> return 0;
> @@ -104,11 +104,11 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
> return 1;
> }
>
> -void clt_mdct_clear(mdct_lookup *l)
> +void clt_mdct_clear(mdct_lookup *l, int arch)
> {
> int i;
> for (i=0;i<=l->maxshift;i++)
> - opus_fft_free(l->kfft[i]);
> + opus_fft_free(l->kfft[i], arch);
> opus_free((kiss_twiddle_scalar*)l->trig);
> }
>
> @@ -116,8 +116,8 @@ void clt_mdct_clear(mdct_lookup *l)
>
> /* Forward MDCT trashes the input array */
> #ifndef OVERRIDE_clt_mdct_forward
> -void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
> - const opus_val16 *window, int overlap, int shift, int stride)
> +void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
> + const opus_val16 *window, int overlap, int shift, int stride, int arch)
> {
> int i;
> int N, N2, N4;
> @@ -132,6 +132,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
> int scale_shift = st->scale_shift-1;
> #endif
> SAVE_STACK;
> + (void)arch;
> scale = st->scale;
>
> N = l->n;
> diff --git a/celt/mdct.h b/celt/mdct.h
> index d721821..cbaf679 100644
> --- a/celt/mdct.h
> +++ b/celt/mdct.h
> @@ -53,13 +53,19 @@ typedef struct {
> const kiss_twiddle_scalar * OPUS_RESTRICT trig;
> } mdct_lookup;
>
> -int clt_mdct_init(mdct_lookup *l,int N, int maxshift);
> -void clt_mdct_clear(mdct_lookup *l);
> +#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10)
> +#include "arm/mdct_arm.h"
> +#endif
> +
> +
> +int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch);
> +void clt_mdct_clear(mdct_lookup *l, int arch);
>
> /** Compute a forward MDCT and scale by 4/N, trashes the input array */
> -void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in,
> - kiss_fft_scalar * OPUS_RESTRICT out,
> - const opus_val16 *window, int overlap, int shift, int stride);
> +void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in,
> + kiss_fft_scalar * OPUS_RESTRICT out,
> + const opus_val16 *window, int overlap,
> + int shift, int stride, int arch);
>
> /** Compute a backward MDCT (no scaling) and performs weighted overlap-add
> (scales implicitly by 1/2) */
> @@ -67,4 +73,27 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in,
> kiss_fft_scalar * OPUS_RESTRICT out,
> const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride);
>
> +#if !defined(OVERRIDE_OPUS_MDCT)
> +/* Is run-time CPU detection enabled on this platform? */
> +#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10))
> +
> +/* Per-architecture forward-MDCT dispatch table, indexed by
> +   (arch & OPUS_ARCHMASK). Declared extern so that including this header
> +   does not create a tentative definition of the array in every
> +   translation unit; it must be defined exactly once in a source file. */
> +extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
> +                                                            kiss_fft_scalar *in,
> +                                                            kiss_fft_scalar * OPUS_RESTRICT out,
> +                                                            const opus_val16 *window,
> +                                                            int overlap, int shift,
> +                                                            int stride, int arch);
> +
> +/* The table index uses the macro's own _arch parameter, so the macro works
> +   whatever the caller's variable is named. */
> +#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
> +   ((*CLT_MDCT_FORWARD_IMPL[(_arch)&OPUS_ARCHMASK])(_l, _in, _out, \
> +                                                    _window, _overlap, _shift, \
> +                                                    _stride, _arch))
> +#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
> +
> +/* No run-time dispatch: call the portable C implementation directly. */
> +#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
> +   clt_mdct_forward_c(_l, _in, _out, _window, _overlap, _shift, _stride, _arch)
> +
> +#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */
> +#endif /* end if !defined(OVERRIDE_OPUS_MDCT) */
> +
> #endif
> diff --git a/celt/modes.c b/celt/modes.c
> index 42e68e1..4fe91ff 100644
> --- a/celt/modes.c
> +++ b/celt/modes.c
> @@ -37,6 +37,7 @@
> #include "os_support.h"
> #include "stack_alloc.h"
> #include "quant_bands.h"
> +#include "cpu_support.h"
>
> static const opus_int16 eband5ms[] = {
> /*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k 9.6 12k 15.6 */
> @@ -229,6 +230,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
> opus_val16 *window;
> opus_int16 *logN;
> int LM;
> + int arch = opus_select_arch();
> ALLOC_STACK;
> #if !defined(VAR_ARRAYS) && !defined(USE_ALLOCA)
> if (global_stack==NULL)
> @@ -389,7 +391,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
> compute_pulse_cache(mode, mode->maxLM);
>
> if (clt_mdct_init(&mode->mdct, 2*mode->shortMdctSize*mode->nbShortMdcts,
> - mode->maxLM) == 0)
> + mode->maxLM, arch) == 0)
> goto failure;
>
> if (error)
> @@ -408,6 +410,8 @@ failure:
> #ifdef CUSTOM_MODES
> void opus_custom_mode_destroy(CELTMode *mode)
> {
> + int arch = opus_select_arch();
> +
> if (mode == NULL)
> return;
> #ifndef CUSTOM_MODES_ONLY
> @@ -431,7 +435,7 @@ void opus_custom_mode_destroy(CELTMode *mode)
> opus_free((opus_int16*)mode->cache.index);
> opus_free((unsigned char*)mode->cache.bits);
> opus_free((unsigned char*)mode->cache.caps);
> - clt_mdct_clear(&mode->mdct);
> + clt_mdct_clear(&mode->mdct, arch);
>
> opus_free((CELTMode *)mode);
> }
> diff --git a/celt/static_modes_float.h b/celt/static_modes_float.h
> index 2fadb62..e102a38 100644
> --- a/celt/static_modes_float.h
> +++ b/celt/static_modes_float.h
> @@ -4,6 +4,11 @@
> #include "modes.h"
> #include "rate.h"
>
> +#ifdef HAVE_ARM_NE10
> +#define OVERRIDE_FFT 1
> +#include "static_modes_float_arm_ne10.h"
> +#endif
> +
> #ifndef DEF_WINDOW120
> #define DEF_WINDOW120
> static const opus_val16 window120[120] = {
> @@ -431,6 +436,11 @@ static const kiss_fft_state fft_state48000_960_0 = {
> {5, 96, 3, 32, 4, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, }, /* factors */
> fft_bitrev480, /* bitrev */
> fft_twiddles48000_960, /* bitrev */
> +#ifdef OVERRIDE_FFT
> +(arch_fft_state *)&cfg_arch_480,
> +#else
> +NULL,
> +#endif
> };
> #endif
>
> @@ -443,6 +453,11 @@ static const kiss_fft_state fft_state48000_960_1 = {
> {5, 48, 3, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
> fft_bitrev240, /* bitrev */
> fft_twiddles48000_960, /* bitrev */
> +#ifdef OVERRIDE_FFT
> +(arch_fft_state *)&cfg_arch_240,
> +#else
> +NULL,
> +#endif
> };
> #endif
>
> @@ -455,6 +470,11 @@ static const kiss_fft_state fft_state48000_960_2 = {
> {5, 24, 3, 8, 2, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
> fft_bitrev120, /* bitrev */
> fft_twiddles48000_960, /* bitrev */
> +#ifdef OVERRIDE_FFT
> +(arch_fft_state *)&cfg_arch_120,
> +#else
> +NULL,
> +#endif
> };
> #endif
>
> @@ -467,6 +487,11 @@ static const kiss_fft_state fft_state48000_960_3 = {
> {5, 12, 3, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors */
> fft_bitrev60, /* bitrev */
> fft_twiddles48000_960, /* bitrev */
> +#ifdef OVERRIDE_FFT
> +(arch_fft_state *)&cfg_arch_60,
> +#else
> +NULL,
> +#endif
> };
> #endif
>
> diff --git a/celt/static_modes_float_arm_ne10.h b/celt/static_modes_float_arm_ne10.h
> new file mode 100644
> index 0000000..5bcec70
> --- /dev/null
> +++ b/celt/static_modes_float_arm_ne10.h
> @@ -0,0 +1,404 @@
> +/* The contents of this file was automatically generated by
> + * dump_mode_arm_ne10.c with arguments: 48000 960
> + * It contains static definitions for some pre-defined modes. */
> +#include <NE10_init.h>
> +
> +#ifndef NE10_FFT_PARAMS48000_960
> +#define NE10_FFT_PARAMS48000_960
> +static const ne10_int32_t ne10_factors_480[64] = {
> +4, 40, 4, 30, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, };
> +static const ne10_int32_t ne10_factors_240[64] = {
> +3, 20, 4, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, };
> +static const ne10_int32_t ne10_factors_120[64] = {
> +3, 10, 2, 15, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, };
> +static const ne10_int32_t ne10_factors_60[64] = {
> +2, 5, 5, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
> +0, 0, 0, 0, };
> +static const ne10_fft_cpx_float32_t ne10_twiddles_480[480] = {
> +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
> +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
> +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
> +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
> +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
> +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
> +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
> +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
> +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
> +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
> +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
> +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
> +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
> +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
> +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
> +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
> +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
> +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
> +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
> +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
> +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
> +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
> +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
> +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
> +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
> +{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
> +{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
> +{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
> +{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
> +{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
> +{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
> +{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
> +{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
> +{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
> +{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
> +{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
> +{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
> +{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
> +{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
> +{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
> +{1.0000000f,-0.0000000f}, {0.99991435f,-0.013089596f}, {0.99965733f,-0.026176950f},
> +{0.99922901f,-0.039259817f}, {0.99862951f,-0.052335959f}, {0.99785894f,-0.065403134f},
> +{0.99691731f,-0.078459099f}, {0.99580491f,-0.091501623f}, {0.99452192f,-0.10452846f},
> +{0.99306846f,-0.11753740f}, {0.99144489f,-0.13052620f}, {0.98965138f,-0.14349262f},
> +{0.98768836f,-0.15643448f}, {0.98555607f,-0.16934951f}, {0.98325491f,-0.18223552f},
> +{0.98078525f,-0.19509032f}, {0.97814763f,-0.20791170f}, {0.97534233f,-0.22069745f},
> +{0.97236991f,-0.23344538f}, {0.96923089f,-0.24615330f}, {0.96592581f,-0.25881904f},
> +{0.96245521f,-0.27144045f}, {0.95881975f,-0.28401536f}, {0.95501995f,-0.29654160f},
> +{0.95105648f,-0.30901700f}, {0.94693011f,-0.32143945f}, {0.94264150f,-0.33380687f},
> +{0.93819129f,-0.34611708f}, {0.93358040f,-0.35836795f}, {0.92880952f,-0.37055743f},
> +{0.92387956f,-0.38268346f}, {0.91879117f,-0.39474389f}, {0.91354543f,-0.40673664f},
> +{0.90814316f,-0.41865975f}, {0.90258527f,-0.43051112f}, {0.89687270f,-0.44228873f},
> +{0.89100653f,-0.45399052f}, {0.88498765f,-0.46561453f}, {0.87881708f,-0.47715878f},
> +{0.87249601f,-0.48862126f}, {0.86602545f,-0.50000000f}, {0.85940641f,-0.51129311f},
> +{0.85264015f,-0.52249855f}, {0.84572786f,-0.53361452f}, {0.83867055f,-0.54463905f},
> +{0.83146960f,-0.55557024f}, {0.82412618f,-0.56640625f}, {0.81664151f,-0.57714522f},
> +{0.80901700f,-0.58778524f}, {0.80125380f,-0.59832460f}, {0.79335332f,-0.60876143f},
> +{0.78531694f,-0.61909395f}, {0.77714598f,-0.62932038f}, {0.76884180f,-0.63943899f},
> +{0.76040596f,-0.64944810f}, {0.75183982f,-0.65934587f}, {0.74314475f,-0.66913062f},
> +{0.73432249f,-0.67880076f}, {0.72537434f,-0.68835455f}, {0.71630192f,-0.69779050f},
> +{0.70710677f,-0.70710683f}, {0.69779044f,-0.71630198f}, {0.68835455f,-0.72537440f},
> +{0.67880070f,-0.73432255f}, {0.66913056f,-0.74314487f}, {0.65934581f,-0.75183982f},
> +{0.64944804f,-0.76040596f}, {0.63943899f,-0.76884186f}, {0.62932038f,-0.77714598f},
> +{0.61909395f,-0.78531694f}, {0.60876137f,-0.79335338f}, {0.59832460f,-0.80125386f},
> +{0.58778524f,-0.80901700f}, {0.57714516f,-0.81664151f}, {0.56640625f,-0.82412618f},
> +{0.55557019f,-0.83146960f}, {0.54463899f,-0.83867055f}, {0.53361452f,-0.84572786f},
> +{0.52249849f,-0.85264015f}, {0.51129311f,-0.85940641f}, {0.49999997f,-0.86602545f},
> +{0.48862118f,-0.87249601f}, {0.47715876f,-0.87881708f}, {0.46561447f,-0.88498765f},
> +{0.45399052f,-0.89100653f}, {0.44228867f,-0.89687276f}, {0.43051103f,-0.90258533f},
> +{0.41865975f,-0.90814316f}, {0.40673661f,-0.91354549f}, {0.39474380f,-0.91879129f},
> +{0.38268343f,-0.92387956f}, {0.37055740f,-0.92880958f}, {0.35836786f,-0.93358046f},
> +{0.34611705f,-0.93819135f}, {0.33380681f,-0.94264150f}, {0.32143947f,-0.94693011f},
> +{0.30901697f,-0.95105654f}, {0.29654151f,-0.95501995f}, {0.28401533f,-0.95881975f},
> +{0.27144039f,-0.96245527f}, {0.25881907f,-0.96592581f}, {0.24615327f,-0.96923089f},
> +{0.23344530f,-0.97236991f}, {0.22069745f,-0.97534233f}, {0.20791166f,-0.97814763f},
> +{0.19509023f,-0.98078531f}, {0.18223552f,-0.98325491f}, {0.16934945f,-0.98555607f},
> +{0.15643437f,-0.98768836f}, {0.14349259f,-0.98965138f}, {0.13052613f,-0.99144489f},
> +{0.11753740f,-0.99306846f}, {0.10452842f,-0.99452192f}, {0.091501534f,-0.99580491f},
> +{0.078459084f,-0.99691731f}, {0.065403074f,-0.99785894f}, {0.052335974f,-0.99862951f},
> +{0.039259788f,-0.99922901f}, {0.026176875f,-0.99965733f}, {0.013089597f,-0.99991435f},
> +{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
> +{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
> +{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
> +{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
> +{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
> +{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
> +{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
> +{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
> +{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
> +{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
> +{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
> +{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
> +{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
> +{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
> +{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
> +{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
> +{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
> +{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
> +{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
> +{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
> +{-4.3711388e-08f,-1.0000000f}, {-0.026176963f,-0.99965733f}, {-0.052336060f,-0.99862951f},
> +{-0.078459173f,-0.99691731f}, {-0.10452851f,-0.99452192f}, {-0.13052621f,-0.99144489f},
> +{-0.15643445f,-0.98768836f}, {-0.18223560f,-0.98325491f}, {-0.20791174f,-0.97814757f},
> +{-0.23344538f,-0.97236991f}, {-0.25881916f,-0.96592581f}, {-0.28401542f,-0.95881969f},
> +{-0.30901703f,-0.95105648f}, {-0.33380687f,-0.94264150f}, {-0.35836795f,-0.93358040f},
> +{-0.38268352f,-0.92387950f}, {-0.40673670f,-0.91354543f}, {-0.43051112f,-0.90258527f},
> +{-0.45399061f,-0.89100647f}, {-0.47715873f,-0.87881708f}, {-0.50000006f,-0.86602533f},
> +{-0.52249867f,-0.85264009f}, {-0.54463905f,-0.83867055f}, {-0.56640631f,-0.82412612f},
> +{-0.58778518f,-0.80901700f}, {-0.60876143f,-0.79335332f}, {-0.62932050f,-0.77714586f},
> +{-0.64944804f,-0.76040596f}, {-0.66913068f,-0.74314475f}, {-0.68835467f,-0.72537428f},
> +{-0.70710677f,-0.70710677f}, {-0.72537446f,-0.68835449f}, {-0.74314493f,-0.66913044f},
> +{-0.76040596f,-0.64944804f}, {-0.77714604f,-0.62932026f}, {-0.79335332f,-0.60876143f},
> +{-0.80901700f,-0.58778518f}, {-0.82412624f,-0.56640613f}, {-0.83867055f,-0.54463899f},
> +{-0.85264021f,-0.52249849f}, {-0.86602539f,-0.50000006f}, {-0.87881714f,-0.47715873f},
> +{-0.89100659f,-0.45399037f}, {-0.90258527f,-0.43051112f}, {-0.91354549f,-0.40673658f},
> +{-0.92387956f,-0.38268328f}, {-0.93358040f,-0.35836792f}, {-0.94264150f,-0.33380675f},
> +{-0.95105654f,-0.30901679f}, {-0.95881975f,-0.28401530f}, {-0.96592587f,-0.25881892f},
> +{-0.97236991f,-0.23344538f}, {-0.97814763f,-0.20791161f}, {-0.98325491f,-0.18223536f},
> +{-0.98768836f,-0.15643445f}, {-0.99144489f,-0.13052608f}, {-0.99452192f,-0.10452849f},
> +{-0.99691737f,-0.078459039f}, {-0.99862957f,-0.052335810f}, {-0.99965733f,-0.026176952f},
> +{1.0000000f,-0.0000000f}, {0.99922901f,-0.039259817f}, {0.99691731f,-0.078459099f},
> +{0.99306846f,-0.11753740f}, {0.98768836f,-0.15643448f}, {0.98078525f,-0.19509032f},
> +{0.97236991f,-0.23344538f}, {0.96245521f,-0.27144045f}, {0.95105648f,-0.30901700f},
> +{0.93819129f,-0.34611708f}, {0.92387956f,-0.38268346f}, {0.90814316f,-0.41865975f},
> +{0.89100653f,-0.45399052f}, {0.87249601f,-0.48862126f}, {0.85264015f,-0.52249855f},
> +{0.83146960f,-0.55557024f}, {0.80901700f,-0.58778524f}, {0.78531694f,-0.61909395f},
> +{0.76040596f,-0.64944810f}, {0.73432249f,-0.67880076f}, {0.70710677f,-0.70710683f},
> +{0.67880070f,-0.73432255f}, {0.64944804f,-0.76040596f}, {0.61909395f,-0.78531694f},
> +{0.58778524f,-0.80901700f}, {0.55557019f,-0.83146960f}, {0.52249849f,-0.85264015f},
> +{0.48862118f,-0.87249601f}, {0.45399052f,-0.89100653f}, {0.41865975f,-0.90814316f},
> +{0.38268343f,-0.92387956f}, {0.34611705f,-0.93819135f}, {0.30901697f,-0.95105654f},
> +{0.27144039f,-0.96245527f}, {0.23344530f,-0.97236991f}, {0.19509023f,-0.98078531f},
> +{0.15643437f,-0.98768836f}, {0.11753740f,-0.99306846f}, {0.078459084f,-0.99691731f},
> +{0.039259788f,-0.99922901f}, {-4.3711388e-08f,-1.0000000f}, {-0.039259877f,-0.99922901f},
> +{-0.078459173f,-0.99691731f}, {-0.11753749f,-0.99306846f}, {-0.15643445f,-0.98768836f},
> +{-0.19509032f,-0.98078525f}, {-0.23344538f,-0.97236991f}, {-0.27144048f,-0.96245521f},
> +{-0.30901703f,-0.95105648f}, {-0.34611711f,-0.93819129f}, {-0.38268352f,-0.92387950f},
> +{-0.41865984f,-0.90814310f}, {-0.45399061f,-0.89100647f}, {-0.48862135f,-0.87249595f},
> +{-0.52249867f,-0.85264009f}, {-0.55557036f,-0.83146954f}, {-0.58778518f,-0.80901700f},
> +{-0.61909389f,-0.78531694f}, {-0.64944804f,-0.76040596f}, {-0.67880076f,-0.73432249f},
> +{-0.70710677f,-0.70710677f}, {-0.73432249f,-0.67880070f}, {-0.76040596f,-0.64944804f},
> +{-0.78531694f,-0.61909389f}, {-0.80901700f,-0.58778518f}, {-0.83146966f,-0.55557019f},
> +{-0.85264021f,-0.52249849f}, {-0.87249607f,-0.48862115f}, {-0.89100659f,-0.45399037f},
> +{-0.90814322f,-0.41865960f}, {-0.92387956f,-0.38268328f}, {-0.93819135f,-0.34611690f},
> +{-0.95105654f,-0.30901679f}, {-0.96245521f,-0.27144048f}, {-0.97236991f,-0.23344538f},
> +{-0.98078531f,-0.19509031f}, {-0.98768836f,-0.15643445f}, {-0.99306846f,-0.11753736f},
> +{-0.99691737f,-0.078459039f}, {-0.99922901f,-0.039259743f}, {-1.0000000f,8.7422777e-08f},
> +{-0.99922901f,0.039259918f}, {-0.99691731f,0.078459218f}, {-0.99306846f,0.11753753f},
> +{-0.98768830f,0.15643461f}, {-0.98078525f,0.19509049f}, {-0.97236985f,0.23344554f},
> +{-0.96245515f,0.27144065f}, {-0.95105654f,0.30901697f}, {-0.93819135f,0.34611705f},
> +{-0.92387956f,0.38268346f}, {-0.90814316f,0.41865975f}, {-0.89100653f,0.45399055f},
> +{-0.87249601f,0.48862129f}, {-0.85264015f,0.52249861f}, {-0.83146960f,0.55557030f},
> +{-0.80901694f,0.58778536f}, {-0.78531688f,0.61909401f}, {-0.76040590f,0.64944816f},
> +{-0.73432243f,0.67880082f}, {-0.70710665f,0.70710689f}, {-0.67880058f,0.73432261f},
> +{-0.64944792f,0.76040608f}, {-0.61909378f,0.78531706f}, {-0.58778507f,0.80901712f},
> +{-0.55557001f,0.83146977f}, {-0.52249837f,0.85264033f}, {-0.48862100f,0.87249613f},
> +{-0.45399022f,0.89100665f}, {-0.41865945f,0.90814328f}, {-0.38268313f,0.92387968f},
> +{-0.34611672f,0.93819147f}, {-0.30901709f,0.95105648f}, {-0.27144054f,0.96245521f},
> +{-0.23344545f,0.97236991f}, {-0.19509038f,0.98078525f}, {-0.15643452f,0.98768830f},
> +{-0.11753743f,0.99306846f}, {-0.078459114f,0.99691731f}, {-0.039259821f,0.99922901f},
> +};
> +static const ne10_fft_cpx_float32_t ne10_twiddles_240[240] = {
> +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
> +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
> +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
> +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
> +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
> +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
> +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
> +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
> +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
> +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
> +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
> +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
> +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
> +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
> +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
> +{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
> +{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
> +{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
> +{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
> +{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
> +{1.0000000f,-0.0000000f}, {0.99965733f,-0.026176950f}, {0.99862951f,-0.052335959f},
> +{0.99691731f,-0.078459099f}, {0.99452192f,-0.10452846f}, {0.99144489f,-0.13052620f},
> +{0.98768836f,-0.15643448f}, {0.98325491f,-0.18223552f}, {0.97814763f,-0.20791170f},
> +{0.97236991f,-0.23344538f}, {0.96592581f,-0.25881904f}, {0.95881975f,-0.28401536f},
> +{0.95105648f,-0.30901700f}, {0.94264150f,-0.33380687f}, {0.93358040f,-0.35836795f},
> +{0.92387956f,-0.38268346f}, {0.91354543f,-0.40673664f}, {0.90258527f,-0.43051112f},
> +{0.89100653f,-0.45399052f}, {0.87881708f,-0.47715878f}, {0.86602545f,-0.50000000f},
> +{0.85264015f,-0.52249855f}, {0.83867055f,-0.54463905f}, {0.82412618f,-0.56640625f},
> +{0.80901700f,-0.58778524f}, {0.79335332f,-0.60876143f}, {0.77714598f,-0.62932038f},
> +{0.76040596f,-0.64944810f}, {0.74314475f,-0.66913062f}, {0.72537434f,-0.68835455f},
> +{0.70710677f,-0.70710683f}, {0.68835455f,-0.72537440f}, {0.66913056f,-0.74314487f},
> +{0.64944804f,-0.76040596f}, {0.62932038f,-0.77714598f}, {0.60876137f,-0.79335338f},
> +{0.58778524f,-0.80901700f}, {0.56640625f,-0.82412618f}, {0.54463899f,-0.83867055f},
> +{0.52249849f,-0.85264015f}, {0.49999997f,-0.86602545f}, {0.47715876f,-0.87881708f},
> +{0.45399052f,-0.89100653f}, {0.43051103f,-0.90258533f}, {0.40673661f,-0.91354549f},
> +{0.38268343f,-0.92387956f}, {0.35836786f,-0.93358046f}, {0.33380681f,-0.94264150f},
> +{0.30901697f,-0.95105654f}, {0.28401533f,-0.95881975f}, {0.25881907f,-0.96592581f},
> +{0.23344530f,-0.97236991f}, {0.20791166f,-0.97814763f}, {0.18223552f,-0.98325491f},
> +{0.15643437f,-0.98768836f}, {0.13052613f,-0.99144489f}, {0.10452842f,-0.99452192f},
> +{0.078459084f,-0.99691731f}, {0.052335974f,-0.99862951f}, {0.026176875f,-0.99965733f},
> +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
> +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
> +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
> +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
> +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
> +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
> +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
> +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
> +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
> +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
> +{-4.3711388e-08f,-1.0000000f}, {-0.052336060f,-0.99862951f}, {-0.10452851f,-0.99452192f},
> +{-0.15643445f,-0.98768836f}, {-0.20791174f,-0.97814757f}, {-0.25881916f,-0.96592581f},
> +{-0.30901703f,-0.95105648f}, {-0.35836795f,-0.93358040f}, {-0.40673670f,-0.91354543f},
> +{-0.45399061f,-0.89100647f}, {-0.50000006f,-0.86602533f}, {-0.54463905f,-0.83867055f},
> +{-0.58778518f,-0.80901700f}, {-0.62932050f,-0.77714586f}, {-0.66913068f,-0.74314475f},
> +{-0.70710677f,-0.70710677f}, {-0.74314493f,-0.66913044f}, {-0.77714604f,-0.62932026f},
> +{-0.80901700f,-0.58778518f}, {-0.83867055f,-0.54463899f}, {-0.86602539f,-0.50000006f},
> +{-0.89100659f,-0.45399037f}, {-0.91354549f,-0.40673658f}, {-0.93358040f,-0.35836792f},
> +{-0.95105654f,-0.30901679f}, {-0.96592587f,-0.25881892f}, {-0.97814763f,-0.20791161f},
> +{-0.98768836f,-0.15643445f}, {-0.99452192f,-0.10452849f}, {-0.99862957f,-0.052335810f},
> +{1.0000000f,-0.0000000f}, {0.99691731f,-0.078459099f}, {0.98768836f,-0.15643448f},
> +{0.97236991f,-0.23344538f}, {0.95105648f,-0.30901700f}, {0.92387956f,-0.38268346f},
> +{0.89100653f,-0.45399052f}, {0.85264015f,-0.52249855f}, {0.80901700f,-0.58778524f},
> +{0.76040596f,-0.64944810f}, {0.70710677f,-0.70710683f}, {0.64944804f,-0.76040596f},
> +{0.58778524f,-0.80901700f}, {0.52249849f,-0.85264015f}, {0.45399052f,-0.89100653f},
> +{0.38268343f,-0.92387956f}, {0.30901697f,-0.95105654f}, {0.23344530f,-0.97236991f},
> +{0.15643437f,-0.98768836f}, {0.078459084f,-0.99691731f}, {-4.3711388e-08f,-1.0000000f},
> +{-0.078459173f,-0.99691731f}, {-0.15643445f,-0.98768836f}, {-0.23344538f,-0.97236991f},
> +{-0.30901703f,-0.95105648f}, {-0.38268352f,-0.92387950f}, {-0.45399061f,-0.89100647f},
> +{-0.52249867f,-0.85264009f}, {-0.58778518f,-0.80901700f}, {-0.64944804f,-0.76040596f},
> +{-0.70710677f,-0.70710677f}, {-0.76040596f,-0.64944804f}, {-0.80901700f,-0.58778518f},
> +{-0.85264021f,-0.52249849f}, {-0.89100659f,-0.45399037f}, {-0.92387956f,-0.38268328f},
> +{-0.95105654f,-0.30901679f}, {-0.97236991f,-0.23344538f}, {-0.98768836f,-0.15643445f},
> +{-0.99691737f,-0.078459039f}, {-1.0000000f,8.7422777e-08f}, {-0.99691731f,0.078459218f},
> +{-0.98768830f,0.15643461f}, {-0.97236985f,0.23344554f}, {-0.95105654f,0.30901697f},
> +{-0.92387956f,0.38268346f}, {-0.89100653f,0.45399055f}, {-0.85264015f,0.52249861f},
> +{-0.80901694f,0.58778536f}, {-0.76040590f,0.64944816f}, {-0.70710665f,0.70710689f},
> +{-0.64944792f,0.76040608f}, {-0.58778507f,0.80901712f}, {-0.52249837f,0.85264033f},
> +{-0.45399022f,0.89100665f}, {-0.38268313f,0.92387968f}, {-0.30901709f,0.95105648f},
> +{-0.23344545f,0.97236991f}, {-0.15643452f,0.98768830f}, {-0.078459114f,0.99691731f},
> +};
> +static const ne10_fft_cpx_float32_t ne10_twiddles_120[120] = {
> +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
> +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
> +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
> +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
> +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
> +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
> +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
> +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
> +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
> +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
> +{1.0000000f,-0.0000000f}, {0.99862951f,-0.052335959f}, {0.99452192f,-0.10452846f},
> +{0.98768836f,-0.15643448f}, {0.97814763f,-0.20791170f}, {0.96592581f,-0.25881904f},
> +{0.95105648f,-0.30901700f}, {0.93358040f,-0.35836795f}, {0.91354543f,-0.40673664f},
> +{0.89100653f,-0.45399052f}, {0.86602545f,-0.50000000f}, {0.83867055f,-0.54463905f},
> +{0.80901700f,-0.58778524f}, {0.77714598f,-0.62932038f}, {0.74314475f,-0.66913062f},
> +{0.70710677f,-0.70710683f}, {0.66913056f,-0.74314487f}, {0.62932038f,-0.77714598f},
> +{0.58778524f,-0.80901700f}, {0.54463899f,-0.83867055f}, {0.49999997f,-0.86602545f},
> +{0.45399052f,-0.89100653f}, {0.40673661f,-0.91354549f}, {0.35836786f,-0.93358046f},
> +{0.30901697f,-0.95105654f}, {0.25881907f,-0.96592581f}, {0.20791166f,-0.97814763f},
> +{0.15643437f,-0.98768836f}, {0.10452842f,-0.99452192f}, {0.052335974f,-0.99862951f},
> +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
> +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
> +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
> +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
> +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
> +{-4.3711388e-08f,-1.0000000f}, {-0.10452851f,-0.99452192f}, {-0.20791174f,-0.97814757f},
> +{-0.30901703f,-0.95105648f}, {-0.40673670f,-0.91354543f}, {-0.50000006f,-0.86602533f},
> +{-0.58778518f,-0.80901700f}, {-0.66913068f,-0.74314475f}, {-0.74314493f,-0.66913044f},
> +{-0.80901700f,-0.58778518f}, {-0.86602539f,-0.50000006f}, {-0.91354549f,-0.40673658f},
> +{-0.95105654f,-0.30901679f}, {-0.97814763f,-0.20791161f}, {-0.99452192f,-0.10452849f},
> +{1.0000000f,-0.0000000f}, {0.98768836f,-0.15643448f}, {0.95105648f,-0.30901700f},
> +{0.89100653f,-0.45399052f}, {0.80901700f,-0.58778524f}, {0.70710677f,-0.70710683f},
> +{0.58778524f,-0.80901700f}, {0.45399052f,-0.89100653f}, {0.30901697f,-0.95105654f},
> +{0.15643437f,-0.98768836f}, {-4.3711388e-08f,-1.0000000f}, {-0.15643445f,-0.98768836f},
> +{-0.30901703f,-0.95105648f}, {-0.45399061f,-0.89100647f}, {-0.58778518f,-0.80901700f},
> +{-0.70710677f,-0.70710677f}, {-0.80901700f,-0.58778518f}, {-0.89100659f,-0.45399037f},
> +{-0.95105654f,-0.30901679f}, {-0.98768836f,-0.15643445f}, {-1.0000000f,8.7422777e-08f},
> +{-0.98768830f,0.15643461f}, {-0.95105654f,0.30901697f}, {-0.89100653f,0.45399055f},
> +{-0.80901694f,0.58778536f}, {-0.70710665f,0.70710689f}, {-0.58778507f,0.80901712f},
> +{-0.45399022f,0.89100665f}, {-0.30901709f,0.95105648f}, {-0.15643452f,0.98768830f},
> +};
> +static const ne10_fft_cpx_float32_t ne10_twiddles_60[60] = {
> +{1.0000000f,0.0000000f}, {1.0000000f,-0.0000000f}, {1.0000000f,-0.0000000f},
> +{1.0000000f,-0.0000000f}, {0.91354543f,-0.40673664f}, {0.66913056f,-0.74314487f},
> +{1.0000000f,-0.0000000f}, {0.66913056f,-0.74314487f}, {-0.10452851f,-0.99452192f},
> +{1.0000000f,-0.0000000f}, {0.30901697f,-0.95105654f}, {-0.80901700f,-0.58778518f},
> +{1.0000000f,-0.0000000f}, {-0.10452851f,-0.99452192f}, {-0.97814757f,0.20791179f},
> +{1.0000000f,-0.0000000f}, {0.99452192f,-0.10452846f}, {0.97814763f,-0.20791170f},
> +{0.95105648f,-0.30901700f}, {0.91354543f,-0.40673664f}, {0.86602545f,-0.50000000f},
> +{0.80901700f,-0.58778524f}, {0.74314475f,-0.66913062f}, {0.66913056f,-0.74314487f},
> +{0.58778524f,-0.80901700f}, {0.49999997f,-0.86602545f}, {0.40673661f,-0.91354549f},
> +{0.30901697f,-0.95105654f}, {0.20791166f,-0.97814763f}, {0.10452842f,-0.99452192f},
> +{1.0000000f,-0.0000000f}, {0.97814763f,-0.20791170f}, {0.91354543f,-0.40673664f},
> +{0.80901700f,-0.58778524f}, {0.66913056f,-0.74314487f}, {0.49999997f,-0.86602545f},
> +{0.30901697f,-0.95105654f}, {0.10452842f,-0.99452192f}, {-0.10452851f,-0.99452192f},
> +{-0.30901703f,-0.95105648f}, {-0.50000006f,-0.86602533f}, {-0.66913068f,-0.74314475f},
> +{-0.80901700f,-0.58778518f}, {-0.91354549f,-0.40673658f}, {-0.97814763f,-0.20791161f},
> +{1.0000000f,-0.0000000f}, {0.95105648f,-0.30901700f}, {0.80901700f,-0.58778524f},
> +{0.58778524f,-0.80901700f}, {0.30901697f,-0.95105654f}, {-4.3711388e-08f,-1.0000000f},
> +{-0.30901703f,-0.95105648f}, {-0.58778518f,-0.80901700f}, {-0.80901700f,-0.58778518f},
> +{-0.95105654f,-0.30901679f}, {-1.0000000f,8.7422777e-08f}, {-0.95105654f,0.30901697f},
> +{-0.80901694f,0.58778536f}, {-0.58778507f,0.80901712f}, {-0.30901709f,0.95105648f},
> +};
> +static const ne10_fft_state_float32_t ne10_fft_state_float32_480 = {
> +120,
> +(ne10_int32_t *)ne10_factors_480,
> +(ne10_fft_cpx_float32_t *)ne10_twiddles_480,
> +NULL,
> +(ne10_fft_cpx_float32_t *)&ne10_twiddles_480[120],
> +/* is_forward_scaled = true */
> +(ne10_int32_t) 1,
> +/* is_backward_scaled = false */
> +(ne10_int32_t) 0,
> +};
> +static const arch_fft_state cfg_arch_480 = {
> +1,
> +(void *)&ne10_fft_state_float32_480,
> +};
> +
> +static const ne10_fft_state_float32_t ne10_fft_state_float32_240 = {
> +60,
> +(ne10_int32_t *)ne10_factors_240,
> +(ne10_fft_cpx_float32_t *)ne10_twiddles_240,
> +NULL,
> +(ne10_fft_cpx_float32_t *)&ne10_twiddles_240[60],
> +/* is_forward_scaled = true */
> +(ne10_int32_t) 1,
> +/* is_backward_scaled = false */
> +(ne10_int32_t) 0,
> +};
> +static const arch_fft_state cfg_arch_240 = {
> +1,
> +(void *)&ne10_fft_state_float32_240,
> +};
> +
> +static const ne10_fft_state_float32_t ne10_fft_state_float32_120 = {
> +30,
> +(ne10_int32_t *)ne10_factors_120,
> +(ne10_fft_cpx_float32_t *)ne10_twiddles_120,
> +NULL,
> +(ne10_fft_cpx_float32_t *)&ne10_twiddles_120[30],
> +/* is_forward_scaled = true */
> +(ne10_int32_t) 1,
> +/* is_backward_scaled = false */
> +(ne10_int32_t) 0,
> +};
> +static const arch_fft_state cfg_arch_120 = {
> +1,
> +(void *)&ne10_fft_state_float32_120,
> +};
> +
> +static const ne10_fft_state_float32_t ne10_fft_state_float32_60 = {
> +15,
> +(ne10_int32_t *)ne10_factors_60,
> +(ne10_fft_cpx_float32_t *)ne10_twiddles_60,
> +NULL,
> +(ne10_fft_cpx_float32_t *)&ne10_twiddles_60[15],
> +/* is_forward_scaled = true */
> +(ne10_int32_t) 1,
> +/* is_backward_scaled = false */
> +(ne10_int32_t) 0,
> +};
> +static const arch_fft_state cfg_arch_60 = {
> +1,
> +(void *)&ne10_fft_state_float32_60,
> +};
> +
> +#endif /* end NE10_FFT_PARAMS48000_960 */
> diff --git a/celt/tests/test_unit_dft.c b/celt/tests/test_unit_dft.c
> index 57db0e3..4a2f8af 100644
> --- a/celt/tests/test_unit_dft.c
> +++ b/celt/tests/test_unit_dft.c
> @@ -40,11 +40,27 @@
> #define CELT_C
> #define TEST_UNIT_DFT_C
> #include "stack_alloc.h"
> +#include "pitch.h"
> +#include "celt_lpc.c"
> +#include "pitch.c"
> #include "kiss_fft.h"
> #include "kiss_fft.c"
> +#include "mdct.c"
> #include "mathops.c"
> #include "entcode.c"
>
> +#if defined(OPUS_HAVE_RTCD) && \
> + (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_NEON_INTR))
> +#include "arm/armcpu.c"
> +#if defined(HAVE_ARM_NE10)
> +#include "arm/celt_ne10_fft.c"
> +#include "arm/celt_ne10_mdct.c"
> +#include "arm/celt_neon_intr.c"
> +#include "arm/arm_celt_map.c"
> +#endif
> +#elif defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1)
> +#include "x86/x86cpu.c"
> +#endif
>
> #ifndef M_PI
> #define M_PI 3.141592653
> @@ -93,13 +109,13 @@ void check(kiss_fft_cpx * in,kiss_fft_cpx * out,int nfft,int isinverse)
> }
> }
>
> -void test1d(int nfft,int isinverse)
> +void test1d(int nfft,int isinverse,int arch)
> {
> size_t buflen = sizeof(kiss_fft_cpx)*nfft;
>
> kiss_fft_cpx * in = (kiss_fft_cpx*)malloc(buflen);
> kiss_fft_cpx * out= (kiss_fft_cpx*)malloc(buflen);
> - kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0);
> + kiss_fft_state *cfg = opus_fft_alloc(nfft,0,0,arch);
> int k;
>
> for (k=0;k<nfft;++k) {
> @@ -125,7 +141,7 @@ void test1d(int nfft,int isinverse)
> if (isinverse)
> opus_ifft(cfg,in,out);
> else
> - opus_fft(cfg,in,out);
> + opus_fft(cfg,in,out, arch);
>
> /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
>
> @@ -139,26 +155,28 @@ void test1d(int nfft,int isinverse)
> int main(int argc,char ** argv)
> {
> ALLOC_STACK;
> + int arch = opus_select_arch();
> +
> if (argc>1) {
> int k;
> for (k=1;k<argc;++k) {
> - test1d(atoi(argv[k]),0);
> - test1d(atoi(argv[k]),1);
> + test1d(atoi(argv[k]),0,arch);
> + test1d(atoi(argv[k]),1,arch);
> }
> }else{
> - test1d(32,0);
> - test1d(32,1);
> - test1d(128,0);
> - test1d(128,1);
> - test1d(256,0);
> - test1d(256,1);
> + test1d(32,0,arch);
> + test1d(32,1,arch);
> + test1d(128,0,arch);
> + test1d(128,1,arch);
> + test1d(256,0,arch);
> + test1d(256,1,arch);
> #ifndef RADIX_TWO_ONLY
> - test1d(36,0);
> - test1d(36,1);
> - test1d(50,0);
> - test1d(50,1);
> - test1d(120,0);
> - test1d(120,1);
> + test1d(36,0,arch);
> + test1d(36,1,arch);
> + test1d(50,0,arch);
> + test1d(50,1,arch);
> + test1d(120,0,arch);
> + test1d(120,1,arch);
> #endif
> }
> return ret;
> diff --git a/celt/tests/test_unit_mathops.c b/celt/tests/test_unit_mathops.c
> index b9b1bcf..0f1e4f1 100644
> --- a/celt/tests/test_unit_mathops.c
> +++ b/celt/tests/test_unit_mathops.c
> @@ -49,6 +49,8 @@
> #include "cwrs.c"
> #include "pitch.c"
> #include "celt_lpc.c"
> +#include "kiss_fft.c"
> +#include "mdct.c"
>
> #if defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)
> #include "x86/pitch_sse.c"
> @@ -60,6 +62,10 @@
> || defined(OPUS_ARM_NEON_INTR))
> #if defined(OPUS_ARM_NEON_INTR)
> #include "arm/celt_neon_intr.c"
> +#if defined(HAVE_ARM_NE10)
> +#include "arm/celt_ne10_fft.c"
> +#include "arm/celt_ne10_mdct.c"
> +#endif
> #endif
> #include "arm/arm_celt_map.c"
> #endif
> diff --git a/celt/tests/test_unit_mdct.c b/celt/tests/test_unit_mdct.c
> index ac8957f..4c27b4c 100644
> --- a/celt/tests/test_unit_mdct.c
> +++ b/celt/tests/test_unit_mdct.c
> @@ -43,9 +43,24 @@
>
> #include "kiss_fft.c"
> #include "mdct.c"
> +#include "pitch.c"
> +#include "celt_lpc.c"
> #include "mathops.c"
> #include "entcode.c"
>
> +#if defined(OPUS_HAVE_RTCD) && \
> + (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_NEON_INTR))
> +#include "arm/armcpu.c"
> +#if defined(HAVE_ARM_NE10)
> +#include "arm/celt_ne10_fft.c"
> +#include "arm/celt_ne10_mdct.c"
> +#include "arm/celt_neon_intr.c"
> +#include "arm/arm_celt_map.c"
> +#endif
> +#elif defined(OPUS_X86_MAY_HAVE_SSE2) || defined(OPUS_X86_MAY_HAVE_SSE4_1)
> +#include "x86/x86cpu.c"
> +#endif
> +
> #ifndef M_PI
> #define M_PI 3.141592653
> #endif
> @@ -112,7 +127,7 @@ void check_inv(kiss_fft_scalar * in,kiss_fft_scalar * out,int nfft,int isinver
> }
>
>
> -void test1d(int nfft,int isinverse)
> +void test1d(int nfft,int isinverse,int arch)
> {
> mdct_lookup cfg;
> size_t buflen = sizeof(kiss_fft_scalar)*nfft;
> @@ -123,7 +138,7 @@ void test1d(int nfft,int isinverse)
> opus_val16 * window= (opus_val16*)malloc(sizeof(opus_val16)*nfft/2);
> int k;
>
> - clt_mdct_init(&cfg, nfft, 0);
> + clt_mdct_init(&cfg, nfft, 0, arch);
> for (k=0;k<nfft;++k) {
> in[k] = (rand() % 32768) - 16384;
> }
> @@ -156,7 +171,7 @@ void test1d(int nfft,int isinverse)
> out[nfft-k-1] = out[nfft/2+k];
> check_inv(in,out,nfft,isinverse);
> } else {
> - clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1);
> + clt_mdct_forward(&cfg,in,out,window, nfft/2, 0, 1, arch);
> check(in_copy,out,nfft,isinverse);
> }
> /*for (k=0;k<nfft;++k) printf("%d %d ", out[k].r, out[k].i);printf("\n");*/
> @@ -164,46 +179,48 @@ void test1d(int nfft,int isinverse)
>
> free(in);
> free(out);
> - clt_mdct_clear(&cfg);
> + clt_mdct_clear(&cfg, arch);
> }
>
> int main(int argc,char ** argv)
> {
> ALLOC_STACK;
> + int arch = opus_select_arch();
> +
> if (argc>1) {
> int k;
> for (k=1;k<argc;++k) {
> - test1d(atoi(argv[k]),0);
> - test1d(atoi(argv[k]),1);
> + test1d(atoi(argv[k]),0,arch);
> + test1d(atoi(argv[k]),1,arch);
> }
> }else{
> - test1d(32,0);
> - test1d(32,1);
> - test1d(256,0);
> - test1d(256,1);
> - test1d(512,0);
> - test1d(512,1);
> - test1d(1024,0);
> - test1d(1024,1);
> - test1d(2048,0);
> - test1d(2048,1);
> + test1d(32,0,arch);
> + test1d(32,1,arch);
> + test1d(256,0,arch);
> + test1d(256,1,arch);
> + test1d(512,0,arch);
> + test1d(512,1,arch);
> + test1d(1024,0,arch);
> + test1d(1024,1,arch);
> + test1d(2048,0,arch);
> + test1d(2048,1,arch);
> #ifndef RADIX_TWO_ONLY
> - test1d(36,0);
> - test1d(36,1);
> - test1d(40,0);
> - test1d(40,1);
> - test1d(60,0);
> - test1d(60,1);
> - test1d(120,0);
> - test1d(120,1);
> - test1d(240,0);
> - test1d(240,1);
> - test1d(480,0);
> - test1d(480,1);
> - test1d(960,0);
> - test1d(960,1);
> - test1d(1920,0);
> - test1d(1920,1);
> + test1d(36,0,arch);
> + test1d(36,1,arch);
> + test1d(40,0,arch);
> + test1d(40,1,arch);
> + test1d(60,0,arch);
> + test1d(60,1,arch);
> + test1d(120,0,arch);
> + test1d(120,1,arch);
> + test1d(240,0,arch);
> + test1d(240,1,arch);
> + test1d(480,0,arch);
> + test1d(480,1,arch);
> + test1d(960,0,arch);
> + test1d(960,1,arch);
> + test1d(1920,0,arch);
> + test1d(1920,1,arch);
> #endif
> }
> return ret;
> diff --git a/celt/tests/test_unit_rotation.c b/celt/tests/test_unit_rotation.c
> index 5507884..ce14936 100644
> --- a/celt/tests/test_unit_rotation.c
> +++ b/celt/tests/test_unit_rotation.c
> @@ -46,6 +46,8 @@
> #include "bands.h"
> #include "pitch.c"
> #include "celt_lpc.c"
> +#include "kiss_fft.c"
> +#include "mdct.c"
> #include <math.h>
>
> #if defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)
> @@ -59,6 +61,10 @@
> #if defined(OPUS_ARM_NEON_INTR)
> #include "arm/celt_neon_intr.c"
> #endif
> +#if defined(HAVE_ARM_NE10)
> +#include "arm/celt_ne10_fft.c"
> +#include "arm/celt_ne10_mdct.c"
> +#endif
> #include "arm/arm_celt_map.c"
> #endif
>
> diff --git a/celt_headers.mk b/celt_headers.mk
> index 5bb193e..c51c3ee 100644
> --- a/celt_headers.mk
> +++ b/celt_headers.mk
> @@ -31,11 +31,14 @@ celt/stack_alloc.h \
> celt/vq.h \
> celt/static_modes_float.h \
> celt/static_modes_fixed.h \
> +celt/static_modes_float_arm_ne10.h \
> celt/arm/armcpu.h \
> celt/arm/fixed_armv4.h \
> celt/arm/fixed_armv5e.h \
> celt/arm/kiss_fft_armv4.h \
> celt/arm/kiss_fft_armv5e.h \
> celt/arm/pitch_arm.h \
> +celt/arm/fft_arm.h \
> +celt/arm/mdct_arm.h \
> celt/x86/pitch_sse.h \
> celt/x86/x86cpu.h
> diff --git a/celt_sources.mk b/celt_sources.mk
> index 29ec937..7121301 100644
> --- a/celt_sources.mk
> +++ b/celt_sources.mk
> @@ -35,3 +35,7 @@ celt/arm/armopts.s.in
>
> CELT_SOURCES_ARM_NEON_INTR = \
> celt/arm/celt_neon_intr.c
> +
> +CELT_SOURCES_ARM_NE10= \
> +celt/arm/celt_ne10_fft.c \
> +celt/arm/celt_ne10_mdct.c
> diff --git a/configure.ac b/configure.ac
> index 87cece9..baa3425 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -351,6 +351,80 @@ AM_CONDITIONAL([OPUS_ARM_EXTERNAL_ASM],
> AM_CONDITIONAL([HAVE_SSE4_1], [false])
> AM_CONDITIONAL([HAVE_SSE2], [false])
>
> +dnl OPUS_PATH_NE10: build NE10_CFLAGS and NE10_LIBS from the --with-NE10 options, then link-test libNE10.
> +AC_DEFUN([OPUS_PATH_NE10], [
> + AC_ARG_WITH(NE10,
> + AC_HELP_STRING([--with-NE10=PFX],[Prefix where libNE10 is installed (optional)]),
> + NE10_prefix="$withval", NE10_prefix="")
> + AC_ARG_WITH(NE10-libraries,
> + AC_HELP_STRING([--with-NE10-libraries=DIR],
> + [Directory where libNE10 library is installed (optional)]),
> + NE10_libraries="$withval", NE10_libraries="")
> + AC_ARG_WITH(NE10-includes,
> + AC_HELP_STRING([--with-NE10-includes=DIR],
> + [Directory where libNE10 header files are installed (optional)]),
> + NE10_includes="$withval", NE10_includes="")
> + # Library search path: explicit directory first, then the NE10 prefix, then the install prefix.
> + if test "x$NE10_libraries" != "x" ; then
> + NE10_LIBS="-L$NE10_libraries"
> + elif test "x$NE10_prefix" = "xno" || test "x$NE10_prefix" = "xyes" ; then
> + NE10_LIBS=""
> + elif test "x$NE10_prefix" != "x" ; then
> + NE10_LIBS="-L$NE10_prefix/lib"
> + elif test "x$prefix" != "xNONE" ; then
> + NE10_LIBS="-L$prefix/lib"
> + fi
> + # The library itself is left out only when the NE10 prefix is "no".
> + if test "x$NE10_prefix" != "xno" ; then
> + NE10_LIBS="$NE10_LIBS -lNE10"
> + fi
> + # Header search path: same precedence as the library search path above.
> + if test "x$NE10_includes" != "x" ; then
> + NE10_CFLAGS="-I$NE10_includes"
> + elif test "x$NE10_prefix" = "xno" || test "x$NE10_prefix" = "xyes" ; then
> + NE10_CFLAGS=""
> + elif test "x$NE10_prefix" != "x" ; then
> + NE10_CFLAGS="-I$NE10_prefix/include"
> + elif test "x$prefix" != "xNONE"; then
> + NE10_CFLAGS="-I$prefix/include"
> + fi
> + # Link a small program against NE10 with CFLAGS and LIBS temporarily replaced, then restore them.
> + AC_MSG_CHECKING(for NE10)
> + save_CFLAGS="$CFLAGS"; CFLAGS="$NE10_CFLAGS"
> + save_LIBS="$LIBS"; LIBS="$NE10_LIBS"
> + AC_LINK_IFELSE(
> + [
> + AC_LANG_PROGRAM(
> + [[#include <NE10_init.h>
> + ]],
> + [[
> + ne10_fft_cfg_float32_t cfg;
> + cfg = ne10_fft_alloc_c2c_float32_neon(480);
> + ]]
> + )
> + ],[
> + HAVE_ARM_NE10=1
> + AC_MSG_RESULT([yes])
> + ],[
> + HAVE_ARM_NE10=0
> + AC_MSG_RESULT([no])
> + NE10_CFLAGS=""
> + NE10_LIBS=""
> + ]
> + )
> + CFLAGS="$save_CFLAGS"; LIBS="$save_LIBS"
> + #Now we know if libNE10 is installed or not
> + AS_IF([test x"$HAVE_ARM_NE10" = x"1"],
> + [
> + AC_DEFINE([HAVE_ARM_NE10], 1, [NE10 library is installed on host. Make sure it is on target!])
> + AC_SUBST(HAVE_ARM_NE10)
> + AC_SUBST(NE10_CFLAGS)
> + AC_SUBST(NE10_LIBS)
> + ],[]
> + )
> + ]
> +)
> +
> AS_IF([test x"$enable_intrinsics" = x"yes"],[
> case $host_cpu in
> arm*)
> @@ -391,6 +465,10 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
> AC_DEFINE([OPUS_ARM_MAY_HAVE_EDSP], 1, [Define if compiler support EDSP Instructions])
> AC_DEFINE([OPUS_ARM_MAY_HAVE_MEDIA], 1, [Define if compiler support MEDIA Instructions])
> AC_DEFINE([OPUS_ARM_MAY_HAVE_NEON], 1, [Define if compiler support NEON instructions])
> +
> + OPUS_PATH_NE10()
> + AS_IF([test x"$NE10_LIBS" != "x"],
> + [enable_intrinsics="$enable_intrinsics NE10"],[])
> ],
> [
> AC_MSG_WARN([Compiler does not support ARM intrinsics])
> @@ -516,6 +594,9 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
> AM_CONDITIONAL([CPU_ARM], [test "$cpu_arm" = "yes"])
> AM_CONDITIONAL([OPUS_ARM_NEON_INTR],
> [test x"$OPUS_ARM_NEON_INTR" = x"1"])
> +AM_CONDITIONAL([HAVE_ARM_NE10],
> + [test x"$HAVE_ARM_NE10" = x"1"])
> +
>
> AS_IF([test x"$enable_rtcd" = x"yes"],[
> AS_IF([test x"$rtcd_support" != x"no"],[
> diff --git a/src/analysis.c b/src/analysis.c
> index 2ee8533..e04b282 100644
> --- a/src/analysis.c
> +++ b/src/analysis.c
> @@ -189,7 +189,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
> info_out->music_prob = psum;
> }
>
> -static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
> +static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix, int arch)
> {
> int i, b;
> const kiss_fft_state *kfft;
> @@ -262,7 +262,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
> remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
> downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
> tonal->mem_fill = 240 + remaining;
> - opus_fft(kfft, in, out);
> + opus_fft(kfft, in, out, arch);
> #ifndef FIXED_POINT
> /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */
> if (celt_isnan(out[0].r))
> @@ -635,7 +635,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
>
> void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
> int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
> - int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
> + int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info, int arch)
> {
> int offset;
> int pcm_len;
> @@ -648,7 +648,7 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co
> pcm_len = analysis_frame_size - analysis->analysis_offset;
> offset = analysis->analysis_offset;
> do {
> - tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
> + tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix, arch);
> offset += 480;
> pcm_len -= 480;
> } while (pcm_len>0);
> diff --git a/src/analysis.h b/src/analysis.h
> index 85a73d7..9c328e8 100644
> --- a/src/analysis.h
> +++ b/src/analysis.h
> @@ -82,6 +82,6 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
>
> void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
> int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
> - int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
> + int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info, int arch);
>
> #endif
> diff --git a/src/opus_encoder.c b/src/opus_encoder.c
> index d94163f..4656da5 100644
> --- a/src/opus_encoder.c
> +++ b/src/opus_encoder.c
> @@ -1006,7 +1006,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
> analysis_read_subframe_bak = st->analysis.read_subframe;
> run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size,
> c1, c2, analysis_channels, st->Fs,
> - lsb_depth, downmix, &analysis_info);
> + lsb_depth, downmix, &analysis_info, st->arch);
> }
> #else
> (void)analysis_pcm;
> diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c
> index 6e87337..1281e85 100644
> --- a/src/opus_multistream_encoder.c
> +++ b/src/opus_multistream_encoder.c
> @@ -71,6 +71,7 @@ typedef void (*opus_copy_channel_in_func)(
>
> struct OpusMSEncoder {
> ChannelLayout layout;
> + int arch;
> int lfe_stream;
> int application;
> int variable_duration;
> @@ -218,7 +219,7 @@ opus_val16 logSum(opus_val16 a, opus_val16 b)
> #endif
>
> void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem,
> - int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in
> + int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in, int arch
> )
> {
> int c;
> @@ -257,7 +258,8 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b
> OPUS_COPY(in, mem+c*overlap, overlap);
> (*copy_channel_in)(x, 1, pcm, channels, c, len);
> celt_preemphasis(x, in+overlap, frame_size, 1, upsample, celt_mode->preemph, preemph_mem+c, 0);
> - clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1);
> + clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window,
> + overlap, celt_mode->maxLM-LM, 1, arch);
> if (upsample != 1)
> {
> int bound = len;
> @@ -411,6 +413,7 @@ static int opus_multistream_encoder_init_impl(
> (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
> return OPUS_BAD_ARG;
>
> + st->arch = opus_select_arch();
> st->layout.nb_channels = channels;
> st->layout.nb_streams = streams;
> st->layout.nb_coupled_streams = coupled_streams;
> @@ -767,7 +770,7 @@ static int opus_multistream_encode_native
> ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);
> if (st->surround)
> {
> - surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in);
> + surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in, st->arch);
> }
>
> /* Compute bitrate allocation between streams (this could be a lot better) */
> --
> 1.7.9.5
>
More information about the opus
mailing list