[opus] [PATCH 9/9] Optimize silk_inner_prod_aligned_scale() for ARM NEON

Linfeng Zhang linfengz at google.com
Fri Aug 26 19:05:21 UTC 2016


Created a corresponding unit test; the optimization is bit-exact with the C
function.
---
 silk/SigProc_FIX.h                                 |  7 ++-
 silk/arm/arm_silk_map.c                            | 12 ++++
 silk/arm/inner_prod_aligned_arm.h                  | 58 +++++++++++++++++++
 silk/arm/inner_prod_aligned_neon_intr.c            | 66 ++++++++++++++++++++++
 silk/enc_API.c                                     |  2 +-
 silk/inner_prod_aligned.c                          |  2 +-
 silk/main.h                                        |  6 +-
 silk/stereo_LR_to_MS.c                             |  7 ++-
 silk/stereo_find_predictor.c                       |  5 +-
 .../test_unit_optimization_inner_prod_aligned.c    | 63 +++++++++++++++++++++
 silk_headers.mk                                    |  1 +
 silk_sources.mk                                    |  1 +
 tests/test_unit_optimization.c                     |  2 +
 13 files changed, 222 insertions(+), 10 deletions(-)
 create mode 100644 silk/arm/inner_prod_aligned_arm.h
 create mode 100644 silk/arm/inner_prod_aligned_neon_intr.c
 create mode 100644 silk/tests/test_unit_optimization_inner_prod_aligned.c

diff --git a/silk/SigProc_FIX.h b/silk/SigProc_FIX.h
index a739c8d..b155cad 100644
--- a/silk/SigProc_FIX.h
+++ b/silk/SigProc_FIX.h
@@ -49,6 +49,7 @@ extern "C"
 #endif
 
 #if (defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
+#include "arm/inner_prod_aligned_arm.h"
 #include "arm/LPC_analysis_filter_arm.h"
 #include "arm/LPC_inv_pred_gain_arm.h"
 #endif
@@ -377,7 +378,7 @@ opus_int32 silk_inner_prod_aligned(
 );
 
 
-opus_int32 silk_inner_prod_aligned_scale(
+opus_int32 silk_inner_prod_aligned_scale_c(
     const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
     const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
     const opus_int              scale,              /*    I number of bits to shift                                     */
@@ -390,6 +391,10 @@ opus_int64 silk_inner_prod16_aligned_64_c(
     const opus_int              len                 /*    I vector lengths                                              */
 );
 
+#if !defined(OVERRIDE_silk_inner_prod_aligned_scale) /* No platform override defined: dispatch to the C version; arch is evaluated and discarded */
+#define silk_inner_prod_aligned_scale(inVec1, inVec2, scale, len, arch) ((void)(arch),silk_inner_prod_aligned_scale_c(inVec1, inVec2, scale, len))
+#endif
+
 /********************************************************************/
 /*                                MACROS                            */
 /********************************************************************/
diff --git a/silk/arm/arm_silk_map.c b/silk/arm/arm_silk_map.c
index 51b17c7..4398ebe 100644
--- a/silk/arm/arm_silk_map.c
+++ b/silk/arm/arm_silk_map.c
@@ -36,6 +36,18 @@ POSSIBILITY OF SUCH DAMAGE.
 
 # if (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
 
+opus_int32 (*const SILK_INNER_PROD_ALIGNED_SCALE_IMPL[OPUS_ARCHMASK + 1])( /* Function table with OPUS_ARCHMASK + 1 slots; the comment on each entry names the arch it serves */
+        const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
+        const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
+        const opus_int              scale,              /*    I number of bits to shift                                     */
+        const opus_int              len                 /*    I vector lengths                                              */
+) = {
+      silk_inner_prod_aligned_scale_c,              /* ARMv4 */
+      silk_inner_prod_aligned_scale_c,              /* EDSP */
+      silk_inner_prod_aligned_scale_c,              /* Media */
+      MAY_HAVE_NEON(silk_inner_prod_aligned_scale), /* Neon; NOTE(review): MAY_HAVE_NEON() is defined outside this hunk -- presumably picks the _neon variant when available, confirm */
+};
+
 void (*const SILK_LPC_ANALYSIS_FILTER_IMPL[OPUS_ARCHMASK + 1])(
         opus_int16                  *out,                       /* O    Output signal                                               */
         const opus_int16            *in,                        /* I    Input signal                                                */
diff --git a/silk/arm/inner_prod_aligned_arm.h b/silk/arm/inner_prod_aligned_arm.h
new file mode 100644
index 0000000..2642195
--- /dev/null
+++ b/silk/arm/inner_prod_aligned_arm.h
@@ -0,0 +1,58 @@
+/* Copyright (c) 2016 Google Inc. */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#if !defined(INNER_PROD_ALIGNED_ARM_H)
+# define INNER_PROD_ALIGNED_ARM_H
+
+# include "celt/arm/armcpu.h"
+
+# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
+opus_int32 silk_inner_prod_aligned_scale_neon( /* NEON intrinsics version; takes the same four arguments the dispatch macros below forward */
+    const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
+    const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
+    const opus_int              scale,              /*    I number of bits to shift                                     */
+    const opus_int              len                 /*    I vector lengths                                              */
+);
+# endif
+
+# if !defined(OPUS_HAVE_RTCD) /* No run-time CPU detection: bind the call at compile time; arch is evaluated and discarded */
+#  define OVERRIDE_silk_inner_prod_aligned_scale                   (1)
+#  define silk_inner_prod_aligned_scale(inVec1, inVec2, scale, len, arch)  ((void)(arch),PRESUME_NEON(silk_inner_prod_aligned_scale)(inVec1, inVec2, scale, len))
+# endif /* NOTE(review): PRESUME_NEON() is not defined in this header -- presumably provided by celt/arm/armcpu.h; confirm */
+
+# if !defined(OVERRIDE_silk_inner_prod_aligned_scale) /* Not bound above, which means OPUS_HAVE_RTCD is defined */
+/*Is run-time CPU detection enabled on this platform?*/
+#  if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
+extern opus_int32 (*const SILK_INNER_PROD_ALIGNED_SCALE_IMPL[OPUS_ARCHMASK+1])(const opus_int16 *const inVec1, const opus_int16 *const inVec2, const opus_int scale, const opus_int len); /* Indexed by (arch)&OPUS_ARCHMASK in the macro below */
+#   define OVERRIDE_silk_inner_prod_aligned_scale                  (1)
+#   define silk_inner_prod_aligned_scale(inVec1, inVec2, scale, len, arch) ((*SILK_INNER_PROD_ALIGNED_SCALE_IMPL[(arch)&OPUS_ARCHMASK])(inVec1, inVec2, scale, len))
+#  elif defined(OPUS_ARM_PRESUME_NEON_INTR) /* NEON intrinsics presumed: call the NEON version directly and ignore arch */
+#   define OVERRIDE_silk_inner_prod_aligned_scale                  (1)
+#   define silk_inner_prod_aligned_scale(inVec1, inVec2, scale, len, arch) ((void)(arch),silk_inner_prod_aligned_scale_neon(inVec1, inVec2, scale, len))
+#  endif
+# endif
+
+#endif /* end INNER_PROD_ALIGNED_ARM_H */
diff --git a/silk/arm/inner_prod_aligned_neon_intr.c b/silk/arm/inner_prod_aligned_neon_intr.c
new file mode 100644
index 0000000..358165c
--- /dev/null
+++ b/silk/arm/inner_prod_aligned_neon_intr.c
@@ -0,0 +1,66 @@
+/* Copyright (c) 2016 Google Inc. */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <arm_neon.h>
+#include "SigProc_FIX.h"
+
+opus_int32 silk_inner_prod_aligned_scale_neon(      /*    O sum over i of ( inVec1[i] * inVec2[i] ) >> scale, NEON version          */
+    const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
+    const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
+    const opus_int              scale,              /*    I number of bits to shift                                     */
+    const opus_int              len                 /*    I vector lengths                                              */
+)
+{
+    opus_int        i;
+    opus_int32      sum;
+    const int32x4_t scaleLeft_s32x4 = vdupq_n_s32(-scale); /* vshlq_s32() with a negative count shifts right, so -scale yields >> scale */
+    int32x4_t       sum_s32x4       = vdupq_n_s32(0);      /* Four 32-bit partial sums, one per lane */
+    int64x2_t       sum_s64x2;
+    int64x1_t       sum_s64x1;
+
+    for( i = 0; i < len - 7; i += 8 ) { /* Eight samples per iteration; the fewer-than-8 leftovers go to the scalar loop below */
+        const int16x8_t in1 = vld1q_s16(&inVec1[i]);
+        const int16x8_t in2 = vld1q_s16(&inVec2[i]);
+        int32x4_t t0        = vmull_s16(vget_low_s16 (in1), vget_low_s16 (in2)); /* Widening 16x16->32 products of elements 0..3 */
+        int32x4_t t1        = vmull_s16(vget_high_s16(in1), vget_high_s16(in2)); /* Widening 16x16->32 products of elements 4..7 */
+        t0                  = vshlq_s32(t0, scaleLeft_s32x4); /* Each product is shifted before it is accumulated */
+        sum_s32x4           = vaddq_s32(sum_s32x4, t0);
+        t1                  = vshlq_s32(t1, scaleLeft_s32x4);
+        sum_s32x4           = vaddq_s32(sum_s32x4, t1);
+    }
+    sum_s64x2 = vpaddlq_s32(sum_s32x4); /* Pairwise-add adjacent 32-bit lanes into two 64-bit lanes */
+    sum_s64x1 = vadd_s64(vget_low_s64(sum_s64x2), vget_high_s64(sum_s64x2)); /* Fold the two 64-bit lanes into one */
+    sum       = vget_lane_s64(sum_s64x1, 0); /* The 64-bit lane value is narrowed to opus_int32 by this assignment */
+
+    for( ; i < len; i++ ) { /* Scalar tail for the remaining samples */
+        sum = silk_ADD_RSHIFT32( sum, silk_SMULBB( inVec1[ i ], inVec2[ i ] ), scale );
+    }
+    return sum;
+}
diff --git a/silk/enc_API.c b/silk/enc_API.c
index ba3db06..09fd551 100644
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@@ -411,7 +411,7 @@ opus_int silk_Encode(                                   /* O    Returns error co
                 silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ],
                     psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ],
                     MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono,
-                    psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length );
+                    psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length, psEnc->state_Fxx[ 0 ].sCmn.arch );
                 if( psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] == 0 ) {
                     /* Reset side channel encoder memory for first frame with side coding */
                     if( psEnc->prev_decode_only_middle == 1 ) {
diff --git a/silk/inner_prod_aligned.c b/silk/inner_prod_aligned.c
index 257ae9e..23db9ee 100644
--- a/silk/inner_prod_aligned.c
+++ b/silk/inner_prod_aligned.c
@@ -31,7 +31,7 @@ POSSIBILITY OF SUCH DAMAGE.
 
 #include "SigProc_FIX.h"
 
-opus_int32 silk_inner_prod_aligned_scale(
+opus_int32 silk_inner_prod_aligned_scale_c(
     const opus_int16 *const     inVec1,             /*    I input vector 1                                              */
     const opus_int16 *const     inVec2,             /*    I input vector 2                                              */
     const opus_int              scale,              /*    I number of bits to shift                                     */
diff --git a/silk/main.h b/silk/main.h
index 73c94fc..ef57f5e 100644
--- a/silk/main.h
+++ b/silk/main.h
@@ -58,7 +58,8 @@ void silk_stereo_LR_to_MS(
     opus_int                    prev_speech_act_Q8,             /* I    Speech activity level in previous frame     */
     opus_int                    toMono,                         /* I    Last frame before a stereo->mono transition */
     opus_int                    fs_kHz,                         /* I    Sample rate (kHz)                           */
-    opus_int                    frame_length                    /* I    Number of samples                           */
+    opus_int                    frame_length,                   /* I    Number of samples                           */
+    int                         arch                            /* I    Architecture                                */
 );
 
 /* Convert adaptive Mid/Side representation to Left/Right stereo signal */
@@ -78,7 +79,8 @@ opus_int32 silk_stereo_find_predictor(                          /* O    Returns
     const opus_int16            y[],                            /* I    Target signal                               */
     opus_int32                  mid_res_amp_Q0[],               /* I/O  Smoothed mid, residual norms                */
     opus_int                    length,                         /* I    Number of samples                           */
-    opus_int                    smooth_coef_Q16                 /* I    Smoothing coefficient                       */
+    opus_int                    smooth_coef_Q16,                /* I    Smoothing coefficient                       */
+    int                         arch                            /* I    Architecture                                */
 );
 
 /* Quantize mid/side predictors */
diff --git a/silk/stereo_LR_to_MS.c b/silk/stereo_LR_to_MS.c
index dda0298..4c161af 100644
--- a/silk/stereo_LR_to_MS.c
+++ b/silk/stereo_LR_to_MS.c
@@ -44,7 +44,8 @@ void silk_stereo_LR_to_MS(
     opus_int                    prev_speech_act_Q8,             /* I    Speech activity level in previous frame     */
     opus_int                    toMono,                         /* I    Last frame before a stereo->mono transition */
     opus_int                    fs_kHz,                         /* I    Sample rate (kHz)                           */
-    opus_int                    frame_length                    /* I    Number of samples                           */
+    opus_int                    frame_length,                   /* I    Number of samples                           */
+    int                         arch                            /* I    Architecture                                */
 )
 {
     opus_int   n, is10msFrame, denom_Q16, delta0_Q13, delta1_Q13;
@@ -98,8 +99,8 @@ void silk_stereo_LR_to_MS(
         SILK_FIX_CONST( STEREO_RATIO_SMOOTH_COEF,     16 );
     smooth_coef_Q16 = silk_SMULWB( silk_SMULBB( prev_speech_act_Q8, prev_speech_act_Q8 ), smooth_coef_Q16 );
 
-    pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16 );
-    pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16 );
+    pred_Q13[ 0 ] = silk_stereo_find_predictor( &LP_ratio_Q14, LP_mid, LP_side, &state->mid_side_amp_Q0[ 0 ], frame_length, smooth_coef_Q16, arch );
+    pred_Q13[ 1 ] = silk_stereo_find_predictor( &HP_ratio_Q14, HP_mid, HP_side, &state->mid_side_amp_Q0[ 2 ], frame_length, smooth_coef_Q16, arch );
     /* Ratio of the norms of residual and mid signals */
     frac_Q16 = silk_SMLABB( HP_ratio_Q14, LP_ratio_Q14, 3 );
     frac_Q16 = silk_min( frac_Q16, SILK_FIX_CONST( 1, 16 ) );
diff --git a/silk/stereo_find_predictor.c b/silk/stereo_find_predictor.c
index e30e90b..b62a099 100644
--- a/silk/stereo_find_predictor.c
+++ b/silk/stereo_find_predictor.c
@@ -38,7 +38,8 @@ opus_int32 silk_stereo_find_predictor(                          /* O    Returns
     const opus_int16            y[],                            /* I    Target signal                               */
     opus_int32                  mid_res_amp_Q0[],               /* I/O  Smoothed mid, residual norms                */
     opus_int                    length,                         /* I    Number of samples                           */
-    opus_int                    smooth_coef_Q16                 /* I    Smoothing coefficient                       */
+    opus_int                    smooth_coef_Q16,                /* I    Smoothing coefficient                       */
+    int                         arch                            /* I    Architecture                                */
 )
 {
     opus_int   scale, scale1, scale2;
@@ -52,7 +53,7 @@ opus_int32 silk_stereo_find_predictor(                          /* O    Returns
     nrgy = silk_RSHIFT32( nrgy, scale - scale2 );
     nrgx = silk_RSHIFT32( nrgx, scale - scale1 );
     nrgx = silk_max_int( nrgx, 1 );
-    corr = silk_inner_prod_aligned_scale( x, y, scale, length );
+    corr = silk_inner_prod_aligned_scale( x, y, scale, length, arch );
     pred_Q13 = silk_DIV32_varQ( corr, nrgx, 13 );
     pred_Q13 = silk_LIMIT( pred_Q13, -(1 << 14), 1 << 14 );
     pred2_Q10 = silk_SMULWB( pred_Q13, pred_Q13 );
diff --git a/silk/tests/test_unit_optimization_inner_prod_aligned.c b/silk/tests/test_unit_optimization_inner_prod_aligned.c
new file mode 100644
index 0000000..fe1092e
--- /dev/null
+++ b/silk/tests/test_unit_optimization_inner_prod_aligned.c
@@ -0,0 +1,63 @@
+/* Copyright (c) 2016 Google Inc. */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+#include "modes.h"
+#include "SigProc_FIX.h"
+
+#define MAX_LEN 240
+
+static int test_silk_inner_prod_aligned_scale(int arch) /* O 0 when every case matches, -1 on the first mismatch; I arch: forwarded to the dispatch macro */
+{
+    opus_int16 inVec1[MAX_LEN], inVec2[MAX_LEN];
+    opus_int   scale, len;
+    opus_int32 sum_org, sum_opt;
+    opus_int   i;
+
+    printf("%44s() ...", __func__);
+    for(scale = 0; scale <= 31; scale++) {
+        for(len = 0; len <= MAX_LEN; len++) { /* Every length from 0 (empty vectors) up to MAX_LEN. */
+            for (i=0;i<len;++i) {
+                inVec1[i] = (rand() % 65536) - 32768;
+                inVec2[i] = (rand() % 65536) - 32768; /* Fill the second operand as well; both calls below read it. */
+            }
+            sum_org = silk_inner_prod_aligned_scale_c(inVec1, inVec2, scale, len);     /* Reference C result. */
+            sum_opt = silk_inner_prod_aligned_scale(inVec1, inVec2, scale, len, arch); /* Result through the arch dispatch macro. */
+            if (sum_org != sum_opt) {
+                printf("scale=%2d len=%3d failed!\nError in silk_inner_prod_aligned_scale() unit test!!!\n", scale, len);
+                return -1;
+            }
+        }
+    }
+    printf(" passed!\n");
+    return 0;
+}
diff --git a/silk_headers.mk b/silk_headers.mk
index d3dce08..1761488 100644
--- a/silk_headers.mk
+++ b/silk_headers.mk
@@ -22,6 +22,7 @@ silk/resampler_rom.h \
 silk/resampler_structs.h \
 silk/SigProc_FIX.h \
 silk/x86/SigProc_FIX_sse.h \
+silk/arm/inner_prod_aligned_arm.h \
 silk/arm/LPC_analysis_filter_arm.h \
 silk/arm/LPC_inv_pred_gain_arm.h \
 silk/arm/macros_armv4.h \
diff --git a/silk_sources.mk b/silk_sources.mk
index 9dcfe83..1823d30 100644
--- a/silk_sources.mk
+++ b/silk_sources.mk
@@ -85,6 +85,7 @@ silk/x86/VQ_WMat_EC_sse.c
 
 SILK_SOURCES_ARM_NEON_INTR = \
 silk/arm/arm_silk_map.c \
+silk/arm/inner_prod_aligned_neon_intr.c \
 silk/arm/LPC_analysis_filter_neon_intr.c \
 silk/arm/LPC_inv_pred_gain_neon_intr.c \
 silk/arm/NSQ_del_dec_neon_intr.c \
diff --git a/tests/test_unit_optimization.c b/tests/test_unit_optimization.c
index 19fda42..0a8b734 100644
--- a/tests/test_unit_optimization.c
+++ b/tests/test_unit_optimization.c
@@ -45,6 +45,7 @@
 
 #endif
 
+# include "silk/tests/test_unit_optimization_inner_prod_aligned.c"
 # include "silk/tests/test_unit_optimization_LPC_analysis_filter.c"
 # include "silk/tests/test_unit_optimization_LPC_inv_pred_gain.c"
 # include "silk/tests/test_unit_optimization_NSQ_del_dec.c"
@@ -67,6 +68,7 @@ int main(void)
       result |= test_silk_LPC_inverse_pred_gain_Q24(arch);
       result |= test_warped_autocorrelation(arch);
 #endif /* FIXED_POINT */
+      result |= test_silk_inner_prod_aligned_scale(arch);
       result |= test_silk_LPC_analysis_filter(arch);
       result |= test_silk_LPC_inverse_pred_gain(arch);
       result |= test_silk_NSQ_del_dec(arch);
-- 
2.8.0.rc3.226.g39d4020



More information about the opus mailing list