[xiph-commits] r10695 - trunk/speex/libspeex

Sat Jan 7 05:28:59 PST 2006

Author: jm
Date: 2006-01-07 05:28:56 -0800 (Sat, 07 Jan 2006)
New Revision: 10695

Modified:
   trunk/speex/libspeex/mdf.c
Log:
Weights now use 32 bits instead of 16. This seems to improve the adaptation,
especially for short frame size (less prone to thresholding effects). Also
added some general documentation.


Modified: trunk/speex/libspeex/mdf.c
===================================================================

--- trunk/speex/libspeex/mdf.c	2006-01-07 10:23:55 UTC (rev 10694)
+++ trunk/speex/libspeex/mdf.c	2006-01-07 13:28:56 UTC (rev 10695)
@@ -1,12 +1,8 @@
-/* Copyright (C) 2003-2005 Jean-Marc Valin
+/* Copyright (C) 2003-2006 Jean-Marc Valin
 
-   File: speex_echo.c
-   Echo cancelling based on the MDF algorithm described in:
+   File: mdf.c
+   Echo canceller based on the MDF algorithm (see below)
 
-   J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter, 
-   IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2, 
-   February 1990.
-
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
    met:
@@ -34,6 +30,33 @@
    POSSIBILITY OF SUCH DAMAGE.
 */
 
+/*
+   The echo canceller is based on the MDF algorithm described in:
+
+   J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter, 
+   IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2, 
+   February 1990.
+   
+   We use the Alternatively Updated MDF (AUMDF) variant. Robustness to 
+   double-talk is achieved using a variable learning rate as described in:
+   
+   Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo 
+   Cancellation With Double-Talk. Submitted to IEEE Transactions on Speech 
+   and Audio Processing, 2006.
+   
+   About the fixed-point version:
+   All the signals are represented with 16-bit words. The filter weights 
+   are represented with 32-bit words, but only the top 16 bits are used
+   in most cases. The lower 16 bits are completely reliable (due to the
+   fact that the update is done only on the top bits), but help in the
+   adaptation -- probably by removing a "threshold effect" due to
+   quantization when the gradient is small.
+   
+   Another kludge that seems to work good: when performing the weight
+   update, we only move half the way toward the "goal" this seems to
+   reduce the effect of quantization noise in the update phase.
+   
+ */
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
@@ -64,12 +87,15 @@
 static const spx_float_t MAX_ALPHA = ((spx_float_t){16777, -21});
 static const spx_float_t ALPHA0 = ((spx_float_t){26214, -19});
 static const spx_float_t MIN_LEAK = ((spx_float_t){16777, -24});
+#define TOP16(x) ((x)>>16)
 #else
 static const spx_float_t MAX_ALPHA = .008f;
 static const spx_float_t ALPHA0 = .05f;
 static const spx_float_t MIN_LEAK = .001f;
+#define TOP16(x) (x)
 #endif
 
+
 /** Speex echo cancellation state. */
 struct SpeexEchoState_ {
    int frame_size;           /**< Number of samples processed each time */
@@ -91,8 +117,8 @@
    spx_word32_t *Yps;
    spx_word16_t *Y;
    spx_word16_t *E;
-   spx_word16_t *PHI;
-   spx_word16_t *W;
+   spx_word32_t *PHI;
+   spx_word32_t *W;
    spx_word32_t *power;
    spx_float_t *power_1;
    spx_word32_t *Rf;
@@ -103,7 +129,6 @@
    spx_float_t Pey;
    spx_float_t Pyy;
    spx_word16_t *window;
-   /*struct drft_lookup *fft_lookup;*/
    void *fft_table;
    spx_word16_t memX, memD, memE;
    spx_word16_t preemph;
@@ -140,13 +165,13 @@
 
 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
 #ifdef FIXED_POINT
-static inline void spectral_mul_accum(spx_word16_t *X, spx_word16_t *Y, spx_word16_t *acc, int N, int M)
+static inline void spectral_mul_accum(spx_word16_t *X, spx_word32_t *Y, spx_word16_t *acc, int N, int M)
 {
    int i,j;
    spx_word32_t tmp1=0,tmp2=0;
    for (j=0;j<M;j++)
    {
-      tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]);
+      tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N]));
    }
    acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
    for (i=1;i<N-1;i+=2)
@@ -154,8 +179,8 @@
       tmp1 = tmp2 = 0;
       for (j=0;j<M;j++)
       {
-         tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1]));
-         tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]);
+         tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1])));
+         tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1]));
       }
       acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
       acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
@@ -163,12 +188,12 @@
    tmp1 = tmp2 = 0;
    for (j=0;j<M;j++)
    {
-      tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]);
+      tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1]));
    }
    acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
 }
 #else
-static inline void spectral_mul_accum(spx_word16_t *X, spx_word16_t *Y, spx_word16_t *acc, int N, int M)
+static inline void spectral_mul_accum(spx_word16_t *X, spx_word32_t *Y, spx_word16_t *acc, int N, int M)
 {
    int i,j;
    for (i=0;i<N;i++)
@@ -189,7 +214,7 @@
 #endif
 
 /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
-static inline void weighted_spectral_mul_conj(spx_float_t *w, spx_word16_t *X, spx_word16_t *Y, spx_word16_t *prod, int N)
+static inline void weighted_spectral_mul_conj(spx_float_t *w, spx_word16_t *X, spx_word16_t *Y, spx_word32_t *prod, int N)
 {
    int i, j;
    prod[0] = FLOAT_MUL32(w[0],MULT16_16(X[0],Y[0]));
@@ -242,8 +267,8 @@
    st->X = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t));
    st->Y = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
    st->E = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
-   st->W = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t));
-   st->PHI = (spx_word16_t*)speex_alloc(M*N*sizeof(spx_word16_t));
+   st->W = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t));
+   st->PHI = (spx_word32_t*)speex_alloc(M*N*sizeof(spx_word32_t));
    st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
    st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
    st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
@@ -423,6 +448,9 @@
    /* Smooth echo energy estimate over time */
    for (j=0;j<=st->frame_size;j++)
       st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
+   
+   /* Enable this to compute the power based only on the tail (would need to compute more 
+      efficiently to make this really useful */
    if (0)
    {
       float scale2 = .5f/M;
@@ -475,8 +503,9 @@
       leak_estimate = 32767;
    else
       leak_estimate = SHL16(leak_estimate,1);
-
    /*printf ("%f\n", leak_estimate);*/
+   
+   /* Compute Residual to Error Ratio */
 #ifdef FIXED_POINT
    tmp32 = MULT16_32_Q15(leak_estimate,Syy);
    tmp32 = ADD32(tmp32, SHL32(tmp32,1));
@@ -512,7 +541,7 @@
 #endif
          r = MULT16_32_Q15(QCONST16(.8,15),r) + MULT16_32_Q15(QCONST16(.2,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
          /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
-         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(MULT16_32_Q15(M_1,r),FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT);
+         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(MULT16_32_Q15(M_1,r),FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
       }
    } else {
       spx_word32_t Sxx;
@@ -532,7 +561,7 @@
       adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(MULT16_32_Q15(M_1,Sxx), See),15));
       
       for (i=0;i<=st->frame_size;i++)
-         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT-15);
+         st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);
 
 
       /* How much have we adapted so far? */
@@ -552,9 +581,10 @@
       st->PHI[i] = st->W[i] - st->PHI[i];
    }
    
-   /* AUMDF weight constraint */
+   /* Update weight to prevent circular convolution (MDF / AUMDF) */
    for (j=0;j<M;j++)
    {
+      /* This is a variant of the Alternatively Updated MDF (AUMDF) */
       /* Remove the "if" to make this an MDF filter */
       if (j==M-1 || st->cancel_count%(M-1) == j)
       {
@@ -562,7 +592,7 @@
 #ifdef FIXED_POINT
          spx_word16_t w2[N];
          for (i=0;i<N;i++)
-            w2[i] = PSHR16(st->W[j*N+i],NORMALIZE_SCALEDOWN);
+            w2[i] = PSHR32(st->W[j*N+i],NORMALIZE_SCALEDOWN+16);
          spx_ifft(st->fft_table, w2, w);
          for (i=0;i<st->frame_size;i++)
          {
@@ -573,8 +603,9 @@
             w[i]=SHL(w[i],NORMALIZE_SCALEUP);
          }
          spx_fft(st->fft_table, w, w2);
+         /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
          for (i=0;i<N;i++)
-            st->W[j*N+i] -= SHL16(w2[i],NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
+            st->W[j*N+i] -= SHL32(w2[i],16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
 #else
          spx_ifft(st->fft_table, &st->W[j*N], w);
          for (i=st->frame_size;i<N;i++)