[xiph-commits] r9236 - in trunk/speex: include/speex libspeex

jm at motherfish-iii.xiph.org jm at motherfish-iii.xiph.org
Sat May 7 17:08:46 PDT 2005


Author: jm
Date: 2005-05-07 17:08:43 -0700 (Sat, 07 May 2005)
New Revision: 9236

Modified:
   trunk/speex/include/speex/speex_echo.h
   trunk/speex/include/speex/speex_preprocess.h
   trunk/speex/libspeex/mdf.c
   trunk/speex/libspeex/preprocess.c
Log:
Simplified the code a lot. Put back the denoiser hooks for further
cancellation (changed spectral param to float* at least for now).


Modified: trunk/speex/include/speex/speex_echo.h
===================================================================
--- trunk/speex/include/speex/speex_echo.h	2005-05-07 22:42:39 UTC (rev 9235)
+++ trunk/speex/include/speex/speex_echo.h	2005-05-08 00:08:43 UTC (rev 9236)
@@ -44,6 +44,7 @@
    int window_size;
    int M;
    int cancel_count;
+   int adapted;
    float adapt_rate;
 
    float *x;
@@ -51,6 +52,8 @@
    float *d;
    float *D;
    float *y;
+   float *last_y;
+   float *Yps;
    float *Y;
    float *E;
    float *PHI;
@@ -58,6 +61,9 @@
    float *power;
    float *power_1;
    float *grad;
+   float *Rf;
+   float *Yf;
+   float *fratio;
 
    struct drft_lookup *fft_lookup;
 
@@ -72,7 +78,7 @@
 void speex_echo_state_destroy(SpeexEchoState *st);
 
 /** Performs echo cancellation a frame */
-void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, int *Y);
+void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, float *Y);
 
 /** Reset the echo canceller state */
 void speex_echo_reset(SpeexEchoState *st);

Modified: trunk/speex/include/speex/speex_preprocess.h
===================================================================
--- trunk/speex/include/speex/speex_preprocess.h	2005-05-07 22:42:39 UTC (rev 9235)
+++ trunk/speex/include/speex/speex_preprocess.h	2005-05-08 00:08:43 UTC (rev 9236)
@@ -109,10 +109,10 @@
 void speex_preprocess_state_destroy(SpeexPreprocessState *st);
 
 /** Preprocess a frame */
-int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, int *echo);
+int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, float *echo);
 
 /** Preprocess a frame */
-void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x, int *echo);
+void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x, float *echo);
 
 /** Used like the ioctl function to control the preprocessor parameters */
 int speex_preprocess_ctl(SpeexPreprocessState *st, int request, void *ptr);

Modified: trunk/speex/libspeex/mdf.c
===================================================================
--- trunk/speex/libspeex/mdf.c	2005-05-07 22:42:39 UTC (rev 9235)
+++ trunk/speex/libspeex/mdf.c	2005-05-08 00:08:43 UTC (rev 9236)
@@ -45,9 +45,68 @@
 /*#include <stdio.h>*/
 
 #define BETA .65
+//#define BETA 0
 
 #define min(a,b) ((a)<(b) ? (a) : (b))
 
+static inline float inner_prod(float *x, float *y, int N)
+{
+   int i;
+   float ret=0;
+   for (i=0;i<N;i++)
+      ret += x[i]*y[i];
+   return ret;
+}
+
+static inline void power_spectrum(float *X, float *ps, int N)
+{
+   int i, j;
+   ps[0]=X[0]*X[0];
+   for (i=1,j=1;i<N-1;i+=2,j++)
+   {
+      ps[j] =  X[i]*X[i] + X[i+1]*X[i+1];
+   }
+   ps[j]=X[i]*X[i];
+}
+
+static inline void spectral_mul_accum(float *X, float *Y, float *acc, int N)
+{
+   int i;
+   acc[0] += X[0]*Y[0];
+   for (i=1;i<N-1;i+=2)
+   {
+      acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
+      acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
+   }
+   acc[i] += X[i]*Y[i];
+}
+
+static inline void spectral_mul_conj(float *X, float *Y, float *prod, int N)
+{
+   int i;
+   prod[0] = X[0]*Y[0];
+   for (i=1;i<N-1;i+=2)
+   {
+      prod[i] = (X[i]*Y[i] + X[i+1]*Y[i+1]);
+      prod[i+1] = (-X[i+1]*Y[i] + X[i]*Y[i+1]);
+   }
+   prod[i] = X[i]*Y[i];
+}
+
+
+static inline void weighted_spectral_mul_conj(float *w, float *X, float *Y, float *prod, int N)
+{
+   int i, j;
+   prod[0] = w[0]*X[0]*Y[0];
+   for (i=1,j=1;i<N-1;i+=2,j++)
+   {
+      prod[i] = w[j]*(X[i]*Y[i] + X[i+1]*Y[i+1]);
+      prod[i+1] = w[j]*(-X[i+1]*Y[i] + X[i]*Y[i+1]);
+   }
+   prod[i] = w[j]*X[i]*Y[i];
+}
+
+
 /** Creates a new echo canceller state */
 SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
 {
@@ -67,8 +126,11 @@
    st->x = (float*)speex_alloc(N*sizeof(float));
    st->d = (float*)speex_alloc(N*sizeof(float));
    st->y = (float*)speex_alloc(N*sizeof(float));
+   st->Yps = (float*)speex_alloc(N*sizeof(float));
+   st->last_y = (float*)speex_alloc(N*sizeof(float));
    st->Yf = (float*)speex_alloc((st->frame_size+1)*sizeof(float));
    st->Rf = (float*)speex_alloc((st->frame_size+1)*sizeof(float));
+   st->fratio = (float*)speex_alloc((st->frame_size+1)*sizeof(float));
 
    st->X = (float*)speex_alloc(M*N*sizeof(float));
    st->D = (float*)speex_alloc(N*sizeof(float));
@@ -111,8 +173,11 @@
    speex_free(st->x);
    speex_free(st->d);
    speex_free(st->y);
+   speex_free(st->last_y);
+   speex_free(st->Yps);
    speex_free(st->Yf);
    speex_free(st->Rf);
+   speex_free(st->fratio);
 
    speex_free(st->X);
    speex_free(st->D);
@@ -128,7 +193,7 @@
 }
 
 /** Performs echo cancellation on a frame */
-void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, int *Yout)
+void speex_echo_cancel(SpeexEchoState *st, short *ref, short *echo, short *out, float *Yout)
 {
    int i,j,m;
    int N,M;
@@ -138,7 +203,9 @@
    float Eout=0;
    float See=0;
    float ESR;
-         
+   float SER;
+   float Sry=0,Srr=0,Syy=0,Sey=0,Soo=0;
+
    N = st->window_size;
    M = st->M;
    scale = 1.0f/N;
@@ -158,6 +225,7 @@
    for (i=0;i<N*(M-1);i++)
       st->X[i]=st->X[i+N];
 
+   /* Copy new echo frame */
    for (i=0;i<N;i++)
       st->X[(M-1)*N+i]=st->x[i];
 
@@ -165,98 +233,67 @@
    spx_drft_forward(st->fft_lookup, &st->X[(M-1)*N]);
 
    /* Compute filter response Y */
-   for (i=1;i<N-1;i+=2)
-   {
-      st->Y[i] = st->Y[i+1] = 0;
-      for (j=0;j<M;j++)
-      {
-         st->Y[i] += st->X[j*N+i]*st->W[j*N+i] - st->X[j*N+i+1]*st->W[j*N+i+1];
-         st->Y[i+1] += st->X[j*N+i+1]*st->W[j*N+i] + st->X[j*N+i]*st->W[j*N+i+1];
-      }
-   }
-   st->Y[0] = st->Y[N-1] = 0;
+   for (i=0;i<N;i++)
+      st->Y[i] = 0;
    for (j=0;j<M;j++)
-   {
-      st->Y[0] += st->X[j*N]*st->W[j*N];
-      st->Y[N-1] += st->X[(j+1)*N-1]*st->W[(j+1)*N-1];
-   }
-
-
-   /* Transform d (reference signal) to frequency domain */
-   for (i=0;i<N;i++)
-      st->D[i]=st->d[i];
-   spx_drft_forward(st->fft_lookup, st->D);
-
-   {
-      for (i=1,j=1;i<N-1;i+=2,j++)
-      {
-         st->Yf[j] =  st->Y[i]*st->Y[i] + st->Y[i+1]*st->Y[i+1];
-      }
-      st->Yf[0]=st->Y[0]*st->Y[0];
-      st->Yf[st->frame_size]=st->Y[i]*st->Y[i];
-      
-      for (i=1,j=1;i<N-1;i+=2,j++)
-      {
-         st->Rf[j] = (st->Y[i]-st->D[i])*(st->Y[i]-st->D[i]) + (st->Y[i+1]-st->D[i+1])*(st->Y[i+1]-st->D[i+1]);
-      }
-      st->Rf[0]=(st->Y[0]-st->D[0])*(st->Y[0]-st->D[0]);
-      st->Rf[st->frame_size]=(st->Y[i]-st->D[i])*(st->Y[i]-st->D[i]);
-      for (j=0;j<=st->frame_size;j++)
-      {
-         float r;
-         r = .3*st->Yf[j] / (1+st->Rf[j]);
-         if (r>1)
-            r = 1;
-         st->power_1[j] = r;
-         //printf ("%f ", r);
-      }
-      //printf ("\n");
-   }
+      spectral_mul_accum(&st->X[j*N], &st->W[j*N], st->Y, N);
    
-   /* Copy spectrum of Y to Yout for use in an echo post-filter */
-   if (Yout)
-   {
-      for (i=1,j=1;i<N-1;i+=2,j++)
-      {
-         Yout[j] =  st->Y[i]*st->Y[i] + st->Y[i+1]*st->Y[i+1];
-      }
-      Yout[0] = Yout[st->frame_size] = 0;
-      for (i=0;i<=st->frame_size;i++)
-         Yout[i] *= .1;
-   }
-
+   /* Convert Y (filter response) to time domain */
    for (i=0;i<N;i++)
       st->y[i] = st->Y[i];
-   
-   /* Convery Y (filter response) to time domain */
    spx_drft_backward(st->fft_lookup, st->y);
    for (i=0;i<N;i++)
       st->y[i] *= scale;
 
-   Eout = 0;
-   /* Compute error signal (echo canceller output) */
+   /* Transform d (reference signal) to frequency domain */
+   for (i=0;i<N;i++)
+      st->D[i]=st->d[i];
+   spx_drft_forward(st->fft_lookup, st->D);
+
+   /* Compute error signal (signal with echo removed) */ 
    for (i=0;i<st->frame_size;i++)
    {
       float tmp_out;
       tmp_out = (float)ref[i] - st->y[i+st->frame_size];
+      
+      st->E[i] = 0;
+      st->E[i+st->frame_size] = tmp_out;
       Eout += tmp_out*tmp_out;
       
+      /* Saturation */
       if (tmp_out>32767)
          tmp_out = 32767;
       else if (tmp_out<-32768)
          tmp_out = -32768;
-      out[i] = tmp_out;
-        
-      st->E[i] = 0;
-      st->E[i+st->frame_size] = out[i];
+      out[i] = tmp_out;  
    }
+   
+   /* Compute power spectrum of output (D-Y) and filter response (Y) */
+   for (i=0;i<N;i++)
+      st->D[i] -= st->Y[i];
+   power_spectrum(st->D, st->Rf, N);
+   power_spectrum(st->Y, st->Yf, N);
+   
+   /* Compute frequency-domain adaptation mask */
+   for (j=0;j<=st->frame_size;j++)
+   {
+      float r;
+      r = .1*st->Yf[j] / (1+st->Rf[j]);
+      if (r>1)
+         r = 1;
+      st->fratio[j] = r;
+      //printf ("%f ", r);
+   }
+   //printf ("\n");
 
+
    {
-      float Sry=0, Srr=0,Syy=0;
       /*float cos_dist;*/
       for (i=0;i<st->frame_size;i++)
       {
          Sry += st->y[i+st->frame_size] * ref[i];
+         Sey += st->y[i+st->frame_size] * (ref[i]-st->y[i+st->frame_size]);
+         Soo += (ref[i]-st->y[i+st->frame_size]) * (ref[i]-st->y[i+st->frame_size]);
          Srr += (float)ref[i] * (float)ref[i];
          See += (float)echo[i] * (float)echo[i];
          Syy += st->y[i+st->frame_size]*st->y[i+st->frame_size];
@@ -265,28 +302,55 @@
       /*printf (" %f ", cos_dist);*/
       spectral_dist = Sry/(1e8+Srr);
       /*printf (" %f\n", spectral_dist);*/
-      ESR = .2*Syy / (1+Eout);
+      SER = Srr / (1+See);
+      ESR = .1*Syy / (1+Eout);
       if (ESR>1)
          ESR = 1;
       
-      if (ESR>.5)// && st->cancel_count > 50)
+      if (ESR>.9 && st->cancel_count > 50)
       {
          if (!st->adapted)
             fprintf(stderr, "Adapted at %d\n", st->cancel_count); 
          st->adapted = 1;
       }
-      //printf ("%f %f %f %f %f\n", Srr, Syy, See, Eout, ESR);
+      printf ("%f %f %f %f %f %f %f %f %f\n", Srr, Syy, See, Eout, ESR, SER, Sry, Sey, Soo);
       for (i=0;i<=st->frame_size;i++)
       {
-         //st->power_1[i]  = (.1*ESR+.9*min(.3+2*ESR,st->power_1[i]));
+         st->fratio[i]  = (.1*ESR+.9*min(.1+ESR,st->fratio[i]));
+         //st->power_1[i]  = (.0*ESR+1.*min(.0+ESR,st->power_1[i]));
          //st->power_1[i]  = ESR;
-         printf ("%f ", st->power_1[i]);
+         //printf ("%f ", st->power_1[i]);
       }
-      printf ("\n");
+      //printf ("\n");
    }
    /* Convert error to frequency domain */
    spx_drft_forward(st->fft_lookup, st->E);
 
+   
+   if (See>1e8)
+      st->adapt_rate =.8/(2+M);
+   else if (See>3e7)
+      st->adapt_rate =.4/(2+M);
+   else if (See>1e7)
+      st->adapt_rate =.2/(2+M);
+   else if (See>3e6)
+      st->adapt_rate =.1/(2+M);
+   else
+      st->adapt_rate = 0;
+   
+   if (SER<.1)
+      st->adapt_rate =.8/(1+M);
+   else if (SER<1)
+      st->adapt_rate =.4/(1+M);
+   else if (SER<10)
+      st->adapt_rate =.2/(1+M);
+   else
+      st->adapt_rate = 0;
+   
+   if (st->adapted)
+      st->adapt_rate = .95f/(1+M);
+   
+
    /* Compute input power in each frequency bin */
    {
       float s;
@@ -304,9 +368,9 @@
          {
             float E = st->X[m*N+i]*st->X[m*N+i] + st->X[m*N+i+1]*st->X[m*N+i+1];
             tmp += E;
-            if (st->power[j] < .2*E)
+            /*if (st->power[j] < .2*E)
                st->power[j] = .2*E;
-
+            */
          }
          tmp *= s;
          if (st->cancel_count<M)
@@ -333,12 +397,10 @@
       }
       
       if (st->adapted)
-      //if (0)
       {
          for (i=0;i<=st->frame_size;i++)
          {
-            //st->power_1[i]  = (.1*ESR+.9*min(1.5f*ESR,st->power_1[i]));
-            st->power_1[i] /= (1e3f+st->power[i]);
+            st->power_1[i] = st->fratio[i] /(1+st->power[i]);
          }
       } else {
          for (i=0;i<=st->frame_size;i++)
@@ -346,19 +408,12 @@
       }
    }
 
+   
+
    /* Compute weight gradient */
    for (j=0;j<M;j++)
    {
-      for (i=1,m=1;i<N-1;i+=2,m++)
-      {
-         st->PHI[i] = st->power_1[m] 
-         * (st->X[j*N+i]*st->E[i] + st->X[j*N+i+1]*st->E[i+1]);
-         st->PHI[i+1] = st->power_1[m] 
-         * (-st->X[j*N+i+1]*st->E[i] + st->X[j*N+i]*st->E[i+1]);
-      }
-      st->PHI[0] = st->power_1[0] * st->X[j*N]*st->E[0];
-      st->PHI[N-1] = st->power_1[st->frame_size] * st->X[(j+1)*N-1]*st->E[N-1];
-      
+      weighted_spectral_mul_conj(st->power_1, &st->X[j*N], st->E, st->PHI, N);
 
 #if 0 /* Set to 1 to enable MDF instead of AUMDF (and comment out weight constraint below) */
       spx_drft_backward(st->fft_lookup, st->PHI);
@@ -377,57 +432,6 @@
 
       
    }
-
-   /* Adjust adaptation rate */
-   if (st->cancel_count>2*M)
-   {
-      if (st->cancel_count<8*M)
-      {
-         st->adapt_rate = .3f/(2+M);
-      } else {
-         if (spectral_dist > .5 && cos_dist > .7)
-            st->adapt_rate = .4f/(2+M);
-         else if (spectral_dist > .3 && cos_dist > .5)
-            st->adapt_rate = .2f/(2+M);
-         else if (spectral_dist > .15 && cos_dist > .3)
-            st->adapt_rate = .1f/(2+M);
-         else if (cos_dist > .01)
-            st->adapt_rate = .05f/(2+M);
-         else
-            st->adapt_rate = .01f/(2+M);
-      }
-   } else
-      st->adapt_rate = .0f;
-      
-      //st->adapt_rate *=4;// .1f/(2+M);
-   if (See>1e8)
-      st->adapt_rate =.8/(2+M);
-   else if (See>3e7)
-      st->adapt_rate =.4/(2+M);
-   else if (See>1e7)
-      st->adapt_rate =.2/(2+M);
-   else if (See>3e6)
-      st->adapt_rate =.1/(2+M);
-   else
-      st->adapt_rate = 0;
-   
-   st->adapt_rate =.4/(2+M);
-   /*if (st->cancel_count < 40)
-   st->adapt_rate *= 2.;
-   */
-   
-   /*if (st->cancel_count<30)
-      st->adapt_rate *= 1.5;
-   else
-      st->adapt_rate *= .9;
-   */
-#if 0
-   if (st->cancel_count>70)
-      st->adapt_rate = .6*ESR/(2+M);
-#else
-   if (st->adapted)
-      st->adapt_rate = .9f/(1+M);
-#endif
    /* Update weights */
    for (i=0;i<M*N;i++)
       st->W[i] += st->adapt_rate*st->grad[i];
@@ -449,5 +453,40 @@
 
    }
 
+   
+   
+   /* Compute spectrum of estimated echo for use in an echo post-filter (if necessary)*/
+   if (Yout)
+   {
+      for (i=1,j=1;i<N-1;i+=2,j++)
+      {
+         Yout[j] =  st->Y[i]*st->Y[i] + st->Y[i+1]*st->Y[i+1];
+      }
+      Yout[0] = Yout[st->frame_size] = 0;
+      for (i=0;i<=st->frame_size;i++)
+         Yout[i] *= .1;
+      
+      
+      if (st->adapted)
+      {
+         for (i=0;i<st->frame_size;i++)
+            st->last_y[i] = st->last_y[st->frame_size+i];
+         for (i=0;i<st->frame_size;i++)
+            st->last_y[st->frame_size+i] = st->y[st->frame_size+i];
+      } else {
+         for (i=0;i<N;i++)
+            st->last_y[i] = st->x[i];
+      }
+      for (i=0;i<N;i++)
+         st->Yps[i] = (.5-.5*cos(2*M_PI*i/N))*st->last_y[i];
+      spx_drft_forward(st->fft_lookup, st->Yps);
+      for (i=1;i<st->frame_size;i++)
+         st->Yps[i] = .1*st->Yps[2*i-1]*st->Yps[2*i-1] + st->Yps[2*i]*st->Yps[2*i];
+      st->Yps[0] = .1*st->Yps[0]*st->Yps[0];
+      st->Yps[st->frame_size] = .1*st->Yps[N-1]*st->Yps[N-1];
+      for (i=0;i<=st->frame_size;i++)
+         Yout[i] = 3*st->Yps[i];
+   }
+
 }
 

Modified: trunk/speex/libspeex/preprocess.c
===================================================================
--- trunk/speex/libspeex/preprocess.c	2005-05-07 22:42:39 UTC (rev 9235)
+++ trunk/speex/libspeex/preprocess.c	2005-05-08 00:08:43 UTC (rev 9236)
@@ -36,7 +36,7 @@
 #endif
 
 #include <math.h>
-#include <speex/speex_preprocess.h>
+#include "speex/speex_preprocess.h"
 #include "misc.h"
 #include "smallft.h"
 
@@ -102,10 +102,11 @@
    if (x>9.5)
       return 1+.12/x;
 
-   integer = floor(x);
-   frac = x-integer;
+   integer = floor(2*x);
+   frac = 2*x-integer;
    ind = (int)integer;
-   
+   /*if (ind > 20 || ind < 0)
+   fprintf (stderr, "error: %d %f\n", ind, x);*/
    return ((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f);
 }
 
@@ -269,7 +270,7 @@
    speex_free(st);
 }
 
-static void update_noise(SpeexPreprocessState *st, float *ps, int *echo)
+static void update_noise(SpeexPreprocessState *st, float *ps, float *echo)
 {
    int i;
    float beta;
@@ -281,10 +282,10 @@
    if (!echo)
    {
       for (i=0;i<st->ps_size;i++)
-         st->noise[i] = (1.f-beta)*st->noise[i] + beta*ps[i];   
+         st->noise[i] = (1.f-beta)*st->noise[i] + beta*ps[i];
    } else {
       for (i=0;i<st->ps_size;i++)
-         st->noise[i] = (1.f-beta)*st->noise[i] + beta*max(0.f,ps[i]-echo[i]); 
+         st->noise[i] = (1.f-beta)*st->noise[i] + beta*max(1.f,ps[i]-echo[i]); 
 #if 0
       for (i=0;i<st->ps_size;i++)
          st->noise[i] = 0;
@@ -620,7 +621,7 @@
 
 #define NOISE_OVERCOMPENS 1.4
 
-int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, int *echo)
+int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, float *echo)
 {
    int i;
    int is_speech=1;
@@ -648,7 +649,7 @@
    /* Deal with residual echo if provided */
    if (echo)
       for (i=1;i<N;i++)
-         st->echo_noise[i] = (.7f*st->echo_noise[i] + .3f* echo[i]);
+         st->echo_noise[i] = (.3f*st->echo_noise[i] + echo[i]);
 
    /* Compute a posteriori SNR */
    for (i=1;i<N;i++)
@@ -755,7 +756,7 @@
          if (st->update_prob[i]<.5f || st->ps[i] < st->noise[i])
          {
             if (echo)
-               st->noise[i] = .90f*st->noise[i] + .1f*(st->ps[i]-echo[i]);
+               st->noise[i] = .90f*st->noise[i] + .1f*max(1.0f,st->ps[i]-echo[i]);
             else
                st->noise[i] = .90f*st->noise[i] + .1f*st->ps[i];
          }
@@ -937,7 +938,7 @@
    return is_speech;
 }
 
-void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x, int *echo)
+void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x, float *echo)
 {
    int i;
    int N = st->ps_size;
@@ -956,7 +957,7 @@
       if (st->update_prob[i]<.5f || st->ps[i] < st->noise[i])
       {
          if (echo)
-            st->noise[i] = .90f*st->noise[i] + .1f*(st->ps[i]-echo[i]);
+            st->noise[i] = .90f*st->noise[i] + .1f*max(1.0f,st->ps[i]-echo[i]);
          else
             st->noise[i] = .90f*st->noise[i] + .1f*st->ps[i];
       }



More information about the commits mailing list