[xiph-cvs] cvs commit: speex/libspeex denoise.c speex_denoise.h

Jean-Marc Valin jm at xiph.org
Thu May 22 09:25:33 PDT 2003



jm          03/05/22 12:25:33

  Modified:    libspeex denoise.c speex_denoise.h
  Log:
  experimental sub-bands VAD

Revision  Changes    Path
1.14      +79 -5     speex/libspeex/denoise.c

Index: denoise.c
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/denoise.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- denoise.c	21 May 2003 22:16:39 -0000	1.13
+++ denoise.c	22 May 2003 16:25:33 -0000	1.14
@@ -49,6 +49,8 @@
 #define SQRT_M_PI_2 0.88623
 #define LOUDNESS_EXP 3.5
 
+#define NB_BANDS 8
+
 static void conj_window(float *w, int len)
 {
    int i;
@@ -121,6 +123,12 @@
    st->inbuf = (float*)speex_alloc(N3*sizeof(float));
    st->outbuf = (float*)speex_alloc(N3*sizeof(float));
 
+   st->noise_bands = (float*)speex_alloc(NB_BANDS*sizeof(float));
+   st->noise_bands2 = (float*)speex_alloc(NB_BANDS*sizeof(float));
+   st->speech_bands = (float*)speex_alloc(NB_BANDS*sizeof(float));
+   st->speech_bands2 = (float*)speex_alloc(NB_BANDS*sizeof(float));
+   st->noise_bandsN = st->speech_bandsN = 1;
+
    conj_window(st->window, 2*N3);
    for (i=2*N3;i<2*st->ps_size;i++)
       st->window[i]=1;
@@ -384,20 +392,86 @@
 
    /*fprintf (stderr, "%f %f ", mean_prior, mean_post);*/
    {
+      float bands[NB_BANDS];
+      int j;
       float p0, p1;
       float x = sqrt(mean_post);
-      p1 = .0005+.6*exp(-.5*(x-.4)*(x-.4)*11)+.1*exp(-1.2*x);
+
+      for (i=0;i<NB_BANDS;i++)
+      {
+         bands[i]=100;
+         for (j=i*N/NB_BANDS;j<(i+1)*N/NB_BANDS;j++)
+         {
+            bands[i] += ps[j];
+         }
+         bands[i]=log(bands[i]);
+      }
+      
+      /*p1 = .0005+.6*exp(-.5*(x-.4)*(x-.4)*11)+.1*exp(-1.2*x);
       if (x<1.5)
          p0=.1*exp(2*(x-1.5));
       else
          p0=.02+.1*exp(-.2*(x-1.5));
-      
-      p1 *= 1.0;
+      */
+
+      p0=1/(1+exp(3*(1.5-x)));
+      p1=1-p0;
+
       /*fprintf (stderr, "%f %f ", p0, p1);*/
+      /*p0 *= .99*st->speech_prob + .01*(1-st->speech_prob);
+      p1 *= .01*st->speech_prob + .99*(1-st->speech_prob);
+      
+      st->speech_prob = p0/(p1+p0);
+      */
+
+      if (mean_post > 1)
+      {
+         float adapt = 1./st->speech_bandsN++;
+         if (adapt<.005)
+            adapt = .005;
+         for (i=0;i<NB_BANDS;i++)
+         {
+            st->speech_bands[i] = (1-adapt)*st->speech_bands[i] + adapt*bands[i];
+            st->speech_bands2[i] = (1-adapt)*st->speech_bands2[i] + adapt*bands[i]*bands[i];
+         }
+      } else {
+         float adapt = 1./st->noise_bandsN++;
+         if (adapt<.005)
+            adapt = .005;
+         for (i=0;i<NB_BANDS;i++)
+         {
+            st->noise_bands[i] = (1-adapt)*st->noise_bands[i] + adapt*bands[i];
+            st->noise_bands2[i] = (1-adapt)*st->noise_bands2[i] + adapt*bands[i]*bands[i];
+         }
+      }
+      p0=p1=1;
+      for (i=0;i<NB_BANDS;i++)
+      {
+         float noise_var, speech_var;
+         float tmp1, tmp2, pr;
+         noise_var = st->noise_bands2[i] - st->noise_bands[i]*st->noise_bands[i];
+         speech_var = st->speech_bands2[i] - st->speech_bands[i]*st->speech_bands[i];
+         if (noise_var < .1)
+            noise_var = .1;
+         if (speech_var < .1)
+            speech_var = .1;
+         tmp1 = exp(-.5*(bands[i]-st->speech_bands[i])*(bands[i]-st->speech_bands[i])/speech_var)/sqrt(2*M_PI*speech_var);
+         tmp2 = exp(-.5*(bands[i]-st->noise_bands[i])*(bands[i]-st->noise_bands[i])/noise_var)/sqrt(2*M_PI*noise_var);
+         /*fprintf (stderr, "%f ", (float)(p0/(.01+p0+p1)));*/
+         /*fprintf (stderr, "%f ", (float)sqrt(bands[i]));*/
+         pr = tmp1/(1e-5+tmp1+tmp2);
+         p0 *= pr;
+         p1 *= (1-pr);
+      }
+      p0 = pow(p0,.3);
+      p1 = pow(p1,.3);
+
       p0 *= .99*st->speech_prob + .01*(1-st->speech_prob);
       p1 *= .01*st->speech_prob + .99*(1-st->speech_prob);
       
       st->speech_prob = p0/(p1+p0);
+      /*fprintf (stderr, "%f ", st->speech_prob);*/
+
       if (st->speech_prob>.5 || (st->last_speech < 10 && st->speech_prob>.25))
       {
          is_speech = 1;
@@ -405,9 +479,9 @@
       } else {
          st->last_speech++;
          if (st->last_speech<10)
-            is_speech = 1;
+           is_speech = 1;
       }
-      /*fprintf (stderr, "%f ", st->speech_prob);*/
+
    }
 
    if (st->consec_noise>=3)

1.6       +7 -0      speex/libspeex/speex_denoise.h

Index: speex_denoise.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/speex_denoise.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- speex_denoise.h	21 May 2003 21:20:25 -0000	1.5
+++ speex_denoise.h	22 May 2003 16:25:33 -0000	1.6
@@ -57,6 +57,13 @@
    float *loudness_weight;   /**< */
    int    last_id;           /**< */
 
+   float *noise_bands;
+   float *noise_bands2;
+   int    noise_bandsN;
+   float *speech_bands;
+   float *speech_bands2;
+   int    speech_bandsN;
+
    float *inbuf;             /**< Input buffer (overlapped analysis) */
    float *outbuf;            /**< Output buffer (for overlap and add) */
 

--- >8 ----
List archives:  http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body.  No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.



More information about the commits mailing list