[xiph-commits] r11970 - trunk/speex/libspeex

Mon Oct 30 05:00:10 PST 2006

Author: jm
Date: 2006-10-30 05:00:04 -0800 (Mon, 30 Oct 2006)
New Revision: 11970

Modified:
   trunk/speex/libspeex/math_approx.h
   trunk/speex/libspeex/preprocess.c
Log:
A bit of backtracking to fix quality regressions. Need to re-convert st->noise,
the window and a priori/a posteriori SNR.


Modified: trunk/speex/libspeex/math_approx.h
===================================================================

--- trunk/speex/libspeex/math_approx.h	2006-10-29 23:27:52 UTC (rev 11969)
+++ trunk/speex/libspeex/math_approx.h	2006-10-30 13:00:04 UTC (rev 11970)
@@ -38,6 +38,8 @@
 #include "misc.h"
 
 spx_word16_t spx_cos(spx_word16_t x);
+spx_int16_t spx_ilog2(spx_uint32_t x);
+spx_int16_t spx_ilog4(spx_uint32_t x);
 
 #ifdef FIXED_POINT
 spx_word16_t spx_sqrt(spx_word32_t x);

Modified: trunk/speex/libspeex/preprocess.c
===================================================================
--- trunk/speex/libspeex/preprocess.c	2006-10-29 23:27:52 UTC (rev 11969)
+++ trunk/speex/libspeex/preprocess.c	2006-10-30 13:00:04 UTC (rev 11970)
@@ -65,6 +65,7 @@
 #include "misc.h"
 #include "fftwrap.h"
 #include "filterbank.h"
+#include "math_approx.c"
 
 #define max(a,b) ((a) > (b) ? (a) : (b))
 #define min(a,b) ((a) < (b) ? (a) : (b))
@@ -147,9 +148,9 @@
 #define SNR_SCALING_1 0.0039062f
 #define SNR_SHIFT 8
 
-#define GAIN_SCALING 32768.f
-#define GAIN_SCALING_1 3.0518e-05
-#define GAIN_SHIFT 1
+#define FRAC_SCALING 32768.f
+#define FRAC_SCALING_1 3.0518e-05
+#define FRAC_SHIFT 1
 
 #else
 
@@ -158,13 +159,12 @@
 #define SNR_SCALING 1.f
 #define SNR_SCALING_1 1.f
 #define SNR_SHIFT 0
-#define GAIN_SCALING 1.f
-#define GAIN_SCALING_1 1.f
-#define GAIN_SHIFT 0
+#define FRAC_SCALING 1.f
+#define FRAC_SCALING_1 1.f
+#define FRAC_SHIFT 0
 
 #endif
 
-
 /** Speex pre-processor state. */
 struct SpeexPreprocessState_ {
    /* Basic info */
@@ -195,8 +195,8 @@
    spx_word32_t *ps;         /**< Current power spectrum */
    float *gain2;             /**< Adjusted gains */
    float *gain_floor;        /**< Minimum gain allowed */
-   spx_word16_t *window;     /**< Analysis/Synthesis window */
-   spx_word32_t *noise;      /**< Noise estimate */
+   float *window;            /**< Analysis/Synthesis window */
+   float *noise;             /**< Noise estimate */
    spx_word32_t *reverb_estimate; /**< Estimate of reverb energy */
    spx_word32_t *old_ps;     /**< Power spectrum for last frame */
    float *gain;              /**< Ephraim Malah gain */
@@ -226,16 +226,17 @@
    int    nb_loudness_adapt; /**< Number of frames used for loudness adaptation so far */
    int    min_count;         /**< Number of frames processed so far */
    void  *fft_lookup;        /**< Lookup table for the FFT */
-
+#ifdef FIXED_POINT
+   int    frame_shift;
+#endif
 };
 
 
-static void conj_window(spx_word16_t *w, int len)
+static void conj_window(float *w, int len)
 {
    int i;
    for (i=0;i<len;i++)
    {
-      float tmp;
       float x=4*((float)i)/len;
       int inv=0;
       if (x<1)
@@ -252,10 +253,10 @@
          x=4-x;
       }
       x*=1.9979;
-      tmp=(.5-.5*cos(x))*(.5-.5*cos(x));
+      w[i]=(.5-.5*cos(x))*(.5-.5*cos(x));
       if (inv)
-         tmp=1-tmp;
-      w[i]=QCONST16(.999,15)*sqrt(tmp);
+         w[i]=1-w[i];
+      w[i]=sqrt(w[i]);
    }
 }
 
@@ -344,11 +345,11 @@
    st->bank = filterbank_new(M, sampling_rate, N, 1);
    
    st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(float));
-   st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(float));
+   st->window = (float*)speex_alloc(2*N*sizeof(float));
    st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(float));
    
    st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(float));
-   st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(float));
+   st->noise = (float*)speex_alloc((N+M)*sizeof(float));
    st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(float));
    st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(float));
    st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(float));
@@ -371,7 +372,7 @@
 
    conj_window(st->window, 2*N3);
    for (i=2*N3;i<2*st->ps_size;i++)
-      st->window[i]=QCONST16(.999,15);
+      st->window[i]=1;
    
    if (N4>0)
    {
@@ -531,8 +532,19 @@
 
    /* Windowing */
    for (i=0;i<2*N;i++)
-      st->frame[i] = MULT16_16_Q15(st->window[i], st->frame[i]);
+      st->frame[i] *= st->window[i];
 
+#ifdef FIXED_POINT
+   {
+      spx_word16_t max_val=0;
+      for (i=0;i<2*N;i++)
+         max_val = MAX16(max_val, ABS16(st->frame[i]));
+      st->frame_shift = 14-spx_ilog2(EXTEND32(max_val));
+      for (i=0;i<2*N;i++)
+         st->frame[i] = SHL16(st->frame[i], st->frame_shift);
+   }
+#endif
+   
    /* Perform FFT */
    spx_fft(st->fft_lookup, st->frame, st->ft);
          
@@ -540,6 +552,8 @@
    ps[0]=1;
    for (i=1;i<N;i++)
       ps[i]=1+MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]);
+   for (i=0;i<N;i++)
+      st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift);
 
    filterbank_compute_bank32(st->bank, ps, ps+N);
 }
@@ -654,7 +668,7 @@
       if (st->update_prob[i]<.5f || st->ps[i] < st->noise[i])
          st->noise[i] = beta_1*st->noise[i] + beta*NOISE_OVERCOMPENS*st->ps[i];
    }
-   filterbank_compute_bank32(st->bank, st->noise, st->noise+N);
+   filterbank_compute_bank(st->bank, st->noise, st->noise+N);
 
    /* Special case for first frame */
    if (st->nb_adapt==1)
@@ -665,14 +679,14 @@
    for (i=0;i<N+M;i++)
    {
       float gamma = .1;
-      spx_word32_t tot_noise = 1+st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i];
-      st->post[i] = SNR_SCALING_1*SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,8));
+      spx_word32_t tot_noise = 1.f+ st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i];
+      st->post[i] = 1.f*ps[i]/tot_noise - 1.f;
       if (st->post[i]>100.f)
          st->post[i]=100.f;
       /*gamma = .15+.85*st->prior[i]*st->prior[i]/((1+st->prior[i])*(1+st->prior[i]));*/
-      gamma = .1+.9*GAIN_SCALING_1*SQR16_Q15(DIV32_16_Q15(st->old_ps[i],(st->old_ps[i]+tot_noise)));
+      gamma = .1+.9*(st->old_ps[i]/(1.f+st->old_ps[i]+tot_noise))*(st->old_ps[i]/(1.f+st->old_ps[i]+tot_noise));
       /* A priori SNR update */
-      st->prior[i] = gamma*max(0.0f,st->post[i]) + (1.f-gamma)*SNR_SCALING_1*DIV32_16_Q8(st->old_ps[i],tot_noise);
+      st->prior[i] = gamma*max(0.0f,st->post[i]) + (1.f-gamma)*st->old_ps[i]/tot_noise;
       if (st->prior[i]>100.f)
          st->prior[i]=100.f;
    }
@@ -771,7 +785,7 @@
          /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/
          
          /* Take into account speech probability of presence (loudness domain MMSE estimator) */
-         st->gain2[i]=SQR(p*sqrt(st->gain[i])+sqrt(st->gain_floor[i])*(1-p));
+         st->gain2[i]=(p*sqrt(st->gain[i])+sqrt(st->gain_floor[i])*(1-p)) * (p*sqrt(st->gain[i])+sqrt(st->gain_floor[i])*(1-p));
          
          /* Use this if you want a log-domain MMSE estimator instead */
          /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/
@@ -809,6 +823,8 @@
 
    /* Inverse FFT with 1/N scaling */
    spx_ifft(st->fft_lookup, st->ft, st->frame);
+   for (i=0;i<2*N;i++)
+      st->frame[i] = PSHR16(st->frame[i], st->frame_shift);
 
    {
       float max_sample=0;
@@ -824,7 +840,7 @@
    }
 
    for (i=0;i<2*N;i++)
-      st->frame[i] = MULT16_16_Q15(st->window[i], st->frame[i]);
+      st->frame[i] *= st->window[i];
 
    /* Perform overlap and add */
    for (i=0;i<N3;i++)
@@ -879,7 +895,7 @@
    }
 
    for (i=0;i<N3;i++)
-      st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]);
+      st->outbuf[i] = x[st->frame_size-N3+i]*st->window[st->frame_size+i];
 
    /* Save old power spectrum */
    for (i=0;i<N+M;i++)