[xiph-commits] r11910 - in trunk/speex: include/speex libspeex

jm at svn.xiph.org jm at svn.xiph.org
Tue Oct 10 07:46:27 PDT 2006


Author: jm
Date: 2006-10-10 07:46:13 -0700 (Tue, 10 Oct 2006)
New Revision: 11910

Added:
   trunk/speex/libspeex/filterbank.c
   trunk/speex/libspeex/filterbank.h
Modified:
   trunk/speex/include/speex/speex_preprocess.h
   trunk/speex/libspeex/Makefile.am
   trunk/speex/libspeex/fftwrap.c
   trunk/speex/libspeex/preprocess.c
Log:
Big preprocessor cleanup. Implemented mel filterbank. Removed crappy VAD.
Use the FFT wrapper now.


Modified: trunk/speex/include/speex/speex_preprocess.h
===================================================================
--- trunk/speex/include/speex/speex_preprocess.h	2006-10-10 14:41:51 UTC (rev 11909)
+++ trunk/speex/include/speex/speex_preprocess.h	2006-10-10 14:46:13 UTC (rev 11910)
@@ -41,71 +41,11 @@
 extern "C" {
 #endif
 
-struct drft_lookup;
+struct SpeexPreprocessState_;
 
-/** Speex pre-processor state. */
-typedef struct SpeexPreprocessState {
-   int    frame_size;        /**< Number of samples processed each time */
-   int    ps_size;           /**< Number of points in the power spectrum */
-   int    sampling_rate;     /**< Sampling rate of the input/output */
-   
-   /* parameters */
-   int    denoise_enabled;
-   int    agc_enabled;
-   float  agc_level;
-   int    vad_enabled;
-   int    dereverb_enabled;
-   float  reverb_decay;
-   float  reverb_level;
-   float  speech_prob_start;
-   float  speech_prob_continue;
-   
-   float *frame;             /**< Processing frame (2*ps_size) */
-   float *ps;                /**< Current power spectrum */
-   float *gain2;             /**< Adjusted gains */
-   float *window;            /**< Analysis/Synthesis window */
-   float *noise;             /**< Noise estimate */
-   float *reverb_estimate;   /**< Estimate of reverb energy */
-   float *old_ps;            /**< Power spectrum for last frame */
-   float *gain;              /**< Ephraim Malah gain */
-   float *prior;             /**< A-priori SNR */
-   float *post;              /**< A-posteriori SNR */
+typedef struct SpeexPreprocessState_ SpeexPreprocessState;
 
-   float *S;                 /**< Smoothed power spectrum */
-   float *Smin;              /**< See Cohen paper */
-   float *Stmp;              /**< See Cohen paper */
-   float *update_prob;       /**< Propability of speech presence for noise update */
 
-   float *zeta;              /**< Smoothed a priori SNR */
-   float  Zpeak;
-   float  Zlast;
-
-   float *loudness_weight;   /**< Perceptual loudness curve */
-
-   float *echo_noise;
-
-   float *noise_bands;
-   float *noise_bands2;
-   int    noise_bandsN;
-   float *speech_bands;
-   float *speech_bands2;
-   int    speech_bandsN;
-
-   float *inbuf;             /**< Input buffer (overlapped analysis) */
-   float *outbuf;            /**< Output buffer (for overlap and add) */
-
-   float  speech_prob;
-   int    last_speech;
-   float  loudness;          /**< loudness estimate */
-   float  loudness2;         /**< loudness estimate */
-   int    nb_adapt;          /**< Number of frames used for adaptation so far */
-   int    nb_loudness_adapt; /**< Number of frames used for loudness adaptation so far */
-   int    consec_noise;      /**< Number of consecutive noise frames */
-   int    nb_preprocess;     /**< Number of frames processed so far */
-   struct drft_lookup *fft_lookup;   /**< Lookup table for the FFT */
-
-} SpeexPreprocessState;
-
 /** Creates a new preprocessing state */
 SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate);
 

Modified: trunk/speex/libspeex/Makefile.am
===================================================================
--- trunk/speex/libspeex/Makefile.am	2006-10-10 14:41:51 UTC (rev 11909)
+++ trunk/speex/libspeex/Makefile.am	2006-10-10 14:46:13 UTC (rev 11910)
@@ -16,7 +16,7 @@
 				exc_10_16_table.c 	exc_20_32_table.c 	hexc_10_32_table.c 	misc.c 	speex_header.c \
 				speex_callbacks.c 	math_approx.c 	stereo.c 	preprocess.c 	smallft.c 	lbr_48k_tables.c \
 				jitter.c 	mdf.c vorbis_psy.c fftwrap.c kiss_fft.c _kiss_fft_guts.h kiss_fft.h \
-	kiss_fftr.c kiss_fftr.h window.c
+	kiss_fftr.c kiss_fftr.h window.c filterbank.c
 
 noinst_HEADERS = lsp.h 	nb_celp.h 	lpc.h 	lpc_bfin.h 	ltp.h 	quant_lsp.h \
 				cb_search.h 	filters.h 	stack_alloc.h 	vq.h 	vq_sse.h 	vq_arm4.h 	vq_bfin.h \
@@ -24,7 +24,7 @@
 				ltp_bfin.h 	filters_sse.h 	filters_arm4.h 	filters_bfin.h 	math_approx.h \
 				smallft.h 	arch.h 	fixed_arm4.h 	fixed_arm5e.h 	fixed_bfin.h 	fixed_debug.h \
 				fixed_generic.h 	cb_search_sse.h 	cb_search_arm4.h 	cb_search_bfin.h vorbis_psy.h \
-		fftwrap.h pseudofloat.h lsp_bfin.h quant_lsp_bfin.h
+		fftwrap.h pseudofloat.h lsp_bfin.h quant_lsp_bfin.h filterbank.h
 
 
 libspeex_la_LDFLAGS = -no-undefined -version-info @SPEEX_LT_CURRENT@:@SPEEX_LT_REVISION@:@SPEEX_LT_AGE@

Modified: trunk/speex/libspeex/fftwrap.c
===================================================================
--- trunk/speex/libspeex/fftwrap.c	2006-10-10 14:41:51 UTC (rev 11909)
+++ trunk/speex/libspeex/fftwrap.c	2006-10-10 14:46:13 UTC (rev 11910)
@@ -36,8 +36,8 @@
 #include "config.h"
 #endif
 
-/*#define USE_SMALLFT*/
-#define USE_KISS_FFT
+#define USE_SMALLFT
+/*#define USE_KISS_FFT*/
 
 
 #include "misc.h"
@@ -120,7 +120,6 @@
 {
    if (in==out)
    {
-      int i;
       speex_warning("FFT should not be done in-place");
    } else {
       int i;

Added: trunk/speex/libspeex/filterbank.c
===================================================================
--- trunk/speex/libspeex/filterbank.c	2006-10-10 14:41:51 UTC (rev 11909)
+++ trunk/speex/libspeex/filterbank.c	2006-10-10 14:46:13 UTC (rev 11910)
@@ -0,0 +1,140 @@
+/* Copyright (C) 2006 Jean-Marc Valin */
+/**
+   @file filterbank.c
+   @brief Converting between psd and filterbank
+ */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   3. The name of the author may not be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "filterbank.h"
+#include "misc.h"
+#include <math.h>
+
+/* Build a mel-spaced filterbank mapping `len` spectrum bins onto `banks` bands.
+   Each bin is shared by two adjacent bands with weights (1-val) and val;
+   scaling[] holds the reciprocal of each band's summed weight.
+   banks must be >= 2 (mel_interval divides by banks-1); `type` is unused. */
+FilterBank *filterbank_new(int banks, float max_freq, float sampling, int len, int type)
+{
+   FilterBank *bank;
+   float df, max_mel, mel_interval;
+   int i, id1, id2;   /* declared before any statement (C89) */
+   df = .5*sampling/len;
+   max_mel = 2595*log10(1+max_freq/700);
+   mel_interval = max_mel/(banks-1);
+   bank = speex_alloc(sizeof(FilterBank));
+   bank->nb_banks = banks;
+   bank->len = len;
+   bank->bank_left = speex_alloc(len*sizeof(int));
+   bank->bank_right = speex_alloc(len*sizeof(int));
+   bank->filter_left = speex_alloc(len*sizeof(float));
+   bank->filter_right = speex_alloc(len*sizeof(float));
+   bank->scaling = speex_alloc(banks*sizeof(float));
+   for (i=0;i<len;i++)
+   {
+      float curr_freq, mel, val;
+      curr_freq = i*df;
+      mel = 2595*log10(1+curr_freq/700);
+      if (mel > max_mel)
+      {
+         /* Above max_freq: zero the entry explicitly (don't rely on a zeroed allocation) */
+         bank->bank_left[i] = bank->bank_right[i] = 0;
+         bank->filter_left[i] = bank->filter_right[i] = 0;
+         continue;
+      }
+      id1 = (int)(floor(mel/mel_interval));
+      if (id1>banks-2)
+      {
+         id1 = banks-2;
+         val = 1;
+      } else {
+         val = (mel - id1*mel_interval)/mel_interval;
+      }
+      id2 = id1+1;
+      bank->bank_left[i] = id1;
+      bank->filter_left[i] = 1-val;
+      bank->bank_right[i] = id2;
+      bank->filter_right[i] = val;
+   }
+   for (i=0;i<bank->nb_banks;i++)
+      bank->scaling[i] = 0;
+   for (i=0;i<bank->len;i++)
+   {
+      bank->scaling[bank->bank_left[i]] += bank->filter_left[i];
+      bank->scaling[bank->bank_right[i]] += bank->filter_right[i];
+   }
+   /* Invert the sums; a band that received no weight keeps 0 instead of 1/0 */
+   for (i=0;i<bank->nb_banks;i++)
+      bank->scaling[i] = bank->scaling[i]>0 ? 1./(bank->scaling[i]) : 0;
+   return bank;
+}
+
+void filterbank_destroy(FilterBank *bank)
+{  /* Release every array owned by the bank, then the FilterBank itself */
+   speex_free(bank->scaling);
+   speex_free(bank->filter_right);
+   speex_free(bank->filter_left);
+   speex_free(bank->bank_right);
+   speex_free(bank->bank_left);
+   speex_free(bank);
+}
+
+/* Project the len-bin spectrum ps[] onto the bank: mel[] receives nb_banks weighted sums, each multiplied by scaling[]. */
+void filterbank_compute_bank(FilterBank *bank, float *ps, float *mel)
+{
+   int bin, band;
+   const int nb_bins = bank->len;
+   const int nb_bands = bank->nb_banks;
+   for (band=0;band<nb_bands;band++)
+      mel[band] = 0;
+   /* Add each bin to its left band first, then to its right band */
+   for (bin=0;bin<nb_bins;bin++)
+   {
+      mel[bank->bank_left[bin]] += bank->filter_left[bin]*ps[bin];
+      mel[bank->bank_right[bin]] += bank->filter_right[bin]*ps[bin];
+   }
+   for (band=0;band<nb_bands;band++)
+      mel[band] *= bank->scaling[band];
+}
+
+/* Expand the band values mel[] back onto len bins: ps[i] is the weighted mix of the bin's left and right bands. */
+void filterbank_compute_psd(FilterBank *bank, float *mel, float *ps)
+{
+   int bin;
+   for (bin=0;bin<bank->len;bin++) {
+      const int lo = bank->bank_left[bin];
+      const int hi = bank->bank_right[bin];
+      ps[bin] = mel[lo]*bank->filter_left[bin];
+      ps[bin] += mel[hi]*bank->filter_right[bin];
+   }
+}
+

Added: trunk/speex/libspeex/filterbank.h
===================================================================
--- trunk/speex/libspeex/filterbank.h	2006-10-10 14:41:51 UTC (rev 11909)
+++ trunk/speex/libspeex/filterbank.h	2006-10-10 14:46:13 UTC (rev 11910)
@@ -0,0 +1,59 @@
+/* Copyright (C) 2006 Jean-Marc Valin */
+/**
+   @file filterbank.h
+   @brief Converting between psd and filterbank
+ */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   3. The name of the author may not be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FILTERBANK_H
+#define FILTERBANK_H
+
+#include "misc.h"
+
+typedef struct {
+   int *bank_left;       /**< Per bin: index of the lower of the two bands it feeds */
+   int *bank_right;      /**< Per bin: index of the upper band (set to bank_left+1 by filterbank_new) */
+   float *filter_left;   /**< Per bin: weight given to the bank_left band (1-val) */
+   float *filter_right;  /**< Per bin: weight given to the bank_right band (val) */
+   float *scaling;       /**< Per band: 1/(sum of the weights that band receives) */
+   int nb_banks;         /**< Number of bands (length of scaling) */
+   int len;              /**< Number of bins (length of the four per-bin arrays) */
+} FilterBank;
+
+
+FilterBank *filterbank_new(int banks, float max_freq, float sampling, int len, int type);
+
+void filterbank_destroy(FilterBank *bank);
+
+void filterbank_compute_bank(FilterBank *bank, float *psd, float *mel);
+
+void filterbank_compute_psd(FilterBank *bank, float *mel, float *psd);
+
+
+#endif

Modified: trunk/speex/libspeex/preprocess.c
===================================================================
--- trunk/speex/libspeex/preprocess.c	2006-10-10 14:41:51 UTC (rev 11909)
+++ trunk/speex/libspeex/preprocess.c	2006-10-10 14:46:13 UTC (rev 11910)
@@ -38,7 +38,8 @@
 #include <math.h>
 #include "speex/speex_preprocess.h"
 #include "misc.h"
-#include "smallft.h"
+#include "fftwrap.h"
+#include "filterbank.h"
 
 #define max(a,b) ((a) > (b) ? (a) : (b))
 #define min(a,b) ((a) < (b) ? (a) : (b))
@@ -60,6 +61,74 @@
 #define ZMIN_1 10
 #define LOG_MIN_MAX_1 0.86859
 
+
+/** Speex pre-processor state. */
+struct SpeexPreprocessState_ {
+   int    frame_size;        /**< Number of samples processed each time */
+   int    ps_size;           /**< Number of points in the power spectrum */
+   int    sampling_rate;     /**< Sampling rate of the input/output */
+   
+   /* parameters */
+   int    denoise_enabled;
+   int    agc_enabled;
+   float  agc_level;
+   int    vad_enabled;       /**< Still stored by SPEEX_PREPROCESS_SET_VAD; the VAD code itself was removed in this revision */
+   int    dereverb_enabled;
+   float  reverb_decay;
+   float  reverb_level;
+   float  speech_prob_start;
+   float  speech_prob_continue;
+   
+   FilterBank *bank;         /**< Filterbank from filterbank_new(); gain2 is projected onto its bands and expanded back */
+   
+   float *frame;             /**< Processing frame (2*ps_size) */
+   float *ft;                /**< Processing frame in freq domain (2*ps_size) */
+   float *ps;                /**< Current power spectrum */
+   float *gain2;             /**< Adjusted gains */
+   float *window;            /**< Analysis/Synthesis window */
+   float *noise;             /**< Noise estimate */
+   float *reverb_estimate;   /**< Estimate of reverb energy */
+   float *old_ps;            /**< Power spectrum for last frame */
+   float *gain;              /**< Ephraim Malah gain */
+   float *prior;             /**< A-priori SNR */
+   float *post;              /**< A-posteriori SNR */
+
+   float *S;                 /**< Smoothed power spectrum */
+   float *Smin;              /**< See Cohen paper */
+   float *Stmp;              /**< See Cohen paper */
+   float *update_prob;       /**< Probability of speech presence for noise update */
+
+   float *zeta;              /**< Smoothed a priori SNR */
+   float  Zpeak;
+   float  Zlast;
+
+   float *loudness_weight;   /**< Perceptual loudness curve */
+
+   spx_int32_t *echo_noise;  /**< Residual echo estimate: .3*previous + echo[i] per frame, zeroed when no echo is given. NOTE(review): init allocates it with N*sizeof(float) - confirm sizes match */
+
+   float *noise_bands;
+   float *noise_bands2;
+   int    noise_bandsN;
+   float *speech_bands;
+   float *speech_bands2;
+   int    speech_bandsN;
+
+   float *inbuf;             /**< Input buffer (overlapped analysis) */
+   float *outbuf;            /**< Output buffer (for overlap and add) */
+
+   float  speech_prob;
+   int    last_speech;
+   float  loudness;          /**< loudness estimate */
+   float  loudness2;         /**< loudness estimate */
+   int    nb_adapt;          /**< Number of frames used for adaptation so far */
+   int    nb_loudness_adapt; /**< Number of frames used for loudness adaptation so far */
+   int    consec_noise;      /**< Number of consecutive noise frames */
+   int    nb_preprocess;     /**< Number of frames processed so far */
+   void *fft_lookup;   /**< Lookup table for the FFT: opaque handle from spx_fft_init(2*N), released with spx_fft_destroy() */
+
+};
+
+
 static void conj_window(float *w, int len)
 {
    int i;
@@ -163,7 +232,10 @@
    st->speech_prob_start = SPEEX_PROB_START_DEFAULT;
    st->speech_prob_continue = SPEEX_PROB_CONTINUE_DEFAULT;
 
+   st->bank = filterbank_new(24, 4000, 8000, N, 1);
+         
    st->frame = (float*)speex_alloc(2*N*sizeof(float));
+   st->ft = (float*)speex_alloc(2*N*sizeof(float));
    st->ps = (float*)speex_alloc(N*sizeof(float));
    st->gain2 = (float*)speex_alloc(N*sizeof(float));
    st->window = (float*)speex_alloc(2*N*sizeof(float));
@@ -176,7 +248,7 @@
    st->loudness_weight = (float*)speex_alloc(N*sizeof(float));
    st->inbuf = (float*)speex_alloc(N3*sizeof(float));
    st->outbuf = (float*)speex_alloc(N3*sizeof(float));
-   st->echo_noise = (float*)speex_alloc(N*sizeof(float));
+   st->echo_noise = (spx_int32_t*)speex_alloc(N*sizeof(float));
 
    st->S = (float*)speex_alloc(N*sizeof(float));
    st->Smin = (float*)speex_alloc(N*sizeof(float));
@@ -236,8 +308,7 @@
    st->loudness2 = 6000;
    st->nb_loudness_adapt = 0;
 
-   st->fft_lookup = (struct drft_lookup*)speex_alloc(sizeof(struct drft_lookup));
-   spx_drft_init(st->fft_lookup,2*N);
+   st->fft_lookup = spx_fft_init(2*N);
 
    st->nb_adapt=0;
    st->consec_noise=0;
@@ -248,6 +319,7 @@
 void speex_preprocess_state_destroy(SpeexPreprocessState *st)
 {
    speex_free(st->frame);
+   speex_free(st->ft);
    speex_free(st->ps);
    speex_free(st->gain2);
    speex_free(st->window);
@@ -274,237 +346,16 @@
    speex_free(st->inbuf);
    speex_free(st->outbuf);
 
-   spx_drft_clear(st->fft_lookup);
-   speex_free(st->fft_lookup);
+   spx_fft_destroy(st->fft_lookup);
 
    speex_free(st);
 }
 
-static void update_noise(SpeexPreprocessState *st, float *ps, spx_int32_t *echo)
+static void speex_compute_agc(SpeexPreprocessState *st)
 {
    int i;
-   float beta;
-   st->nb_adapt++;
-   beta=1.0f/st->nb_adapt;
-   if (beta < .05f)
-      beta=.05f;
-   
-   if (!echo)
-   {
-      for (i=0;i<st->ps_size;i++)
-         st->noise[i] = (1.f-beta)*st->noise[i] + beta*ps[i];
-   } else {
-      for (i=0;i<st->ps_size;i++)
-         st->noise[i] = (1.f-beta)*st->noise[i] + beta*max(1.f,ps[i]-st->frame_size*st->frame_size*1.0*echo[i]); 
-#if 0
-      for (i=0;i<st->ps_size;i++)
-         st->noise[i] = 0;
-#endif
-   }
-}
-
-static int speex_compute_vad(SpeexPreprocessState *st, float *ps, float mean_prior, float mean_post)
-{
-   int i, is_speech=0;
    int N = st->ps_size;
    float scale=.5f/N;
-
-   /* FIXME: Clean this up a bit */
-   {
-      float bands[NB_BANDS];
-      int j;
-      float p0, p1;
-      float tot_loudness=0;
-      float x = sqrt(mean_post);
-
-      for (i=5;i<N-10;i++)
-      {
-         tot_loudness += scale*st->ps[i] * st->loudness_weight[i];
-      }
-
-      for (i=0;i<NB_BANDS;i++)
-      {
-         bands[i]=1e4f;
-         for (j=i*N/NB_BANDS;j<(i+1)*N/NB_BANDS;j++)
-         {
-            bands[i] += ps[j];
-         }
-         bands[i]=log(bands[i]);
-      }
-      
-      /*p1 = .0005+.6*exp(-.5*(x-.4)*(x-.4)*11)+.1*exp(-1.2*x);
-      if (x<1.5)
-         p0=.1*exp(2*(x-1.5));
-      else
-         p0=.02+.1*exp(-.2*(x-1.5));
-      */
-
-      p0=1.f/(1.f+exp(3.f*(1.5f-x)));
-      p1=1.f-p0;
-
-      /*fprintf (stderr, "%f %f ", p0, p1);*/
-      /*p0 *= .99*st->speech_prob + .01*(1-st->speech_prob);
-      p1 *= .01*st->speech_prob + .99*(1-st->speech_prob);
-      
-      st->speech_prob = p0/(p1+p0);
-      */
-
-      if (st->noise_bandsN < 50 || st->speech_bandsN < 50)
-      {
-         if (mean_post > 5.f)
-         {
-            float adapt = 1./st->speech_bandsN++;
-            if (adapt<.005f)
-               adapt = .005f;
-            for (i=0;i<NB_BANDS;i++)
-            {
-               st->speech_bands[i] = (1.f-adapt)*st->speech_bands[i] + adapt*bands[i];
-               /*st->speech_bands2[i] = (1-adapt)*st->speech_bands2[i] + adapt*bands[i]*bands[i];*/
-               st->speech_bands2[i] = (1.f-adapt)*st->speech_bands2[i] + adapt*(bands[i]-st->speech_bands[i])*(bands[i]-st->speech_bands[i]);
-            }
-         } else {
-            float adapt = 1./st->noise_bandsN++;
-            if (adapt<.005f)
-               adapt = .005f;
-            for (i=0;i<NB_BANDS;i++)
-            {
-               st->noise_bands[i] = (1.f-adapt)*st->noise_bands[i] + adapt*bands[i];
-               /*st->noise_bands2[i] = (1-adapt)*st->noise_bands2[i] + adapt*bands[i]*bands[i];*/
-               st->noise_bands2[i] = (1.f-adapt)*st->noise_bands2[i] + adapt*(bands[i]-st->noise_bands[i])*(bands[i]-st->noise_bands[i]);
-            }
-         }
-      }
-      p0=p1=1;
-      for (i=0;i<NB_BANDS;i++)
-      {
-         float noise_var, speech_var;
-         float noise_mean, speech_mean;
-         float tmp1, tmp2, pr;
-
-         /*noise_var = 1.01*st->noise_bands2[i] - st->noise_bands[i]*st->noise_bands[i];
-           speech_var = 1.01*st->speech_bands2[i] - st->speech_bands[i]*st->speech_bands[i];*/
-         noise_var = st->noise_bands2[i];
-         speech_var = st->speech_bands2[i];
-         if (noise_var < .1f)
-            noise_var = .1f;
-         if (speech_var < .1f)
-            speech_var = .1f;
-         
-         /*speech_var = sqrt(speech_var*noise_var);
-           noise_var = speech_var;*/
-         if (noise_var < .05f*speech_var)
-            noise_var = .05f*speech_var; 
-         if (speech_var < .05f*noise_var)
-            speech_var = .05f*noise_var;
-         
-         if (bands[i] < st->noise_bands[i])
-            speech_var = noise_var;
-         if (bands[i] > st->speech_bands[i])
-            noise_var = speech_var;
-
-         speech_mean = st->speech_bands[i];
-         noise_mean = st->noise_bands[i];
-         if (noise_mean < speech_mean - 5.f)
-            noise_mean = speech_mean - 5.f;
-
-         tmp1 = exp(-.5f*(bands[i]-speech_mean)*(bands[i]-speech_mean)/speech_var)/sqrt(2.f*M_PI*speech_var);
-         tmp2 = exp(-.5f*(bands[i]-noise_mean)*(bands[i]-noise_mean)/noise_var)/sqrt(2.f*M_PI*noise_var);
-         /*fprintf (stderr, "%f ", (float)(p0/(.01+p0+p1)));*/
-         /*fprintf (stderr, "%f ", (float)(bands[i]));*/
-         pr = tmp1/(1e-25+tmp1+tmp2);
-         /*if (bands[i] < st->noise_bands[i])
-            pr=.01;
-         if (bands[i] > st->speech_bands[i] && pr < .995)
-         pr=.995;*/
-         if (pr>.999f)
-            pr=.999f;
-         if (pr<.001f)
-            pr=.001f;
-         /*fprintf (stderr, "%f ", pr);*/
-         p0 *= pr;
-         p1 *= (1-pr);
-      }
-
-      p0 = pow(p0,.2);
-      p1 = pow(p1,.2);      
-      
-#if 1
-      p0 *= 2.f;
-      p0=p0/(p1+p0);
-      if (st->last_speech>20) 
-      {
-         float tmp = sqrt(tot_loudness)/st->loudness2;
-         tmp = 1.f-exp(-10.f*tmp);
-         if (p0>tmp)
-            p0=tmp;
-      }
-      p1=1-p0;
-#else
-      if (sqrt(tot_loudness) < .6f*st->loudness2 && p0>15.f*p1)
-         p0=15.f*p1;
-      if (sqrt(tot_loudness) < .45f*st->loudness2 && p0>7.f*p1)
-         p0=7.f*p1;
-      if (sqrt(tot_loudness) < .3f*st->loudness2 && p0>3.f*p1)
-         p0=3.f*p1;
-      if (sqrt(tot_loudness) < .15f*st->loudness2 && p0>p1)
-         p0=p1;
-      /*fprintf (stderr, "%f %f ", (float)(sqrt(tot_loudness) /( .25*st->loudness2)), p0/(p1+p0));*/
-#endif
-
-      p0 *= .99f*st->speech_prob + .01f*(1-st->speech_prob);
-      p1 *= .01f*st->speech_prob + .99f*(1-st->speech_prob);
-      
-      st->speech_prob = p0/(1e-25f+p1+p0);
-      /*fprintf (stderr, "%f %f %f ", tot_loudness, st->loudness2, st->speech_prob);*/
-
-      if (st->speech_prob > st->speech_prob_start
-         || (st->last_speech < 20 && st->speech_prob > st->speech_prob_continue))
-      {
-         is_speech = 1;
-         st->last_speech = 0;
-      } else {
-         st->last_speech++;
-         if (st->last_speech<20)
-           is_speech = 1;
-      }
-
-      if (st->noise_bandsN > 50 && st->speech_bandsN > 50)
-      {
-         if (mean_post > 5)
-         {
-            float adapt = 1./st->speech_bandsN++;
-            if (adapt<.005f)
-               adapt = .005f;
-            for (i=0;i<NB_BANDS;i++)
-            {
-               st->speech_bands[i] = (1-adapt)*st->speech_bands[i] + adapt*bands[i];
-               /*st->speech_bands2[i] = (1-adapt)*st->speech_bands2[i] + adapt*bands[i]*bands[i];*/
-               st->speech_bands2[i] = (1-adapt)*st->speech_bands2[i] + adapt*(bands[i]-st->speech_bands[i])*(bands[i]-st->speech_bands[i]);
-            }
-         } else {
-            float adapt = 1./st->noise_bandsN++;
-            if (adapt<.005f)
-               adapt = .005f;
-            for (i=0;i<NB_BANDS;i++)
-            {
-               st->noise_bands[i] = (1-adapt)*st->noise_bands[i] + adapt*bands[i];
-               /*st->noise_bands2[i] = (1-adapt)*st->noise_bands2[i] + adapt*bands[i]*bands[i];*/
-               st->noise_bands2[i] = (1-adapt)*st->noise_bands2[i] + adapt*(bands[i]-st->noise_bands[i])*(bands[i]-st->noise_bands[i]);
-            }
-         }
-      }
-
-
-   }
-
-   return is_speech;
-}
-
-static void speex_compute_agc(SpeexPreprocessState *st, float mean_prior)
-{
-   int i;
-   int N = st->ps_size;
-   float scale=.5f/N;
    float agc_gain;
    int freq_start, freq_end;
    float active_bands = 0;
@@ -582,12 +433,12 @@
       st->frame[i] *= st->window[i];
 
    /* Perform FFT */
-   spx_drft_forward(st->fft_lookup, st->frame);
-
+   spx_fft_float(st->fft_lookup, st->frame, st->ft);
+         
    /* Power spectrum */
    ps[0]=1;
    for (i=1;i<N;i++)
-      ps[i]=1+st->frame[2*i-1]*st->frame[2*i-1] + st->frame[2*i]*st->frame[2*i];
+      ps[i]=1+st->ft[2*i-1]*st->ft[2*i-1] + st->ft[2*i]*st->ft[2*i];
 
 }
 
@@ -635,15 +486,27 @@
 int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
 {
    int i;
-   int is_speech=1;
-   float mean_post=0;
-   float mean_prior=0;
    int N = st->ps_size;
    int N3 = 2*N - st->frame_size;
    int N4 = st->frame_size - N3;
-   float scale=.5f/N;
+   /*float scale=.5f/N;*/
    float *ps=st->ps;
    float Zframe=0, Pframe;
+   float beta, beta_1;
+   
+   st->nb_adapt++;
+   beta =1.0f/st->nb_adapt;
+   if (beta < .05f)
+      beta=.05f;
+   beta_1 = 1.0f-beta;
+   
+   /* Deal with residual echo if provided */
+   if (echo)
+      for (i=0;i<N;i++)
+         st->echo_noise[i] = (.3f*st->echo_noise[i] + echo[i]);
+   else
+      for (i=0;i<N;i++)
+         st->echo_noise[i] = 0;
 
    preprocess_analysis(st, x);
 
@@ -651,56 +514,40 @@
 
    st->nb_preprocess++;
 
-   /* Noise estimation always updated for the 20 first times */
+   /* Noise estimation always updated for the 10 first frames */
    if (st->nb_adapt<10)
    {
-      update_noise(st, ps, echo);
+      for (i=1;i<N-1;i++)
+         st->update_prob[i] = 0;
    }
 
-   /* Deal with residual echo if provided */
-   if (echo)
-      for (i=1;i<N;i++)
-         st->echo_noise[i] = (.3f*st->echo_noise[i] + st->frame_size*st->frame_size*1.0*echo[i]);
+   for (i=1;i<N-1;i++)
+   {
+      if (st->update_prob[i]<.5f/* || st->ps[i] < st->noise[i]*/)
+         st->noise[i] = beta_1*st->noise[i] + beta*st->ps[i];
+   }
 
+   /* Special case for first frame */
+   if (st->nb_adapt==1)
+      for (i=0;i<N;i++)
+         st->old_ps[i] = ps[i];
+
    /* Compute a posteriori SNR */
    for (i=1;i<N;i++)
    {
+      float gamma;
       float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i];
       st->post[i] = ps[i]/tot_noise - 1.f;
       if (st->post[i]>100.f)
          st->post[i]=100.f;
-      /*if (st->post[i]<0)
-        st->post[i]=0;*/
-      mean_post+=st->post[i];
+      gamma = .15+.85*st->prior[i]*st->prior[i]/((1+st->prior[i])*(1+st->prior[i]));
+      /* A priori SNR update */
+      st->prior[i] = gamma*max(0.0f,st->post[i]) +
+            (1.f-gamma)* (.8*st->gain[i]*st->gain[i]*st->old_ps[i]/tot_noise + .2*st->prior[i]);
+      if (st->prior[i]>100.f)
+         st->prior[i]=100.f;
    }
-   mean_post /= N;
-   if (mean_post<0.f)
-      mean_post=0.f;
 
-   /* Special case for first frame */
-   if (st->nb_adapt==1)
-      for (i=1;i<N;i++)
-         st->old_ps[i] = ps[i];
-
-   /* Compute a priori SNR */
-   {
-      /* A priori update rate */
-      for (i=1;i<N;i++)
-      {
-         float gamma = .15+.85*st->prior[i]*st->prior[i]/((1+st->prior[i])*(1+st->prior[i]));
-         float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i];
-         /* A priori SNR update */
-         st->prior[i] = gamma*max(0.0f,st->post[i]) +
-               (1.f-gamma)* (.8*st->gain[i]*st->gain[i]*st->old_ps[i]/tot_noise + .2*st->prior[i]);
-         
-         if (st->prior[i]>100.f)
-            st->prior[i]=100.f;
-         
-         mean_prior+=st->prior[i];
-      }
-   }
-   mean_prior /= N;
-
 #if 0
    for (i=0;i<N;i++)
    {
@@ -708,52 +555,8 @@
    }
    fprintf (stderr, "\n");
 #endif
-   /*fprintf (stderr, "%f %f\n", mean_prior,mean_post);*/
 
-   if (st->nb_preprocess>=20)
-   {
-      int do_update = 0;
-      float noise_ener=0, sig_ener=0;
-      /* If SNR is low (both a priori and a posteriori), update the noise estimate*/
-      /*if (mean_prior<.23 && mean_post < .5)*/
-      if (mean_prior<.23f && mean_post < .5f)
-         do_update = 1;
-      for (i=1;i<N;i++)
-      {
-         noise_ener += st->noise[i];
-         sig_ener += ps[i];
-      }
-      if (noise_ener > 3.f*sig_ener)
-         do_update = 1;
-      /*do_update = 0;*/
-      if (do_update)
-      {
-         st->consec_noise++;
-      } else {
-         st->consec_noise=0;
-      }
-   }
 
-   if (st->vad_enabled)
-      is_speech = speex_compute_vad(st, ps, mean_prior, mean_post);
-
-
-   if (st->consec_noise>=3)
-   {
-      update_noise(st, st->old_ps, echo);
-   } else {
-      for (i=1;i<N-1;i++)
-      {
-         if (st->update_prob[i]<.5f/* || st->ps[i] < st->noise[i]*/)
-         {
-            if (echo)
-               st->noise[i] = .95f*st->noise[i] + .05f*max(1.0f,st->ps[i]-st->frame_size*st->frame_size*1.0*echo[i]);
-            else
-               st->noise[i] = .95f*st->noise[i] + .05f*st->ps[i];
-         }
-      }
-   }
-
    for (i=1;i<N;i++)
    {
       st->zeta[i] = .7f*st->zeta[i] + .3f*st->prior[i];
@@ -823,6 +626,12 @@
    
    st->gain2[0]=st->gain[0]=0.f;
    st->gain2[N-1]=st->gain[N-1]=0.f;
+   
+   if (1) {
+      float m[24];
+      filterbank_compute_bank(st->bank, st->gain2, m);
+      filterbank_compute_psd(st->bank,m, st->gain2);
+   }
    /*
    for (i=30;i<N-2;i++)
    {
@@ -832,7 +641,7 @@
       st->gain2[i] = st->gain[i];
    */
    if (st->agc_enabled)
-      speex_compute_agc(st, mean_prior);
+      speex_compute_agc(st);
 
 #if 0
    if (!is_speech)
@@ -851,23 +660,23 @@
    /* Apply computed gain */
    for (i=1;i<N;i++)
    {
-      st->frame[2*i-1] *= st->gain2[i];
-      st->frame[2*i] *= st->gain2[i];
+      st->ft[2*i-1] *= st->gain2[i];
+      st->ft[2*i] *= st->gain2[i];
    }
 
    /* Get rid of the DC and very low frequencies */
-   st->frame[0]=0;
-   st->frame[1]=0;
-   st->frame[2]=0;
+   st->ft[0]=0;
+   st->ft[1]=0;
+   st->ft[2]=0;
    /* Nyquist frequency is mostly useless too */
-   st->frame[2*N-1]=0;
+   st->ft[2*N-1]=0;
 
    /* Inverse FFT with 1/N scaling */
-   spx_drft_backward(st->fft_lookup, st->frame);
+   spx_ifft_float(st->fft_lookup, st->ft, st->frame);
+         
+   /*for (i=0;i<2*N;i++)
+    st->frame[i] *= scale;*/
 
-   for (i=0;i<2*N;i++)
-      st->frame[i] *= scale;
-
    {
       float max_sample=0;
       for (i=0;i<2*N;i++)
@@ -898,7 +707,7 @@
    for (i=1;i<N;i++)
       st->old_ps[i] = ps[i];
 
-   return is_speech;
+   return 1;
 }
 
 void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
@@ -920,7 +729,7 @@
       if (st->update_prob[i]<.5f || st->ps[i] < st->noise[i])
       {
          if (echo)
-            st->noise[i] = .95f*st->noise[i] + .1f*max(1.0f,st->ps[i]-st->frame_size*st->frame_size*1.0*echo[i]);
+            st->noise[i] = .95f*st->noise[i] + .1f*max(1.0f,st->ps[i]-1.0*echo[i]);
          else
             st->noise[i] = .95f*st->noise[i] + .1f*st->ps[i];
       }
@@ -971,6 +780,7 @@
       break;
 
    case SPEEX_PREPROCESS_SET_VAD:
+      speex_warning("The VAD has been removed pending a complete rewrite");
       st->vad_enabled = (*(int*)ptr);
       break;
    case SPEEX_PREPROCESS_GET_VAD:



More information about the commits mailing list