[xiph-commits] r9519 - trunk/speex/libspeex
jm at svn.xiph.org
jm at svn.xiph.org
Mon Jun 27 00:01:24 PDT 2005
Author: jm
Date: 2005-06-27 00:01:21 -0700 (Mon, 27 Jun 2005)
New Revision: 9519
Modified:
trunk/speex/libspeex/preprocess.c
Log:
Did some tuning, especially for the speech probability of presence. Some
cleanup too.
Modified: trunk/speex/libspeex/preprocess.c
===================================================================
--- trunk/speex/libspeex/preprocess.c 2005-06-27 06:57:49 UTC (rev 9518)
+++ trunk/speex/libspeex/preprocess.c 2005-06-27 07:01:21 UTC (rev 9519)
@@ -90,26 +90,30 @@
which multiplied by xi/(1+xi) is the optimal gain
in the loudness domain ( sqrt[amplitude] )
*/
-static float hypergeom_gain(float x)
+static inline float hypergeom_gain(float x)
{
int ind;
float integer, frac;
static const float table[21] = {
- 0.82157f, 1.02017f, 1.20461f, 1.37534f, 1.53363f, 1.68092f, 1.81865f,
- 1.94811f, 2.07038f, 2.18638f, 2.29688f, 2.40255f, 2.50391f, 2.60144f,
+ 0.82157f, 1.02017f, 1.20461f, 1.37534f, 1.53363f, 1.68092f, 1.81865f,
+ 1.94811f, 2.07038f, 2.18638f, 2.29688f, 2.40255f, 2.50391f, 2.60144f,
2.69551f, 2.78647f, 2.87458f, 2.96015f, 3.04333f, 3.12431f, 3.20326f};
-
+
if (x>9.5)
- return 1+.12/x;
-
+ return 1+.1296/x;
+
integer = floor(2*x);
frac = 2*x-integer;
ind = (int)integer;
- /*if (ind > 20 || ind < 0)
- fprintf (stderr, "error: %d %f\n", ind, x);*/
+
return ((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f);
}
+static inline float qcurve(float x)
+{
+ return 1.f/(1.f+.1f/(x*x));
+}
+
SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate)
{
int i;
@@ -654,7 +658,8 @@
/* Compute a posteriori SNR */
for (i=1;i<N;i++)
{
- st->post[i] = ps[i]/(1.f+NOISE_OVERCOMPENS*st->noise[i]+st->echo_noise[i]+st->reverb_estimate[i]) - 1.f;
+ float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i];
+ st->post[i] = ps[i]/tot_noise - 1.f;
if (st->post[i]>100.f)
st->post[i]=100.f;
/*if (st->post[i]<0)
@@ -675,32 +680,14 @@
/* A priori update rate */
float gamma;
float min_gamma=0.12f;
- gamma = 1.0f/st->nb_preprocess;
- /*Make update rate smaller when there's no speech*/
-#if 0
- if (mean_post<3.5 && mean_prior < 1)
- min_gamma *= (mean_post+.5);
- else
- min_gamma *= 4.;
-#else
- min_gamma = .1f*fabs(mean_prior - mean_post)*fabs(mean_prior - mean_post);
- if (min_gamma>.15f)
- min_gamma = .15f;
- if (min_gamma<.02f)
- min_gamma = .02f;
-#endif
- /*min_gamma = .08;*/
-
- /*if (gamma<min_gamma)*/
- gamma=min_gamma;
- gamma = .1;
for (i=1;i<N;i++)
{
-
+ float gamma = .1+.9*st->prior[i]*st->prior[i]/((1+st->prior[i])*(1+st->prior[i]));
+ float tot_noise = 1.f+ NOISE_OVERCOMPENS*st->noise[i] + st->echo_noise[i] + st->reverb_estimate[i];
/* A priori SNR update */
st->prior[i] = gamma*max(0.0f,st->post[i]) +
- (1.f-gamma)*st->gain[i]*st->gain[i]*st->old_ps[i]/(1.f+NOISE_OVERCOMPENS*st->noise[i]+st->echo_noise[i]+st->reverb_estimate[i]);
+ (1.f-gamma)* (.8*st->gain[i]*st->gain[i]*st->old_ps[i]/tot_noise + .2*st->prior[i]);
if (st->prior[i]>100.f)
st->prior[i]=100.f;
@@ -753,12 +740,12 @@
} else {
for (i=1;i<N-1;i++)
{
- if (st->update_prob[i]<.5f || st->ps[i] < st->noise[i])
+ if (st->update_prob[i]<.5f/* || st->ps[i] < st->noise[i]*/)
{
if (echo)
- st->noise[i] = .90f*st->noise[i] + .1f*max(1.0f,st->ps[i]-echo[i]);
+ st->noise[i] = .95f*st->noise[i] + .05f*max(1.0f,st->ps[i]-echo[i]);
else
- st->noise[i] = .90f*st->noise[i] + .1f*st->ps[i];
+ st->noise[i] = .95f*st->noise[i] + .05f*st->ps[i];
}
}
}
@@ -775,34 +762,10 @@
{
Zframe += st->zeta[i];
}
+ Zframe /= (freq_end-freq_start);
}
- Zframe /= N;
- if (Zframe<ZMIN)
- {
- Pframe = 0;
- } else {
- if (Zframe > 1.5f*st->Zlast)
- {
- Pframe = 1.f;
- st->Zpeak = Zframe;
- if (st->Zpeak > 10.f)
- st->Zpeak = 10.f;
- if (st->Zpeak < 1.f)
- st->Zpeak = 1.f;
- } else {
- if (Zframe < st->Zpeak*ZMIN)
- {
- Pframe = 0;
- } else if (Zframe > st->Zpeak*ZMAX)
- {
- Pframe = 1;
- } else {
- Pframe = log(Zframe/(st->Zpeak*ZMIN)) / log(ZMAX/ZMIN);
- }
- }
- }
- st->Zlast = Zframe;
+ Pframe = qcurve(Zframe);
/*fprintf (stderr, "%f\n", Pframe);*/
/* Compute gain according to the Ephraim-Malah algorithm */
@@ -822,14 +785,7 @@
zeta1 = st->zeta[i];
else
zeta1 = .25f*st->zeta[i-1] + .5f*st->zeta[i] + .25f*st->zeta[i+1];
- if (zeta1<ZMIN)
- P1 = 0.f;
- else if (zeta1>ZMAX)
- P1 = 1.f;
- else
- P1 = LOG_MIN_MAX_1 * log(ZMIN_1*zeta1);
-
- /*P1 = log(zeta1/ZMIN)/log(ZMAX/ZMIN);*/
+ P1 = qcurve (zeta1);
/* FIXME: add global prob (P2) */
q = 1-Pframe*P1;
@@ -839,16 +795,8 @@
p=1.f/(1.f + (q/(1.f-q))*(1.f+st->prior[i])*exp(-theta));
/*p=1;*/
-#if 0
- /* log-spectral magnitude estimator */
- if (theta<6)
- MM = 0.74082*pow(theta+1,.61)/sqrt(.0001+theta);
- else
- MM=1;
-#else
/* Optimal estimator for loudness domain */
MM = hypergeom_gain(theta);
-#endif
st->gain[i] = prior_ratio * MM;
/*Put some (very arbitraty) limit on the gain*/
@@ -860,14 +808,24 @@
st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];
if (st->denoise_enabled)
{
- st->gain2[i]=p*p*st->gain[i];
+ st->gain2[i] = p*p*st->gain[i];
+ /*st->gain2[i]=(p*sqrt(st->gain[i])+.05*(1-p))*(p*sqrt(st->gain[i])+.05*(1-p));*/
+ /*st->gain2[i] = pow(st->gain[i], p) * pow(.2f,1.f-p);*/
} else {
st->gain2[i]=1.f;
}
}
+
st->gain2[0]=st->gain[0]=0.f;
st->gain2[N-1]=st->gain[N-1]=0.f;
-
+ /*
+ for (i=30;i<N-2;i++)
+ {
+ st->gain[i] = st->gain2[i]*st->gain2[i] + (1-st->gain2[i])*.333*(.6*st->gain2[i-1]+st->gain2[i]+.6*st->gain2[i+1]+.4*st->gain2[i-2]+.4*st->gain2[i+2]);
+ }
+ for (i=30;i<N-2;i++)
+ st->gain2[i] = st->gain[i];
+ */
if (st->agc_enabled)
speex_compute_agc(st, mean_prior);
@@ -957,9 +915,9 @@
if (st->update_prob[i]<.5f || st->ps[i] < st->noise[i])
{
if (echo)
- st->noise[i] = .90f*st->noise[i] + .1f*max(1.0f,st->ps[i]-echo[i]);
+ st->noise[i] = .95f*st->noise[i] + .1f*max(1.0f,st->ps[i]-echo[i]);
else
- st->noise[i] = .90f*st->noise[i] + .1f*st->ps[i];
+ st->noise[i] = .95f*st->noise[i] + .1f*st->ps[i];
}
}
More information about the commits
mailing list