[xiph-cvs] cvs commit: speex/src speexenc.c
Jean-Marc Valin
jm at xiph.org
Wed Dec 11 23:51:35 PST 2002
jm 02/12/12 02:51:35
Modified: . TODO
libspeex nb_celp.c nb_celp.h speex.h vbr.c
src speexenc.c
Log:
Implemented VAD-only mode with comfort noise generation and did some tuning of
the VAD as well. Next step: adapt VAD-only mode to work with wideband too.
Revision Changes Path
1.16 +11 -6 speex/TODO
Index: TODO
===================================================================
RCS file: /usr/local/cvsroot/speex/TODO,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- TODO 2 Dec 2002 00:12:48 -0000 1.15
+++ TODO 12 Dec 2002 07:51:35 -0000 1.16
@@ -2,14 +2,13 @@
*Average bit-rate VBR
-Add maximum/minimum bit-rate control for VBR
*Add "VAD-only" VBR (constant bit-rate or comfort noise)
-*Add encoder/decoder reset function
-Get the encoder to use the rate of packet loss (more conservative pitch gains)
-*No transmission when constant noise/silence
+-No transmission when constant noise/silence
-Codec
-*Improve perceptual enhancement (including wideband)
--Improve VAD
--Improve narrowband vocoder
+Long-term quality improvements
+-Improve perceptual enhancement (including wideband)
+-Improve VAD and VBR
+-Packet-loss concealment
Standards
*Complete Speex RTP profile
@@ -17,3 +16,9 @@
-MS ACM wrapper
*required for 1.0
+
+ideas:
+peelable stream (double codebook, higher bands, stereo)
+DTX in Ogg? (painful)
+LPC from spectral domain
+Masking curve from Vorbis
1.91 +29 -16 speex/libspeex/nb_celp.c
Index: nb_celp.c
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/nb_celp.c,v
retrieving revision 1.90
retrieving revision 1.91
diff -u -r1.90 -r1.91
--- nb_celp.c 11 Dec 2002 22:03:35 -0000 1.90
+++ nb_celp.c 12 Dec 2002 07:51:35 -0000 1.91
@@ -93,7 +93,7 @@
st->preemph = mode->preemph;
st->submodes=mode->submodes;
- st->submodeID=mode->defaultSubmode;
+ st->submodeID=st->submodeSelect=mode->defaultSubmode;
st->pre_mem=0;
st->pre_mem2=0;
st->bounded_pitch = 1;
@@ -162,14 +162,12 @@
st->pitch = (int*)speex_alloc(st->nbSubframes*sizeof(int));
- if (1) {
- st->vbr = (VBRState*)speex_alloc(sizeof(VBRState));
- vbr_init(st->vbr);
- st->vbr_quality = 8;
- st->vbr_enabled = 0;
- } else {
- st->vbr = 0;
- }
+ st->vbr = (VBRState*)speex_alloc(sizeof(VBRState));
+ vbr_init(st->vbr);
+ st->vbr_quality = 8;
+ st->vbr_enabled = 0;
+ st->vad_enabled = 0;
+
st->complexity=2;
st->sampling_rate=8000;
@@ -320,7 +318,7 @@
/*Open-loop pitch*/
- if (!st->submodes[st->submodeID] || st->vbr_enabled || SUBMODE(forced_pitch_gain) ||
+ if (!st->submodes[st->submodeID] || st->vbr_enabled || st->vad_enabled || SUBMODE(forced_pitch_gain) ||
SUBMODE(lbr_pitch) != -1)
{
int nol_pitch[6];
@@ -369,8 +367,8 @@
ol_gain=sqrt(1+ol_gain/st->frameSize);
}
- /*Experimental VBR stuff*/
- if (st->vbr)
+ /*VBR stuff*/
+ if (st->vbr && (st->vbr_enabled||st->vad_enabled))
{
st->relative_quality = vbr_analysis(st->vbr, in, st->frameSize, ol_pitch, ol_pitch_coef);
/*if (delta_qual<0)*/
@@ -399,8 +397,17 @@
speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);
/*fprintf(stderr, "encode: %d %d\n",st->submodeID, mode);*/
} else {
- st->relative_quality = -1;
- }
+ /*VAD only case*/
+ int mode;
+ if (st->relative_quality<2.0)
+ mode=0;
+ else
+ mode=st->submodeSelect;
+ /*speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);*/
+ st->submodeID=mode;
+ }
+ } else {
+ st->relative_quality = -1;
}
/*printf ("VBR quality = %f\n", vbr_qual);*/
@@ -1405,7 +1412,7 @@
break;
case SPEEX_SET_LOW_MODE:
case SPEEX_SET_MODE:
- st->submodeID = (*(int*)ptr);
+ st->submodeSelect = st->submodeID = (*(int*)ptr);
break;
case SPEEX_GET_LOW_MODE:
case SPEEX_GET_MODE:
@@ -1417,6 +1424,12 @@
case SPEEX_GET_VBR:
(*(int*)ptr) = st->vbr_enabled;
break;
+ case SPEEX_SET_VAD:
+ st->vad_enabled = (*(int*)ptr);
+ break;
+ case SPEEX_GET_VAD:
+ (*(int*)ptr) = st->vad_enabled;
+ break;
case SPEEX_SET_VBR_QUALITY:
st->vbr_quality = (*(float*)ptr);
break;
@@ -1430,7 +1443,7 @@
quality = 0;
if (quality > 10)
quality = 10;
- st->submodeID = ((SpeexNBMode*)(st->mode->mode))->quality_map[quality];
+ st->submodeSelect = st->submodeID = ((SpeexNBMode*)(st->mode->mode))->quality_map[quality];
}
break;
case SPEEX_SET_COMPLEXITY:
1.38 +2 -0 speex/libspeex/nb_celp.h
Index: nb_celp.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/nb_celp.h,v
retrieving revision 1.37
retrieving revision 1.38
diff -u -r1.37 -r1.38
--- nb_celp.h 11 Dec 2002 06:49:40 -0000 1.37
+++ nb_celp.h 12 Dec 2002 07:51:35 -0000 1.38
@@ -102,11 +102,13 @@
float vbr_quality; /**< Quality setting for VBR encoding */
float relative_quality; /**< Relative quality that will be needed by VBR */
int vbr_enabled; /**< 1 for enabling VBR, 0 otherwise */
+ int vad_enabled; /**< 1 for enabling VAD, 0 otherwise */
int complexity; /**< Complexity setting (0-10 from least complex to most complex) */
int sampling_rate;
SpeexSubmode **submodes; /**< Sub-mode data */
int submodeID; /**< Activated sub-mode */
+ int submodeSelect; /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
} EncState;
/**Structure representing the full state of the narrowband decoder*/
1.68 +5 -1 speex/libspeex/speex.h
Index: speex.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/speex.h,v
retrieving revision 1.67
retrieving revision 1.68
diff -u -r1.67 -r1.68
--- speex.h 2 Dec 2002 00:12:48 -0000 1.67
+++ speex.h 12 Dec 2002 07:51:35 -0000 1.68
@@ -104,7 +104,11 @@
#define SPEEX_RESET_STATE 26
-#define SPEEX_GET_RELATIVE_QUALITY 27
+#define SPEEX_GET_RELATIVE_QUALITY 29
+
+#define SPEEX_SET_VAD 30
+#define SPEEX_GET_VAD 31
+
/* Used internally, not to be used in applications */
#define SPEEX_GET_PI_GAIN 100
1.15 +23 -7 speex/libspeex/vbr.c
Index: vbr.c
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/vbr.c,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -r1.14 -r1.15
--- vbr.c 11 Dec 2002 22:03:35 -0000 1.14
+++ vbr.c 12 Dec 2002 07:51:35 -0000 1.15
@@ -41,7 +41,7 @@
#define sqr(x) ((x)*(x))
-#define MIN_ENERGY 1000
+#define MIN_ENERGY 6000
#define NOISE_POW .3
@@ -147,8 +147,13 @@
vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener;
vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
pow_ener = pow(ener,NOISE_POW);
+ if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY)
+ vbr->noise_accum = .05*pow_ener;
+
if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level)
- || (voicing<.2 && non_st < .1))
+ || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level)
+ || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level)
+ || (voicing<0 && non_st < .05))
{
float tmp;
va = 0;
@@ -167,6 +172,12 @@
vbr->consec_noise=0;
}
+ if (pow_ener < vbr->noise_level && ener>MIN_ENERGY)
+ {
+ vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener;
+ vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;
+ }
+
/* Checking for very low absolute energy */
if (ener < 30000)
{
@@ -219,23 +230,28 @@
if (vbr->consec_noise>=12)
qual-=1.3;
*/
+ if (vbr->consec_noise>=3)
+ qual=4;
+
if (vbr->consec_noise)
- qual-=.8*log(2.0 + vbr->consec_noise);
+ qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3));
if (qual<0)
qual=0;
if (ener<60000)
{
- if (vbr->consec_noise)
- qual-=0.8*log(2.0 + vbr->consec_noise);
- if (ener<10000&&vbr->consec_noise)
- qual-=0.8*log(2.0 + vbr->consec_noise);
+ if (vbr->consec_noise>2)
+ qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
+ if (ener<10000&&vbr->consec_noise>2)
+ qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
if (qual<0)
qual=0;
qual += .3*log(ener/60000.0);
}
if (qual<-1)
qual=-1;
+
+ /*printf ("%f %f %f %f %d\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise_level), va);*/
vbr->last_pitch_coef = pitch_coef;
vbr->last_quality = qual;
1.61 +12 -0 speex/src/speexenc.c
Index: speexenc.c
===================================================================
RCS file: /usr/local/cvsroot/speex/src/speexenc.c,v
retrieving revision 1.60
retrieving revision 1.61
diff -u -r1.60 -r1.61
--- speexenc.c 11 Nov 2002 01:08:29 -0000 1.60
+++ speexenc.c 12 Dec 2002 07:51:35 -0000 1.61
@@ -144,6 +144,7 @@
printf (" --quality n Encoding quality (0-10), default 3\n");
printf (" --bitrate n Encoding bit-rate (use bit-rate n or lower)\n");
printf (" --vbr Enable variable bit-rate (VBR)\n");
+ printf (" --vad Enable voice activity detection (VAD)\n");
printf (" --comp n Set encoding complexity (0-10), default 3\n");
printf (" --nframes n Number of frames per Ogg packet (1-10), default 1\n");
printf (" --comment Add the given string as an extra comment. This may be\n");
@@ -177,6 +178,7 @@
float input[MAX_FRAME_SIZE];
int frame_size;
int vbr_enabled=0;
+ int vad_enabled=0;
int nbBytes;
SpeexMode *mode=NULL;
void *st;
@@ -188,6 +190,7 @@
{"ultra-wideband", no_argument, NULL, 0},
{"narrowband", no_argument, NULL, 0},
{"vbr", no_argument, NULL, 0},
+ {"vad", no_argument, NULL, 0},
{"quality", required_argument, NULL, 0},
{"bitrate", required_argument, NULL, 0},
{"nframes", required_argument, NULL, 0},
@@ -253,6 +256,9 @@
} else if (strcmp(long_options[option_index].name,"vbr")==0)
{
vbr_enabled=1;
+ } else if (strcmp(long_options[option_index].name,"vad")==0)
+ {
+ vad_enabled=1;
} else if (strcmp(long_options[option_index].name,"quality")==0)
{
quality = atoi (optarg);
@@ -533,6 +539,12 @@
int tmp;
tmp=1;
speex_encoder_ctl(st, SPEEX_SET_VBR, &tmp);
+ }
+ if (vad_enabled)
+ {
+ int tmp;
+ tmp=1;
+ speex_encoder_ctl(st, SPEEX_SET_VAD, &tmp);
}
if (quality >= 0)
{
--- >8 ----
List archives: http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body. No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.
More information about the commits mailing list