[xiph-cvs] cvs commit: speex/src speexenc.c

Jean-Marc Valin jm at xiph.org
Wed Dec 11 23:51:35 PST 2002



jm          02/12/12 02:51:35

  Modified:    .        TODO
               libspeex nb_celp.c nb_celp.h speex.h vbr.c
               src      speexenc.c
  Log:
  Implemented VAD-only mode with comfort noise generation and did some tuning of
  the VAD too. Next step: adapt VAD-only mode to work with wideband as well.

Revision  Changes    Path
1.16      +11 -6     speex/TODO

Index: TODO
===================================================================
RCS file: /usr/local/cvsroot/speex/TODO,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- TODO	2 Dec 2002 00:12:48 -0000	1.15
+++ TODO	12 Dec 2002 07:51:35 -0000	1.16
@@ -2,14 +2,13 @@
 *Average bit-rate VBR
 -Add maximum/minimum bit-rate control for VBR
 *Add "VAD-only" VBR (constant bit-rate or comfort noise)
-*Add encoder/decoder reset function
 -Get the encoder to use the rate of packet loss (more conservative pitch gains)
-*No transmission when constant noise/silence
+-No transmission when constant noise/silence
 
-Codec
-*Improve perceptual enhancement (including wideband)
--Improve VAD
--Improve narrowband vocoder
+Long-term quality improvements
+-Improve perceptual enhancement (including wideband)
+-Improve VAD and VBR
+-Packet-loss concealment
 
 Standards
 *Complete Speex RTP profile
@@ -17,3 +16,9 @@
 -MS ACM wrapper
 
 *required for 1.0
+
+ideas:
+peelable stream (double codebook, higher bands, stereo)
+DTX in Ogg? (painful)
+LPC from spectral domain
+Masking curve from Vorbis

1.91      +29 -16    speex/libspeex/nb_celp.c

Index: nb_celp.c
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/nb_celp.c,v
retrieving revision 1.90
retrieving revision 1.91
diff -u -r1.90 -r1.91
--- nb_celp.c	11 Dec 2002 22:03:35 -0000	1.90
+++ nb_celp.c	12 Dec 2002 07:51:35 -0000	1.91
@@ -93,7 +93,7 @@
    st->preemph = mode->preemph;
   
    st->submodes=mode->submodes;
-   st->submodeID=mode->defaultSubmode;
+   st->submodeID=st->submodeSelect=mode->defaultSubmode;
    st->pre_mem=0;
    st->pre_mem2=0;
    st->bounded_pitch = 1;
@@ -162,14 +162,12 @@
 
    st->pitch = (int*)speex_alloc(st->nbSubframes*sizeof(int));
 
-   if (1) {
-      st->vbr = (VBRState*)speex_alloc(sizeof(VBRState));
-      vbr_init(st->vbr);
-      st->vbr_quality = 8;
-      st->vbr_enabled = 0;
-   } else {
-      st->vbr = 0;
-   }
+   st->vbr = (VBRState*)speex_alloc(sizeof(VBRState));
+   vbr_init(st->vbr);
+   st->vbr_quality = 8;
+   st->vbr_enabled = 0;
+   st->vad_enabled = 0;
+
    st->complexity=2;
    st->sampling_rate=8000;
 
@@ -320,7 +318,7 @@
 
 
       /*Open-loop pitch*/
-      if (!st->submodes[st->submodeID] || st->vbr_enabled || SUBMODE(forced_pitch_gain) ||
+      if (!st->submodes[st->submodeID] || st->vbr_enabled || st->vad_enabled || SUBMODE(forced_pitch_gain) ||
           SUBMODE(lbr_pitch) != -1)
       {
          int nol_pitch[6];
@@ -369,8 +367,8 @@
       ol_gain=sqrt(1+ol_gain/st->frameSize);
    }
 
-   /*Experimental VBR stuff*/
-   if (st->vbr)
+   /*VBR stuff*/
+   if (st->vbr && (st->vbr_enabled||st->vad_enabled))
    {
       st->relative_quality = vbr_analysis(st->vbr, in, st->frameSize, ol_pitch, ol_pitch_coef);
       /*if (delta_qual<0)*/
@@ -399,8 +397,17 @@
          speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);
          /*fprintf(stderr, "encode: %d %d\n",st->submodeID, mode);*/
       } else {
-         st->relative_quality = -1;
-      }
+         /*VAD only case*/
+         int mode;
+         if (st->relative_quality<2.0)
+            mode=0;
+         else
+            mode=st->submodeSelect;
+         /*speex_encoder_ctl(state, SPEEX_SET_MODE, &mode);*/
+         st->submodeID=mode;
+      } 
+   } else {
+      st->relative_quality = -1;
    }
    /*printf ("VBR quality = %f\n", vbr_qual);*/
 
@@ -1405,7 +1412,7 @@
       break;
    case SPEEX_SET_LOW_MODE:
    case SPEEX_SET_MODE:
-      st->submodeID = (*(int*)ptr);
+      st->submodeSelect = st->submodeID = (*(int*)ptr);
       break;
    case SPEEX_GET_LOW_MODE:
    case SPEEX_GET_MODE:
@@ -1417,6 +1424,12 @@
    case SPEEX_GET_VBR:
       (*(int*)ptr) = st->vbr_enabled;
       break;
+   case SPEEX_SET_VAD:
+      st->vad_enabled = (*(int*)ptr);
+      break;
+   case SPEEX_GET_VAD:
+      (*(int*)ptr) = st->vad_enabled;
+      break;
    case SPEEX_SET_VBR_QUALITY:
       st->vbr_quality = (*(float*)ptr);
       break;
@@ -1430,7 +1443,7 @@
             quality = 0;
          if (quality > 10)
             quality = 10;
-         st->submodeID = ((SpeexNBMode*)(st->mode->mode))->quality_map[quality];
+         st->submodeSelect = st->submodeID = ((SpeexNBMode*)(st->mode->mode))->quality_map[quality];
       }
       break;
    case SPEEX_SET_COMPLEXITY:

1.38      +2 -0      speex/libspeex/nb_celp.h

Index: nb_celp.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/nb_celp.h,v
retrieving revision 1.37
retrieving revision 1.38
diff -u -r1.37 -r1.38
--- nb_celp.h	11 Dec 2002 06:49:40 -0000	1.37
+++ nb_celp.h	12 Dec 2002 07:51:35 -0000	1.38
@@ -102,11 +102,13 @@
    float  vbr_quality;    /**< Quality setting for VBR encoding */
    float  relative_quality; /**< Relative quality that will be needed by VBR */
    int    vbr_enabled;    /**< 1 for enabling VBR, 0 otherwise */
+   int    vad_enabled;    /**< 1 for enabling VAD, 0 otherwise */
    int    complexity;     /**< Complexity setting (0-10 from least complex to most complex) */
    int    sampling_rate;
 
    SpeexSubmode **submodes; /**< Sub-mode data */
    int    submodeID;      /**< Activated sub-mode */
+   int    submodeSelect;  /**< Mode chosen by the user (may differ from submodeID if VAD is on) */
 } EncState;
 
 /**Structure representing the full state of the narrowband decoder*/

1.68      +5 -1      speex/libspeex/speex.h

Index: speex.h
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/speex.h,v
retrieving revision 1.67
retrieving revision 1.68
diff -u -r1.67 -r1.68
--- speex.h	2 Dec 2002 00:12:48 -0000	1.67
+++ speex.h	12 Dec 2002 07:51:35 -0000	1.68
@@ -104,7 +104,11 @@
 
 #define SPEEX_RESET_STATE 26
 
-#define SPEEX_GET_RELATIVE_QUALITY 27
+#define SPEEX_GET_RELATIVE_QUALITY 29
+
+#define SPEEX_SET_VAD 30
+#define SPEEX_GET_VAD 31
+
 
    /* Used internally, not to be used in applications */
 #define SPEEX_GET_PI_GAIN 100

1.15      +23 -7     speex/libspeex/vbr.c

Index: vbr.c
===================================================================
RCS file: /usr/local/cvsroot/speex/libspeex/vbr.c,v
retrieving revision 1.14
retrieving revision 1.15
diff -u -r1.14 -r1.15
--- vbr.c	11 Dec 2002 22:03:35 -0000	1.14
+++ vbr.c	12 Dec 2002 07:51:35 -0000	1.15
@@ -41,7 +41,7 @@
 
 #define sqr(x) ((x)*(x))
 
-#define MIN_ENERGY 1000
+#define MIN_ENERGY 6000
 #define NOISE_POW .3
 
 
@@ -147,8 +147,13 @@
    vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy_alpha*ener;
    vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count;
    pow_ener = pow(ener,NOISE_POW);
+   if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY)
+      vbr->noise_accum = .05*pow_ener;
+
    if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level)
-       || (voicing<.2 && non_st < .1))
+       || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level)
+       || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level)
+       || (voicing<0 && non_st < .05))
    {
       float tmp;
       va = 0;
@@ -167,6 +172,12 @@
       vbr->consec_noise=0;
    }
 
+   if (pow_ener < vbr->noise_level && ener>MIN_ENERGY)
+   {
+      vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener;
+      vbr->noise_accum_count = .95*vbr->noise_accum_count + .05;      
+   }
+
    /* Checking for very low absolute energy */
    if (ener < 30000)
    {
@@ -219,23 +230,28 @@
    if (vbr->consec_noise>=12)
       qual-=1.3;
    */
+   if (vbr->consec_noise>=3)
+      qual=4;
+
    if (vbr->consec_noise)
-      qual-=.8*log(2.0 + vbr->consec_noise);
+      qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3));
    if (qual<0)
       qual=0;
    
    if (ener<60000)
    {
-      if (vbr->consec_noise)
-         qual-=0.8*log(2.0 + vbr->consec_noise);
-      if (ener<10000&&vbr->consec_noise)
-         qual-=0.8*log(2.0 + vbr->consec_noise);
+      if (vbr->consec_noise>2)
+         qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
+      if (ener<10000&&vbr->consec_noise>2)
+         qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3));
       if (qual<0)
          qual=0;
       qual += .3*log(ener/60000.0);
    }
    if (qual<-1)
       qual=-1;
+
+   /*printf ("%f %f %f %f %d\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise_level), va);*/
 
    vbr->last_pitch_coef = pitch_coef;
    vbr->last_quality = qual;

1.61      +12 -0     speex/src/speexenc.c

Index: speexenc.c
===================================================================
RCS file: /usr/local/cvsroot/speex/src/speexenc.c,v
retrieving revision 1.60
retrieving revision 1.61
diff -u -r1.60 -r1.61
--- speexenc.c	11 Nov 2002 01:08:29 -0000	1.60
+++ speexenc.c	12 Dec 2002 07:51:35 -0000	1.61
@@ -144,6 +144,7 @@
    printf (" --quality n        Encoding quality (0-10), default 3\n"); 
    printf (" --bitrate n        Encoding bit-rate (use bit-rate n or lower)\n"); 
    printf (" --vbr              Enable variable bit-rate (VBR)\n"); 
+   printf (" --vad              Enable voice activity detection (VAD)\n"); 
    printf (" --comp n           Set encoding complexity (0-10), default 3\n"); 
    printf (" --nframes n        Number of frames per Ogg packet (1-10), default 1\n"); 
    printf (" --comment          Add the given string as an extra comment. This may be\n");
@@ -177,6 +178,7 @@
    float input[MAX_FRAME_SIZE];
    int frame_size;
    int vbr_enabled=0;
+   int vad_enabled=0;
    int nbBytes;
    SpeexMode *mode=NULL;
    void *st;
@@ -188,6 +190,7 @@
       {"ultra-wideband", no_argument, NULL, 0},
       {"narrowband", no_argument, NULL, 0},
       {"vbr", no_argument, NULL, 0},
+      {"vad", no_argument, NULL, 0},
       {"quality", required_argument, NULL, 0},
       {"bitrate", required_argument, NULL, 0},
       {"nframes", required_argument, NULL, 0},
@@ -253,6 +256,9 @@
          } else if (strcmp(long_options[option_index].name,"vbr")==0)
          {
             vbr_enabled=1;
+         } else if (strcmp(long_options[option_index].name,"vad")==0)
+         {
+            vad_enabled=1;
          } else if (strcmp(long_options[option_index].name,"quality")==0)
          {
             quality = atoi (optarg);
@@ -533,6 +539,12 @@
       int tmp;
       tmp=1;
       speex_encoder_ctl(st, SPEEX_SET_VBR, &tmp);
+   }
+   if (vad_enabled)
+   {
+      int tmp;
+      tmp=1;
+      speex_encoder_ctl(st, SPEEX_SET_VAD, &tmp);
    }
    if (quality >= 0)
    {

--- >8 ----
List archives:  http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body.  No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.



More information about the commits mailing list