[xiph-commits] r16483 - trunk/theora/lib

Tue Aug 18 14:56:31 PDT 2009

Author: xiphmont
Date: 2009-08-18 14:56:31 -0700 (Tue, 18 Aug 2009)
New Revision: 16483

Modified:
   trunk/theora/lib/encint.h
   trunk/theora/lib/rate.c
Log:
Commit derf's "simple_bias" patch to add a simpler but also more
robust rate-error bias correction to two-pass encoding mode.  It
doesn't hold a specific rate quite as tightly, but is also not prone
to being thrown way off by assymetrical scale error between passes.



Modified: trunk/theora/lib/encint.h
===================================================================

--- trunk/theora/lib/encint.h	2009-08-14 06:14:33 UTC (rev 16482)
+++ trunk/theora/lib/encint.h	2009-08-18 21:56:31 UTC (rev 16483)
@@ -35,7 +35,6 @@
 typedef struct oc_mb_enc_info         oc_mb_enc_info;
 typedef struct oc_mode_scheme_chooser oc_mode_scheme_chooser;
 typedef struct oc_iir_filter          oc_iir_filter;
-typedef struct oc_log_linear_fit      oc_log_linear_fit;
 typedef struct oc_frame_metrics       oc_frame_metrics;
 typedef struct oc_rc_state            oc_rc_state;
 typedef struct th_enc_ctx             oc_enc_ctx;
@@ -183,17 +182,6 @@
 
 
 
-/*A linear fit for the log-domain scale factors used in 2-pass.*/
-struct oc_log_linear_fit{
-  ogg_int64_t  x;
-  ogg_int64_t  y;
-  ogg_int64_t  x2;
-  ogg_int64_t  xy;
-  ogg_uint32_t n;
-};
-
-
-
 /*The 2-pass metrics associated with a single frame.*/
 struct oc_frame_metrics{
   /*The log base 2 of the scale factor for this frame in Q24 format.*/
@@ -282,8 +270,8 @@
   /*The frame count of each type in the current 2-pass window; this does not
      include dup frames.*/
   int                nframes[3];
-  /*Bias correction fits for the 1st-pass scale factors.*/
-  oc_log_linear_fit  corr[2];
+  /*The total accumulated estimation bias.*/
+  ogg_int64_t        rate_bias;
 };
 
 

Modified: trunk/theora/lib/rate.c
===================================================================
--- trunk/theora/lib/rate.c	2009-08-14 06:14:33 UTC (rev 16482)
+++ trunk/theora/lib/rate.c	2009-08-18 21:56:31 UTC (rev 16483)
@@ -303,6 +303,7 @@
   _rc->twopass_buffer_bytes=0;
   _rc->twopass_force_kf=0;
   _rc->frame_metrics=NULL;
+  _rc->rate_bias=0;
   if(_enc->state.info.target_bitrate>0){
     /*The buffer size is set equal to the keyframe interval, clamped to the
        range [12,256] frames.
@@ -436,15 +437,21 @@
 
 int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp){
   ogg_int64_t  rate_total;
+  ogg_int64_t  rate_bias;
   int          nframes[2];
   int          buf_delay;
   ogg_int64_t  log_qtarget;
   ogg_int64_t  log_scale0;
+  ogg_int64_t  log_cur_scale;
+  ogg_int64_t  log_qexp;
+  int          exp0;
   int          old_qi;
   int          qi;
+  oc_restore_fpu(&_enc->state);
   /*Figure out how to re-distribute bits so that we hit our fullness target
      before the last keyframe in our current buffer window (after the current
      frame), or the end of the buffer window, whichever comes first.*/
+  log_cur_scale=(ogg_int64_t)_enc->rc.scalefilter[_qti].y[0]<<33;
   switch(_enc->rc.twopass){
     default:{
       ogg_uint32_t next_key_frame;
@@ -539,46 +546,12 @@
         nframes[qti]--;
         scale_sum[qti]-=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
       }
-      /*Compute corrected log_scale estimates for each frame type from the
-         pass-1 scales we measured in the current window.*/
+      /*Compute log_scale estimates for each frame type from the pass-1 scales
+         we measured in the current window.*/
       for(qti=0;qti<2;qti++){
-        oc_log_linear_fit *fit;
-        ogg_int64_t        x;
-        x=nframes[qti]>0?
+        _enc->rc.log_scale[qti]=nframes[qti]>0?
          oc_blog64(scale_sum[qti])-oc_blog64(nframes[qti])-OC_Q57(24):
          -_enc->rc.log_npixels;
-        fit=_enc->rc.corr+qti;
-        if(fit->n>0){
-          ogg_int64_t  var;
-          ogg_uint32_t n_2;
-          n_2=fit->n>>1;
-          var=fit->x2;
-          var-=(fit->x+2048>>12)*(((fit->x+2048>>12)+n_2)/fit->n);
-          if(var>fit->n){
-            ogg_int64_t cov;
-            ogg_int64_t beta;
-            ogg_int64_t alpha;
-            ogg_int64_t y;
-            cov=fit->xy;
-            cov-=(fit->y+2048>>12)*(((fit->x+2048>>12)+n_2)/fit->n);
-            /*beta is Q33.*/
-            beta=((cov+n_2)/fit->n<<33)/((var+n_2)/fit->n);
-            /*alpha is Q57.*/
-            alpha=((fit->y+n_2)/fit->n<<33)-beta*((fit->x+n_2)/fit->n);
-            /*Predict the mean y from the mean x.
-              What we're really trying to compensate for is error in exp[], not
-               error in the scales, and hence we can apply the correction to
-               the mean scale instead of applying it to each pass-1 scale and
-               then taking the mean.*/
-            y=(x+((ogg_int64_t)1<<32)>>33)*beta+alpha;
-            /*If we have enough points for a good estimation, use the corrected
-               predictor value directly.*/
-            if(fit->n>=(128<<qti))x=y;
-            /*Otherwise interpolate between the two.*/
-            else x+=fit->n*(y-x>>7+qti);
-          }
-        }
-        _enc->rc.log_scale[qti]=x;
       }
       /*If we're not using the same frame type as in pass 1, add a scale
          estimate for the corresponding frame using the current low-pass
@@ -594,10 +567,11 @@
          oc_bexp64(_enc->rc.log_scale[_qti]+OC_Q57(24)):0x7FFFFFFFFFFFLL;
         scale*=nframes[_qti];
         nframes[_qti]++;
-        scale+=oc_bexp_q24(_enc->rc.scalefilter[_qti].y[0]);
+        scale+=oc_bexp_q24(log_cur_scale>>33);
         _enc->rc.log_scale[_qti]=oc_blog64(scale)
          -oc_blog64(nframes[qti])-OC_Q57(24);
       }
+      else log_cur_scale=(ogg_int64_t)_enc->rc.cur_metrics.log_scale<<33;
       /*Add the padding from above.
         This basically reverts to 1-pass estimations in the last keyframe
          interval.*/
@@ -618,8 +592,10 @@
       }
     }break;
   }
+  /*If we've been missing our target, add a penalty term.*/
+  rate_bias=(_enc->rc.rate_bias/(_enc->state.curframe_num+1000))*buf_delay;
   /*rate_total is the total bits available over the next buf_delay frames.*/
-  rate_total=_enc->rc.fullness-_enc->rc.target
+  rate_total=_enc->rc.fullness-_enc->rc.target+rate_bias
    +buf_delay*_enc->rc.bits_per_frame;
   log_scale0=_enc->rc.log_scale[_qti]+_enc->rc.log_npixels;
   /*If there aren't enough bits to achieve our desired fullness level, use the
@@ -658,12 +634,11 @@
      next buf_delay frames.
     However, we could overflow the buffer on the very next frame, so check for
      that here, if we're not using a soft target.*/
+  exp0=_enc->rc.exp[_qti];
   if(_enc->rc.cap_overflow){
     ogg_int64_t margin;
     ogg_int64_t soft_limit;
     ogg_int64_t log_soft_limit;
-    ogg_int64_t log_qexp;
-    int         exp0;
     /*Allow 3% of the buffer for prediction error.
       This should be plenty, and we don't mind if we go a bit over; we only
        want to keep these bits from being completely wasted.*/
@@ -672,7 +647,6 @@
     soft_limit=_enc->rc.fullness+_enc->rc.bits_per_frame-(_enc->rc.max-margin);
     log_soft_limit=oc_blog64(soft_limit);
     /*If we're predicting we won't use that many...*/
-    exp0=_enc->rc.exp[_qti];
     log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0;
     if(log_scale0-log_qexp<log_soft_limit){
       /*Scale the adjustment based on how far into the margin we are.*/
@@ -700,14 +674,11 @@
      here, if we're not using a soft target.*/
   if(!_enc->rc.cap_underflow||_enc->rc.drop_frames){
     ogg_int64_t log_hard_limit;
-    ogg_int64_t log_qexp;
-    int         exp0;
     /*Compute the maximum number of bits we can use in the next frame.
       Allow 50% of the rate for a single frame for prediction error.
       This may not be enough for keyframes or sudden changes in complexity.*/
     log_hard_limit=oc_blog64(_enc->rc.fullness+(_enc->rc.bits_per_frame>>1));
     /*If we're predicting we'll use more than this...*/
-    exp0=_enc->rc.exp[_qti];
     log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0;
     if(log_scale0-log_qexp>log_hard_limit){
       /*Force the target to hit our limit exactly.*/
@@ -717,6 +688,9 @@
       log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG);
     }
   }
+  /*Compute a final estimate of the number of bits we plan to use.*/
+  log_qexp=(log_qtarget-OC_Q57(2)>>6)*_enc->rc.exp[_qti];
+  _enc->rc.rate_bias+=oc_bexp64(log_cur_scale+_enc->rc.log_npixels-log_qexp);
   qi=oc_enc_find_qi_for_target(_enc,_qti,old_qi,
    _enc->state.info.quality,log_qtarget);
   /*Save the quantizer target for lambda calculations.*/
@@ -759,29 +733,6 @@
     }break;
     case 2:{
       /*Pass 2 mode:*/
-      /*Accumulate statistics for estimation bias correction.
-        Everything is done in Q24 format.*/
-      if(_bits>0&&_enc->rc.cur_metrics.frame_type==_qti){
-        oc_log_linear_fit *fit;
-        ogg_int64_t        x;
-        ogg_int64_t        y;
-        x=_enc->rc.cur_metrics.log_scale;
-        y=log_scale>>33;
-        fit=_enc->rc.corr+_qti;
-        /*Use long-term exponential moving averages for the fit statistics.*/
-        if(fit->n>=1000){
-          fit->n>>=1;
-          fit->x>>=1;
-          fit->y>>=1;
-          fit->x2>>=1;
-          fit->xy>>=1;
-        }
-        fit->n++;
-        fit->x+=x;
-        fit->y+=y;
-        fit->x2+=(x+2048>>12)*(x+2048>>12);
-        fit->xy+=(x+2048>>12)*(y+2048>>12);
-      }
       if(!_trial){
         ogg_int64_t next_frame_num;
         int         qti;
@@ -866,6 +817,8 @@
     if(_enc->rc.cap_underflow&&_enc->rc.fullness<0){
       _enc->rc.fullness=0;
     }
+    /*Adjust the bias for the real bits we've used.*/
+    _enc->rc.rate_bias-=_bits;
   }
   return dropped;
 }
@@ -1024,7 +977,6 @@
       }
       _enc->rc.exp[0]=exp[0];
       _enc->rc.exp[1]=exp[1];
-      memset(_enc->rc.corr,0,sizeof(_enc->rc.corr));
       /*Clear the header data from the buffer to make room for packet data.*/
       _enc->rc.twopass_buffer_fill=0;
       _enc->rc.twopass_buffer_bytes=0;