[xiph-commits] r15842 - branches/theora-thusnelda/lib/enc

Thu Mar 26 14:43:14 PDT 2009

Author: tterribe
Date: 2009-03-26 14:43:13 -0700 (Thu, 26 Mar 2009)
New Revision: 15842

Modified:
   branches/theora-thusnelda/lib/enc/codec_internal.h
   branches/theora-thusnelda/lib/enc/encoder_quant.c
   branches/theora-thusnelda/lib/enc/encoder_toplevel.c
   branches/theora-thusnelda/lib/enc/enquant.h
Log:
Convert some more rate-control operations to the log domain, and tune the
 various parameters.
It's surprising how close my initial "random guesses" were to the best values.


Modified: branches/theora-thusnelda/lib/enc/codec_internal.h
===================================================================

--- branches/theora-thusnelda/lib/enc/codec_internal.h	2009-03-24 22:01:40 UTC (rev 15841)
+++ branches/theora-thusnelda/lib/enc/codec_internal.h	2009-03-26 21:43:13 UTC (rev 15842)
@@ -196,9 +196,9 @@
   ogg_int64_t max;
   ogg_int64_t log_npixels;
   unsigned    exp[2];
-  unsigned    scale[2];
+  ogg_int64_t log_scale[2];
+  ogg_int64_t log_qtarget;
   int         buf_delay;
-  int         qtarget;
 };
 
 /* Encoder (Compressor) instance -- installed in a theora_state */
@@ -332,17 +332,10 @@
   /*An "average" quantizer for each quantizer type (INTRA or INTER) and QI
      value.
     This is used to paramterize the rate control decisions.
-    It is scaled by a factor of 8, which is necessary to gain sufficient
-     resolution to distinguish the original VP3 quantizers at the low end (even
-     then some INTRA quantizers are indistinguishable, but they really _are_
-     essentially the same, which is an unfortunate effect of VP3 a) using the
-     same DC scale for many QI values and b) lopping off the two fractional
-     bits of quantizer precision for essentially no reason and then spacing its
-     AC scale factors very closely.
-    Keep in mind these are in the DCT domain, and so are scaled by an
-     additional factor of 4 from the pixel domain, for a total scale factor of
-     32.*/
-  ogg_uint16_t     qavg[2][64];
+    They are kept in the log domain to simplify later processing.
+    Keep in mind these are DCT domain quantizers, and so are scaled by an
+     additional factor of 4 from the pixel domain.*/
+  ogg_int64_t      log_qavg[2][64];
   /*The buffer state used to drive rate control.*/
   oc_rc_state      rc;
   DspFunctions     dsp;  /* Selected functions for this platform */

Modified: branches/theora-thusnelda/lib/enc/encoder_quant.c
===================================================================
--- branches/theora-thusnelda/lib/enc/encoder_quant.c	2009-03-24 22:01:40 UTC (rev 15841)
+++ branches/theora-thusnelda/lib/enc/encoder_quant.c	2009-03-26 21:43:13 UTC (rev 15842)
@@ -25,15 +25,6 @@
 static const unsigned OC_DC_QUANT_MIN[2]={4<<2,8<<2};
 static const unsigned OC_AC_QUANT_MIN[2]={2<<2,4<<2};
 
-/*Reciprocal square root.
-  Return: 2**15/sqrt(_x).*/
-static ogg_uint16_t oc_rsqrt(ogg_uint32_t _x){
-  /*A simple polynomial approximation of this would be fine, but since we have
-     the routines and this is not performance critical, let's do it
-     accurately.*/
-  return (ogg_uint16_t)oc_bexp64(OC_Q57(15)-(oc_blog64(_x)>>1));
-}
-
 void oc_quant_params_pack(oggpack_buffer *_opb,const th_quant_info *_qinfo){
   const th_quant_ranges *qranges;
   const th_quant_base   *base_mats[2*3*64];
@@ -189,16 +180,16 @@
 };
 
 /*The fraction of the squared magnitude of the residuals in each color channel
-   relative to the total, scaled by 2**11, for each pixel format.
+   relative to the total, scaled by 2**16, for each pixel format.
   These values were measured after motion-compensated prediction, before
    quantization, over a large set of test video encoded at all possible rates.
   TODO: These values are only from INTER frames; it should be re-measured for
    INTRA frames.*/
 static ogg_uint16_t OC_PCD[4][3]={
- {1873,  95,  80},
- {1725, 175, 148},
- {1725, 175, 148},
- {1490, 302, 256}
+  {59926, 3038, 2572},
+  {55201, 5597, 4738},
+  {55201, 5597, 4738},
+  {47682, 9669, 8185}
 };
 
 
@@ -281,8 +272,8 @@
       The value Q*lambda completely determines the entropy of the
        coefficients.*/
     for(qi=0;qi<64;qi++){
-      ogg_uint32_t q;
-      q=0;
+      ogg_int64_t q2;
+      q2=0;
       for(pli=0;pli<3;pli++){
         ogg_uint32_t qp;
         qp=0;
@@ -293,9 +284,10 @@
           rq=(OC_RPSD[qti][ci]+(qd>>1))/qd;
           qp+=rq*(ogg_uint32_t)rq;
         }
-        q+=OC_PCD[cpi->info.pixelformat][pli]*(qp+128>>8);
+        q2+=OC_PCD[cpi->info.pixelformat][pli]*(ogg_int64_t)qp;
       }
-      cpi->qavg[qti][qi]=oc_rsqrt(q+1024>>11);
+      /*qavg=1.0/sqrt(q2).*/
+      cpi->log_qavg[qti][qi]=OC_Q57(48)-oc_blog64(q2)>>1;
     }
   }
 }

Modified: branches/theora-thusnelda/lib/enc/encoder_toplevel.c
===================================================================
--- branches/theora-thusnelda/lib/enc/encoder_toplevel.c	2009-03-24 22:01:40 UTC (rev 15841)
+++ branches/theora-thusnelda/lib/enc/encoder_toplevel.c	2009-03-26 21:43:13 UTC (rev 15842)
@@ -31,7 +31,6 @@
 
 static void oc_enc_calc_lambda(CP_INSTANCE *cpi){
   ogg_int64_t l;
-  int         q;
   /*For now, lambda is fixed depending on the qi value and frame type:
       lambda=1.125*(qavg[qti][qi]**1.5)
     A more adaptive scheme might perform better, but Theora's behavior does not
@@ -40,11 +39,10 @@
     This allows us to scale to rates slightly lower than we'd normally be able
      to reach, and give the rate control a semblance of "fractional QI"
      precision.*/
-  if(cpi->info.target_bitrate>0)q=cpi->rc.qtarget;
-  else q=cpi->qavg[cpi->FrameType!=KEY_FRAME][cpi->BaseQ];
-  l=oc_blog64(q)-OC_Q57(3);
+  if(cpi->info.target_bitrate>0)l=cpi->rc.log_qtarget;
+  else l=cpi->log_qavg[cpi->FrameType!=KEY_FRAME][cpi->BaseQ];
   /*Raise to the 1.5 power.*/
-  l+=(l>>1);
+  l+=l>>1;
   /*Multiply by 1.125.*/
   l+=0x00570068E7EF5A1ELL;
   /*The upper bound here is 0x48000.*/
@@ -84,49 +82,50 @@
     TODO: These still need to be tuned.*/
   npixels=_info->width*(ogg_int64_t)_info->height;
   _rc->log_npixels=oc_blog64(npixels);
-  ibpp=(npixels+(_rc->bits_per_frame>>1))/_rc->bits_per_frame;
-  if(ibpp<10){
+  ibpp=npixels/_rc->bits_per_frame;
+  if(ibpp<1){
+    _rc->exp[0]=59;
+    _rc->log_scale[0]=oc_blog64(1997)-OC_Q57(8);
+  }
+  else if(ibpp<2){
+    _rc->exp[0]=55;
+    _rc->log_scale[0]=oc_blog64(1604)-OC_Q57(8);
+  }
+  else{
     _rc->exp[0]=48;
-    _rc->scale[0]=2199;
-    _rc->exp[1]=77;
-    _rc->scale[1]=2500;
+    _rc->log_scale[0]=oc_blog64(834)-OC_Q57(8);
   }
-  else if(ibpp<20){
-    _rc->exp[0]=51;
-    _rc->scale[0]=1781;
-    _rc->exp[1]=90;
-    _rc->scale[1]=1700;
+  if(ibpp<4){
+    _rc->exp[1]=100;
+    _rc->log_scale[1]=oc_blog64(2249)-OC_Q57(8);
   }
+  else if(ibpp<8){
+    _rc->exp[1]=95;
+    _rc->log_scale[1]=oc_blog64(1751)-OC_Q57(8);
+  }
   else{
-    _rc->exp[0]=54;
-    _rc->scale[0]=870;
-    _rc->exp[1]=102;
-    _rc->scale[1]=1300;
+    _rc->exp[1]=73;
+    _rc->log_scale[1]=oc_blog64(1260)-OC_Q57(8);
   }
 }
 
-static unsigned OC_RATE_SMOOTHING[2]={0x80,0x80};
-
-/*TODO: Convert the following entirely to fixed point.*/
-
 static void oc_enc_update_rc_state(CP_INSTANCE *cpi,
  long _bits,int _qti,int _qi,int _trial){
+  static const unsigned OC_SCALE_SMOOTHING[2]={0x13,0x00};
   ogg_int64_t   log_scale;
   ogg_int64_t   log_bits;
   ogg_int64_t   log_qexp;
-  ogg_uint32_t  scale;
   /*Compute the estimated scale factor for this frame type.*/
   log_bits=oc_blog64(_bits);
-  log_qexp=oc_blog64(cpi->qavg[_qti][_qi])-OC_Q57(5);
+  log_qexp=cpi->log_qavg[_qti][_qi]-OC_Q57(2);
   log_qexp=(log_qexp>>6)*(cpi->rc.exp[_qti]);
-  log_scale=OC_Q57(8)+log_bits-cpi->rc.log_npixels+log_qexp;
-  scale=(ogg_uint32_t)oc_bexp64(OC_MINI(log_scale,OC_Q57(16)));
+  log_scale=OC_MINI(log_bits-cpi->rc.log_npixels+log_qexp,OC_Q57(16));
   /*Use it to set that factor directly if this was a trial.*/
-  if(_trial)cpi->rc.scale[_qti]=scale;
+  if(_trial)cpi->rc.log_scale[_qti]=log_scale;
   /*Otherwise update an exponential moving average.*/
   else{
-    cpi->rc.scale[_qti]=(scale<<16)
-     +(cpi->rc.scale[_qti]-scale)*OC_RATE_SMOOTHING[_qti]>>16;
+    cpi->rc.log_scale[_qti]=log_scale
+     +(cpi->rc.log_scale[_qti]-log_scale+128>>8)*OC_SCALE_SMOOTHING[_qti];
   }
   /*Update the buffer fullness level.*/
   if(!_trial){
@@ -141,9 +140,9 @@
   ogg_uint32_t next_key_frame;
   int          nframes[2];
   int          buf_delay;
-  int          qtarget;
+  ogg_int64_t  log_qtarget;
   int          best_qi;
-  int          best_qdiff;
+  ogg_int64_t  best_qdiff;
   int          qi;
   /*Figure out how to re-distribute bits so that we hit our fullness target
      before the last keyframe in our current buffer window (after the current
@@ -161,7 +160,7 @@
    +buf_delay*cpi->rc.bits_per_frame;
   /*If there aren't enough bits to achieve our desired fullness level, use the
      minimum quality permitted.*/
-  if(rate_total<=0)qtarget=OC_QUANT_MAX<<3;
+  if(rate_total<=0)log_qtarget=OC_QUANT_MAX_LOG;
   else{
     static const unsigned char KEY_RATIO[2]={29,32};
     ogg_int64_t   log_scale0;
@@ -169,10 +168,9 @@
     ogg_int64_t   prevr;
     ogg_int64_t   curr;
     ogg_int64_t   realr;
-    ogg_int64_t   log_qtarget;
     int           i;
-    log_scale0=oc_blog64(cpi->rc.scale[_qti])-OC_Q57(8)+cpi->rc.log_npixels;
-    log_scale1=oc_blog64(cpi->rc.scale[1-_qti])-OC_Q57(8)+cpi->rc.log_npixels;
+    log_scale0=cpi->rc.log_scale[_qti]+cpi->rc.log_npixels;
+    log_scale1=cpi->rc.log_scale[1-_qti]+cpi->rc.log_npixels;
     curr=(rate_total+(buf_delay>>1))/buf_delay;
     realr=curr*KEY_RATIO[_qti]+16>>5;
     for(i=0;i<10;i++){
@@ -181,7 +179,6 @@
       ogg_int64_t log_rpow;
       ogg_int64_t rscale;
       ogg_int64_t drscale;
-      ogg_int64_t mask;
       ogg_int64_t bias;
       prevr=curr;
       log_rpow=oc_blog64(prevr)-log_scale0;
@@ -194,42 +191,45 @@
        cpi->rc.exp[1-_qti]/prevr;
       rderiv=nframes[_qti]*KEY_RATIO[_qti]+drscale;
       if(rderiv==0)break;
-      mask=OC_SIGNMASK(rdiff)^OC_SIGNMASK(rderiv);
-      bias=rderiv+mask^mask;
+      bias=rderiv+OC_SIGNMASK(rdiff^rderiv)^OC_SIGNMASK(rdiff^rderiv);
       curr=prevr-((rdiff<<1)+bias)/(rderiv<<1);
       realr=curr*KEY_RATIO[_qti]+16>>5;
       if(curr<=0||realr>rate_total||prevr==curr)break;
     }
-    log_qtarget=OC_Q57(5)-((oc_blog64(realr)-log_scale0+(cpi->rc.exp[_qti]>>1))/
+    log_qtarget=OC_Q57(2)-((oc_blog64(realr)-log_scale0+(cpi->rc.exp[_qti]>>1))/
      cpi->rc.exp[_qti]<<6);
-    qtarget=(int)oc_bexp64(OC_MINI(log_qtarget,OC_Q57(15)));
+    log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG);
   }
-  /*If this was not one of the initial frames, limit a change in quality.*/
+  /*If this was not one of the initial frames, limit the change in quality.*/
   if(!_trial){
-    int qmin;
-    int qmax;
-    /*TODO: With user-specified quant matrices, we need to enlarge these limits
+    ogg_int64_t log_qmin;
+    ogg_int64_t log_qmax;
+    /*Clamp the target quantizer to within [0.8*Q,1.2*Q], where Q is the
+       current quantizer.
+      TODO: With user-specified quant matrices, we need to enlarge these limits
        if they don't actually let us change qi values.*/
-    qmin=cpi->qavg[_qti][cpi->BaseQ]*13>>4;
-    qmax=cpi->qavg[_qti][cpi->BaseQ]*5>>2;
-    qtarget=OC_CLAMPI(qmin,qtarget,qmax);
+    log_qmin=cpi->log_qavg[_qti][cpi->BaseQ]-0x00A4D3C25E68DC58LL;
+    log_qmax=cpi->log_qavg[_qti][cpi->BaseQ]+0x00A4D3C25E68DC58LL;
+    log_qtarget=OC_CLAMPI(log_qmin,log_qtarget,log_qmax);
   }
   /*Search for the quantizer that matches the target most closely.
     We don't assume a linear ordering, but when there are ties we do pick the
      quantizer closest to the current one.*/
   best_qi=cpi->info.quality;
-  best_qdiff=abs(cpi->qavg[_qti][best_qi]-qtarget);
+  best_qdiff=cpi->log_qavg[_qti][best_qi]-log_qtarget;
+  best_qdiff=best_qdiff+OC_SIGNMASK(best_qdiff)^OC_SIGNMASK(best_qdiff);
   for(qi=cpi->info.quality+1;qi<64;qi++){
-    int qdiff;
-    qdiff=abs(cpi->qavg[_qti][qi]-qtarget);
+    ogg_int64_t qdiff;
+    qdiff=cpi->log_qavg[_qti][qi]-log_qtarget;
+    qdiff=qdiff+OC_SIGNMASK(qdiff)^OC_SIGNMASK(qdiff);
     if(qdiff<best_qdiff||
      qdiff==best_qdiff&&abs(qi-cpi->BaseQ)<abs(best_qi-cpi->BaseQ)){
       best_qi=qi;
       best_qdiff=qdiff;
     }
   }
-  /*Save these parameters for lambda calculations.*/
-  cpi->rc.qtarget=qtarget;
+  /*Save the quantizer target for lambda calculations.*/
+  cpi->rc.log_qtarget=log_qtarget;
   return best_qi;
 }
 

Modified: branches/theora-thusnelda/lib/enc/enquant.h
===================================================================
--- branches/theora-thusnelda/lib/enc/enquant.h	2009-03-24 22:01:40 UTC (rev 15841)
+++ branches/theora-thusnelda/lib/enc/enquant.h	2009-03-26 21:43:13 UTC (rev 15842)
@@ -4,6 +4,8 @@
 
 typedef struct oc_iquant oc_iquant;
 
+#define OC_QUANT_MAX_LOG (OC_Q57(OC_STATIC_ILOG_32(OC_QUANT_MAX)-1))
+
 /*Used to compute x/d via ((x*m>>16)+x>>l)+(x<0))
    (i.e., one 16x16->16 mul, 2 shifts, and 2 adds).
   This is not an approximation; for 16-bit x and d, it is exact.*/