[xiph-commits] r15573 - branches/theora-thusnelda/lib/enc

Thu Dec 11 16:06:04 PST 2008

Author: xiphmont
Date: 2008-12-11 16:06:04 -0800 (Thu, 11 Dec 2008)
New Revision: 15573

Modified:
   branches/theora-thusnelda/lib/enc/codec_internal.h
   branches/theora-thusnelda/lib/enc/dct_encode.c
   branches/theora-thusnelda/lib/enc/encoder_toplevel.c
   branches/theora-thusnelda/lib/enc/frarray.c
   branches/theora-thusnelda/lib/enc/mode.c
Log:
Additional SKIP and token opt fixes/improvements

SKIP and token opt lambdas now reconciled.



Modified: branches/theora-thusnelda/lib/enc/codec_internal.h
===================================================================

--- branches/theora-thusnelda/lib/enc/codec_internal.h	2008-12-10 13:21:50 UTC (rev 15572)
+++ branches/theora-thusnelda/lib/enc/codec_internal.h	2008-12-12 00:06:04 UTC (rev 15573)
@@ -293,9 +293,7 @@
   /********************************************************************/
   /* Setup */
   int              keyframe_granule_shift;
-  int              skip_lambda;
-  int              mv_lambda;
-  int              token_lambda;
+  int              lambda;
   int              BaseQ;
   int              GoldenFrameEnabled;
   int              InterPrediction;

Modified: branches/theora-thusnelda/lib/enc/dct_encode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/dct_encode.c	2008-12-10 13:21:50 UTC (rev 15572)
+++ branches/theora-thusnelda/lib/enc/dct_encode.c	2008-12-12 00:06:04 UTC (rev 15573)
@@ -480,52 +480,47 @@
     
   while(i < BLOCK_SIZE){
     int ret;
+    int od = origdct[dezigzag_index[i]];
+    int bestd=0,d = dct[i];
+    int bestmin;
+    int cost,cost2=0,bestcost=0;
+    int j=i+1,k;
 
-    /* determine costs for encoding this value (and any preceeding
-       eobrun/zerorun) as well as the cost for encoding a demoted token */
-    int costA = tokenize_dctcost(cpi,chroma,coeff,i,dct[i]),costB;
-    int costD = costA;
-    int dval = (dct[i]>0 ? dct[i]-1 : dct[i]+1);
-    int j=i+1;
     while((j < BLOCK_SIZE) && !dct[j] ) j++;
 
-    if(dval){
-      /* demoting will not produce a zero. */
-      costD -= costB = tokenize_dctcost(cpi,chroma,coeff,i,dval);
+    if(j==BLOCK_SIZE){
+      cost = tokenize_eobcost(cpi,chroma,coeff);
+      if(i+1<BLOCK_SIZE) 
+	cost2 = tokenize_eobcost(cpi,chroma,i+1);
     }else{
-      /* demoting token will produce a zero. */
-      costB = 0;
-      if(j==BLOCK_SIZE){
-	if(i+1<BLOCK_SIZE) 
-	  costD += tokenize_eobcost(cpi,chroma,i+1);
-	costD -= tokenize_eobcost(cpi,chroma,coeff);
-      }else{
-	costD += tokenize_dctcost(cpi,chroma,i+1,j,dct[j]);
-	costD -= tokenize_dctcost(cpi,chroma,coeff,j,dct[j]);
-      }
+      cost = tokenize_dctcost(cpi,chroma,coeff,j,dct[j]);
+      cost2 = tokenize_dctcost(cpi,chroma,i+1,j,dct[j]);
     }
+    bestmin = od*od+cost*cpi->lambda;
+    
 
-    if(costD>0){
-      /* demoting results in a cheaper token cost.  Is the bit savings worth the added distortion? */
-      int ii = dezigzag_index[i];
-      int od = dct[i]*dequant[i] - origdct[ii];
-      int dd = dval*dequant[i] - origdct[ii];
-      int delta = dd*dd - od*od;
-      
-      if(delta < costD*cpi->token_lambda){
-	/* we have a winner.  Demote token */
-	dct[i]=dval;
-	costA=costB;
-	
-	if(dval==0){
-	  if(j==BLOCK_SIZE) break;
-	  i=j;
-	  continue;
-	}
+    for(k=1;k<=abs(d);k++){
+      int dval = (d>0 ? k : -k);
+      int dd = dval*dequant[i] - od;
+      int min = dd*dd;
+      cost = tokenize_dctcost(cpi,chroma,coeff,i,dval);
+
+      min += (cost+cost2)*cpi->lambda;
+      if(min<bestmin){
+	bestmin=min;
+	bestcost=cost;
+	bestd=dval;
       }
     }
 
-    retcost+=costA;
+    dct[i]=bestd;
+    if(bestd==0){
+      if(j==BLOCK_SIZE) break;
+      i=j;
+      continue;
+    }
+    
+    retcost+=bestcost;
 	
     ret = tokenize_dctval(cpi, chroma, fi, coeff, i, dct[i], stack);
     if(!ret)

Modified: branches/theora-thusnelda/lib/enc/encoder_toplevel.c
===================================================================
--- branches/theora-thusnelda/lib/enc/encoder_toplevel.c	2008-12-10 13:21:50 UTC (rev 15572)
+++ branches/theora-thusnelda/lib/enc/encoder_toplevel.c	2008-12-12 00:06:04 UTC (rev 15573)
@@ -119,9 +119,7 @@
   cpi->BaseQ = c->quality;
 
   /* temporary while the RD code is only partially complete */
-  cpi->skip_lambda=0;
-  cpi->token_lambda=0;
-  cpi->mv_lambda=0;
+  cpi->lambda=200;
 
   /* Set encoder flags. */
   /* if not AutoKeyframing cpi->ForceKeyFrameEvery = is frequency */

Modified: branches/theora-thusnelda/lib/enc/frarray.c
===================================================================
--- branches/theora-thusnelda/lib/enc/frarray.c	2008-12-10 13:21:50 UTC (rev 15572)
+++ branches/theora-thusnelda/lib/enc/frarray.c	2008-12-12 00:06:04 UTC (rev 15573)
@@ -414,9 +414,9 @@
   fr_skipblock(NULL,&temp);
   fr_skipblock(NULL,&temp);
   fr_skipblock(NULL,&temp);
-  fr_finishsb(NULL,&temp);
+  //fr_finishsb(NULL,&temp);
   cost=temp.cost;
   temp=*post;
-  fr_finishsb(NULL,&temp);
+  //fr_finishsb(NULL,&temp);
   return temp.cost - cost;
 }

Modified: branches/theora-thusnelda/lib/enc/mode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/mode.c	2008-12-10 13:21:50 UTC (rev 15572)
+++ branches/theora-thusnelda/lib/enc/mode.c	2008-12-12 00:06:04 UTC (rev 15573)
@@ -363,7 +363,6 @@
   macroblock_t *mb = &cpi->macro[mbi];
   int i,j;
   int cost = 0;
-
   for(i=0;i<3;i++){
     for(j=0;j<4;j++){
       int fi=mb->Ryuv[i][j];
@@ -527,6 +526,7 @@
 }
 
 /* coding overhead is unscaled */
+#include<stdio.h>
 static int TQB (CP_INSTANCE *cpi, plane_state_t *ps, int mode, int fi, mv_t mv, 
 		int coding_overhead, rd_metric_t *mo, long *rho_count,
 		token_checkpoint_t **stack){
@@ -544,9 +544,9 @@
   unsigned char *thisrecon = cpi->recon+bi;
   int nonzero=0;
   const ogg_int16_t *dequant = ps->re_q[mode != CODE_INTRA][ps->plane];
-  int uncoded_ssd=0,coded_ssd=0,coded_partial_ssd=0;
+  int uncoded_ssd=0,coded_ssd=0;
   int uncoded_dc=0,coded_dc=0,dc_flag=0;
-  int lambda = cpi->skip_lambda;
+  int lambda = cpi->lambda;
   token_checkpoint_t *checkpoint=*stack;
   int cost;
   int i;
@@ -632,7 +632,6 @@
 	uncoded_dc += buffer[i];
       }
     }
-    uncoded_ssd*=ps->ssdmul;
     uncoded_ssd <<= 4; /* scale to match DCT domain */
   }
 
@@ -656,26 +655,16 @@
       //rho_count[pos]++;
 
       if((abs(v)<<1)>=dequant[i]){
-	int d;
 	int val = (((iq[i]>>15)*v) + (1<<15) + (((iq[i]&0x7fff)*v)>>15)) >>16;
-	val = (val>511?511:(val<-511?-511:val));
-
-	d = val*dequant[i]-v;
-	coded_partial_ssd += d*d;
-	data[i] = val;
+	data[i] = (val>511?511:(val<-511?-511:val));
 	nonzero=i;
       }else{
-	coded_partial_ssd += v*v;
 	data[i] = 0;
       }
     }
-
-    /* for undersampled planes */
-    coded_partial_ssd*=ps->ssdmul;
-
   }
   cpi->frag_dc[fi] = data[0];
-  
+
   /* tokenize */
   cost = dct_tokenize_AC(cpi, fi, data, dequant, buffer, fi>=cpi->frag_n[0], stack);
   
@@ -700,27 +689,27 @@
     int i;
 
     /* in retrospect, should we have skipped this block? */
-    /* we are free to apply any distortion measure we like as we have
-       the full original block and fully reconstructed block with
-       which to do so.*/
-    /* for now, straight up SSD */
     dsp_sub8x8(cpi->dsp, frame_ptr, thisrecon, buffer, stride);    
     for(i=0;i<64;i++){
       coded_ssd += buffer[i]*buffer[i];
       coded_dc += buffer[i];
     }
     coded_ssd <<= 4; /* scale to match DCT domain */
-    coded_ssd*=ps->ssdmul; /* for undersampled planes */
     
     /* We actually only want the AC contribution to the SSDs */
     uncoded_ssd -= ((uncoded_dc*uncoded_dc)>>2);
     coded_ssd -= ((coded_dc*coded_dc)>>2);
+
+    /* for undersampled planes */
+    //coded_ssd*=ps->ssdmul; 
+    //uncoded_ssd*=ps->ssdmul;
+
     mo->uncoded_ac_ssd+=uncoded_ssd;  
 
-    /* DC is a special visual case; if there's more than a
-       half-quantizer improvement in the effective DC component, code
+    /* DC is a special case; if there's more than a full-quantizer
+       improvement in the effective DC component, always force-code
        the block */
-    if( abs(uncoded_dc)-abs(coded_dc) > dequant[0]){
+    if( abs(uncoded_dc)-abs(coded_dc) > (dequant[0]<<1)){
       mo->dc_flag = dc_flag = 1;
     }
        
@@ -731,10 +720,13 @@
       uncode_frag(cpi,fi,ps->plane);
       
       mo->coded_ac_ssd+=uncoded_ssd;
+      //fprintf(stderr,"skip(%d:%d)",coding_overhead,cost);
       
       return 0;
     }else{
       
+      //fprintf(stderr,"*****(%d:%d)",coding_overhead,cost);
+
       mo->coded_ac_ssd+=coded_ssd;
       mo->ac_cost+=cost;
       
@@ -791,7 +783,7 @@
       /* block by block, still coding the MB.  Now consider the
 	 macroblock coding cost as a whole (mode and MV) */ 
       int codecost = mo.ac_cost+fr_cost4(&fr_checkpoint,fr)+(mode_overhead>>OC_BIT_SCALE);
-      if(mo.uncoded_ac_ssd <= mo.coded_ac_ssd+cpi->skip_lambda*codecost){
+      if(mo.uncoded_ac_ssd <= mo.coded_ac_ssd+cpi->lambda*codecost){
 	
 	/* taking macroblock overhead into account, it is not worth coding this MB */
 	tokenlog_rollback(cpi, stack, stackptr-stack);
@@ -1271,26 +1263,23 @@
       int fi = tfi[ti];
       actual_bits[fi] += (bits<<OC_BIT_SCALE);
     }else{
-      /* EOB run; its bits should be split up between all the fragments in the run */
 
       int run = parse_eob_run(token, cpi->dct_token_eb[group][ti]);
-      int fracbits = ((bits<<OC_BIT_SCALE) + (run>>1))/run;
+      int fi = stack[eobcounts[group]];
+      actual_bits[fi]+=(bits<<OC_BIT_SCALE);
       
       if(ti+1<tn){
 	/* tokens follow EOB so it must be entirely ensconced within this plane/group */
-	while(run--){
-	  int fi = stack[eobcounts[group]++];
-	  actual_bits[fi]+=fracbits;
-	}
+	eobcounts[group]+=run;
       }else{
 	/* EOB is the last token in this plane/group, so it may span into the next plane/group */
 	int n = cpi->dct_eob_fi_count[group];
 	while(run){
-	  while(eobcounts[group] < n && run){
-	    int fi = stack[eobcounts[group]++];
-	    actual_bits[fi]+=fracbits;
-	    run--;
-	  }
+	  int rem = n - eobcounts[group];
+	  if(rem>run)rem=run;
+
+	  eobcounts[group]+=rem;
+	  run -= rem;
 	  if(run){
 	    group++;
 	    n = cpi->dct_eob_fi_count[group];
@@ -1345,36 +1334,35 @@
   for(fi=0;fi<v;fi++)
     if(cp[fi]){
       int mbi = mp[fi];
-      if(mbi>=0){
-	macroblock_t *mb = &cpi->macro[mbi];
-	int mode = mb->mode;
-	int plane = (fi<y ? 0 : (fi<u ? 1 : 2));
-	int bin = BIN(sp[fi]);
-	mode_metric[qi][plane][mode==CODE_INTRA].frag[bin]++;
-	mode_metric[qi][plane][mode==CODE_INTRA].sad[bin] += sp[fi];
-	mode_metric[qi][plane][mode==CODE_INTRA].bits[bin] += actual_bits[fi];
+      macroblock_t *mb = &cpi->macro[mbi];
+      int mode = mb->mode;
+      int plane = (fi<y ? 0 : (fi<u ? 1 : 2));
+      int bin = BIN(sp[fi]);
+      mode_metric[qi][plane][mode==CODE_INTRA].frag[bin]++;
+      mode_metric[qi][plane][mode==CODE_INTRA].sad[bin] += sp[fi];
+      mode_metric[qi][plane][mode==CODE_INTRA].bits[bin] += actual_bits[fi];
+      
+      if(0){
+	int bi = cpi->frag_buffer_index[fi];
+	unsigned char *frame = cpi->frame+bi;
+	unsigned char *recon = cpi->lastrecon+bi;
+	int stride = cpi->stride[plane];
+	int lssd=0;
+	int xi,yi;
 	
-	if(0){
-	  int bi = cpi->frag_buffer_index[fi];
-	  unsigned char *frame = cpi->frame+bi;
-	  unsigned char *recon = cpi->lastrecon+bi;
-	  int stride = cpi->stride[plane];
-	  int lssd=0;
-	  int xi,yi;
-	  
-	  for(yi=0;yi<8;yi++){
-	    for(xi=0;xi<8;xi++)
-	      lssd += (frame[xi]-recon[xi])*(frame[xi]-recon[xi]);
-	    frame+=stride;
-	    recon+=stride;
-	  }
-	  cpi->dist_dist[plane][mode] += lssd;
-	  cpi->dist_bits[plane][mode] += actual_bits[fi];
+	for(yi=0;yi<8;yi++){
+	  for(xi=0;xi<8;xi++)
+	    lssd += (frame[xi]-recon[xi])*(frame[xi]-recon[xi]);
+	  frame+=stride;
+	  recon+=stride;
 	}
+	cpi->dist_dist[plane][mode] += lssd;
+	cpi->dist_bits[plane][mode] += actual_bits[fi];
       }
     }
 
 
+
   /* update global SAD/rate estimation matrix */
   UpdateModeEstimation(cpi);
 }