[xiph-commits] r14779 - branches/theora-thusnelda/lib/enc

Sat Apr 19 06:38:12 PDT 2008

Author: xiphmont
Date: 2008-04-19 06:38:11 -0700 (Sat, 19 Apr 2008)
New Revision: 14779

Modified:
   branches/theora-thusnelda/lib/enc/codec_internal.h
   branches/theora-thusnelda/lib/enc/encoder_quant.c
   branches/theora-thusnelda/lib/enc/mode.c
Log:
Get a first piece of the rho-domain code into SVN; it does not yet do anything.
Committed at this stage as a test of its CPU performance impact.



Modified: branches/theora-thusnelda/lib/enc/codec_internal.h
===================================================================

--- branches/theora-thusnelda/lib/enc/codec_internal.h	2008-04-19 02:11:37 UTC (rev 14778)
+++ branches/theora-thusnelda/lib/enc/codec_internal.h	2008-04-19 13:38:11 UTC (rev 14779)
@@ -152,10 +152,10 @@
   int m[16]; // hilbert order; 4 for Y, 4 for UZ in 4:4:4, 8 for UV in 4:2:2, 16 for UV in 4:2:0
 } superblock_t;
 
-typedef ogg_int16_t    quant_table[64];
+typedef ogg_int16_t    quant_table[64]; 
 typedef quant_table    quant_tables[64];
 
-typedef ogg_int32_t    iquant_table[64];
+typedef ogg_int32_t    iquant_table[64];  
 typedef iquant_table   iquant_tables[64];
 
 typedef struct {
@@ -255,6 +255,9 @@
 
   /********************************************************************/
   /* Fragment SAD->bitrate estimation tracking metrics */
+  unsigned char    rho_lookup[2][3][64][OC_QUANT_MAX>>2];
+  ogg_uint32_t     rho_count[65]; 
+
 #ifdef COLLECT_METRICS
   int             *frag_mbi;
   int             *frag_sad;

Modified: branches/theora-thusnelda/lib/enc/encoder_quant.c
===================================================================
--- branches/theora-thusnelda/lib/enc/encoder_quant.c	2008-04-19 02:11:37 UTC (rev 14778)
+++ branches/theora-thusnelda/lib/enc/encoder_quant.c	2008-04-19 13:38:11 UTC (rev 14779)
@@ -164,14 +164,14 @@
 	  q=((ogg_uint32_t)qinfo->dc_scale[qi]*base[0]/100)<<2;
 	  q=OC_CLAMPI(OC_DC_QUANT_MIN[qti],q,OC_QUANT_MAX);
 	  cpi->quant_tables[qti][pli][qi][0]=(ogg_uint16_t)q;
-	  cpi->iquant_tables[qti][pli][qi][0]=(ogg_int32_t)(0.5 + (double)SHIFT16/q);
+	  cpi->iquant_tables[qti][pli][qi][0]=(ogg_int32_t)(((1<<31))/q+1);
 
 	  /*Now scale AC coefficients from the proper table.*/
 	  for(ci=1;ci<64;ci++){
 	    q=((ogg_uint32_t)qinfo->ac_scale[qi]*base[ci]/100)<<2;
 	    q=OC_CLAMPI(OC_AC_QUANT_MIN[qti],q,OC_QUANT_MAX);
 	    cpi->quant_tables[qti][pli][qi][zigzag_index[ci]]=(ogg_uint16_t)q;
-	    cpi->iquant_tables[qti][pli][qi][ci]=(ogg_int32_t)(0.5 + (double)SHIFT16/q);
+	    cpi->iquant_tables[qti][pli][qi][ci]=(ogg_int32_t)(((1<<31))/q+1);
 	  }
 	  
 	  if(++qi>=qi_end)break;
@@ -201,9 +201,16 @@
   
   /* Note that we add half divisor to effect rounding on positive number */
   for( i = 0; i < 64; i++) {
-    int val = ( (q[i] * in[i] + (1<<15)) >> 16 );
-    if(val>511)val=511;
-    if(val<-511)val=-511;
-    out[zigzag_index[i]] = val;
+    // the extra precision version to perfectly match dequant and thus rho metrics.  It's about a 2% speed penalty. 
+    int val = (((q[i]>>15)*in[i]) + (1<<15) + (((q[i]&0x7fff)*in[i])>>15)) >>16;
+    if(val==0){
+	out[zigzag_index[i]] = 0;
+    }else if(val>511){
+      out[zigzag_index[i]] = 511;
+    }else if (val<-511){
+      out[zigzag_index[i]] = -511;
+    }else{
+      out[zigzag_index[i]] = val;
+    }
   }
 }

Modified: branches/theora-thusnelda/lib/enc/mode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/mode.c	2008-04-19 02:11:37 UTC (rev 14778)
+++ branches/theora-thusnelda/lib/enc/mode.c	2008-04-19 13:38:11 UTC (rev 14779)
@@ -487,6 +487,186 @@
     sad[7][2][0] = BInterSAD(cpi,fi,2,0,ch);
 }
 
+#include "quant_lookup.h"
+static int find_nonzero_transition(quant_tables *q, int pos, ogg_int16_t val){
+  /* Unrolled binary search along the first index of *q at coefficient position pos.
+     Returns how many leading tables k satisfy 2*|val| < (*q)[k][pos] (0..64); this reading
+     presumes the entries never increase with k -- TODO confirm against the table builder. */
+  const int mag2 = abs(val)<<1; /* int, not ogg_int16_t: 2*|val| no longer fits in 16 bits once |val|>=16384 */
+  if( mag2 < (*q)[32][pos]){
+    if( mag2 < (*q)[48][pos]){
+      if( mag2 < (*q)[56][pos]){
+        if( mag2 < (*q)[60][pos]){
+          if( mag2 < (*q)[62][pos]){
+            if( mag2 < (*q)[63][pos])return 64;
+            return 63;
+          }else{
+            if( mag2 < (*q)[61][pos])return 62;
+            return 61;
+          }
+        }else{
+          if( mag2 < (*q)[58][pos]){
+            if( mag2 < (*q)[59][pos])return 60;
+            return 59;
+          }else{
+            if( mag2 < (*q)[57][pos])return 58;
+            return 57;
+          }
+        }
+      }else{
+        if( mag2 < (*q)[52][pos]){
+          if( mag2 < (*q)[54][pos]){
+            if( mag2 < (*q)[55][pos])return 56;
+            return 55;
+          }else{
+            if( mag2 < (*q)[53][pos])return 54;
+            return 53;
+          }
+        }else{
+          if( mag2 < (*q)[50][pos]){
+            if( mag2 < (*q)[51][pos])return 52;
+            return 51;
+          }else{
+            if( mag2 < (*q)[49][pos])return 50;
+            return 49;
+          }
+        }
+      }
+    }else{
+      if( mag2 < (*q)[40][pos]){
+        if( mag2 < (*q)[44][pos]){
+          if( mag2 < (*q)[46][pos]){
+            if( mag2 < (*q)[47][pos])return 48;
+            return 47;
+          }else{
+            if( mag2 < (*q)[45][pos])return 46;
+            return 45;
+          }
+        }else{
+          if( mag2 < (*q)[42][pos]){
+            if( mag2 < (*q)[43][pos])return 44;
+            return 43;
+          }else{
+            if( mag2 < (*q)[41][pos])return 42;
+            return 41;
+          }
+        }
+      }else{
+        if( mag2 < (*q)[36][pos]){
+          if( mag2 < (*q)[38][pos]){
+            if( mag2 < (*q)[39][pos])return 40;
+            return 39;
+          }else{
+            if( mag2 < (*q)[37][pos])return 38;
+            return 37;
+          }
+        }else{
+          if( mag2 < (*q)[34][pos]){
+            if( mag2 < (*q)[35][pos])return 36;
+            return 35;
+          }else{
+            if( mag2 < (*q)[33][pos])return 34;
+            return 33;
+          }
+        }
+      }
+    }
+  }else{
+    if( mag2 < (*q)[16][pos]){
+      if( mag2 < (*q)[24][pos]){
+        if( mag2 < (*q)[28][pos]){
+          if( mag2 < (*q)[30][pos]){
+            if( mag2 < (*q)[31][pos])return 32;
+            return 31;
+          }else{
+            if( mag2 < (*q)[29][pos])return 30;
+            return 29;
+          }
+        }else{
+          if( mag2 < (*q)[26][pos]){
+            if( mag2 < (*q)[27][pos])return 28;
+            return 27;
+          }else{
+            if( mag2 < (*q)[25][pos])return 26;
+            return 25;
+          }
+        }
+      }else{
+        if( mag2 < (*q)[20][pos]){
+          if( mag2 < (*q)[22][pos]){
+            if( mag2 < (*q)[23][pos])return 24;
+            return 23;
+          }else{
+            if( mag2 < (*q)[21][pos])return 22;
+            return 21;
+          }
+        }else{
+          if( mag2 < (*q)[18][pos]){
+            if( mag2 < (*q)[19][pos])return 20;
+            return 19;
+          }else{
+            if( mag2 < (*q)[17][pos])return 18;
+            return 17;
+          }
+        }
+      }
+    }else{
+      if( mag2 < (*q)[8][pos]){
+        if( mag2 < (*q)[12][pos]){
+          if( mag2 < (*q)[14][pos]){
+            if( mag2 < (*q)[15][pos])return 16;
+            return 15;
+          }else{
+            if( mag2 < (*q)[13][pos])return 14;
+            return 13;
+          }
+        }else{
+          if( mag2 < (*q)[10][pos]){
+            if( mag2 < (*q)[11][pos])return 12;
+            return 11;
+          }else{
+            if( mag2 < (*q)[9][pos])return 10;
+            return 9;
+          }
+        }
+      }else{
+        if( mag2 < (*q)[4][pos]){
+          if( mag2 < (*q)[6][pos]){
+            if( mag2 < (*q)[7][pos])return 8;
+            return 7;
+          }else{
+            if( mag2 < (*q)[5][pos])return 6;
+            return 5;
+          }
+        }else{
+          if( mag2 < (*q)[2][pos]){
+            if( mag2 < (*q)[3][pos])return 4;
+            return 3;
+          }else{
+            if( mag2 < (*q)[1][pos])return 2;
+            if( mag2 < (*q)[0][pos])return 1;
+          }
+        }
+      }
+    }
+  }
+  /* Reached only when 2*|val| is not below (*q)[0][pos] either. */
+  return 0;
+}
+
+/* Rho metric pass over the 64 coefficients in buffer.  Meant to be fused with quant/dequant eventually; for now it is separate. */
+static void collect_rho(CP_INSTANCE *cpi, int mode, int plane, ogg_int16_t *buffer){
+  int transition[64]; /* one find_nonzero_transition() result per coefficient; written here, not read yet */
+  /* Table set 0 for CODE_INTRA, set 1 for every other mode; then the requested plane. */
+  quant_tables *tables = &cpi->quant_tables[mode == CODE_INTRA ? 0 : 1][plane];
+  int ci = 0;
+  while(ci<64){
+    int zz = zigzag_index[ci]; /* same slot indexes the table lookup and the result */
+    transition[zz] = find_nonzero_transition(tables,zz,buffer[ci]);
+    ci++;
+  }
+}
+
 static void TQB (CP_INSTANCE *cpi, int mode, int fi, ogg_int32_t *iq, ogg_int16_t *q, mv_t mv, int plane){
   if ( cpi->frag_coded[fi] ) {
     ogg_int16_t buffer[64];
@@ -544,7 +724,8 @@
     dsp_fdct_short(cpi->dsp, data, buffer);
     
     /* collect rho metrics */
-    
+    collect_rho(cpi, mode, plane, buffer);
+
     /* quantize */
     quantize (cpi, iq, buffer, data);
     cpi->frag_dc[fi] = cpi->frag_dct[fi].data[0];