[xiph-commits] r15067 - branches/theora-thusnelda/lib/enc

xiphmont at svn.xiph.org xiphmont at svn.xiph.org
Tue Jun 24 16:17:14 PDT 2008


Author: xiphmont
Date: 2008-06-24 16:17:13 -0700 (Tue, 24 Jun 2008)
New Revision: 15067

Modified:
   branches/theora-thusnelda/lib/enc/codec_internal.h
   branches/theora-thusnelda/lib/enc/dsp.h
   branches/theora-thusnelda/lib/enc/frarray.c
   branches/theora-thusnelda/lib/enc/frinit.c
   branches/theora-thusnelda/lib/enc/mode.c
Log:
Complete rewrite of the DFarray code; it's now tracked and coded as part of the main encode loop so that the bit usage information of the block runs is available to block coding decisions.



Modified: branches/theora-thusnelda/lib/enc/codec_internal.h
===================================================================
--- branches/theora-thusnelda/lib/enc/codec_internal.h	2008-06-24 20:49:51 UTC (rev 15066)
+++ branches/theora-thusnelda/lib/enc/codec_internal.h	2008-06-24 23:17:13 UTC (rev 15067)
@@ -26,6 +26,7 @@
 
 #include "theora/theora.h"
 #include "encoder_huffman.h"
+typedef struct CP_INSTANCE CP_INSTANCE;
 #include "dsp.h"
 
 #define theora_read(x,y,z) ( oggpackB_read(x,y,z) )
@@ -154,9 +155,6 @@
 typedef struct superblock {
   int f[16]; // hilbert order
   int m[16]; // hilbert order: only 4 for luma, but 16 for U/V (to match f) */
-
-  int partial;
-  int coded;
 } superblock_t;
 
 typedef ogg_int16_t    quant_table[64]; 
@@ -187,7 +185,7 @@
 } oc_mode_scheme_chooser;
 
 /* Encoder (Compressor) instance -- installed in a theora_state */
-typedef struct CP_INSTANCE {
+struct CP_INSTANCE {
   /*This structure must be first.
     It contains entry points accessed by the decoder library's API wrapper, and
      is the only assumption that library makes about our internal format.*/
@@ -244,6 +242,12 @@
   
   /*********************************************************************/
   /* Token Buffers */
+  int             *fr_partial;
+  unsigned char   *fr_partial_bits;
+  int             *fr_full;
+  unsigned char   *fr_full_bits;
+  ogg_int16_t     *fr_block;
+  unsigned char   *fr_block_bits;
 
   unsigned char   *dct_token_storage;
   ogg_uint16_t    *dct_token_eb_storage;
@@ -298,7 +302,7 @@
 
   DspFunctions     dsp;  /* Selected functions for this platform */
 
-} CP_INSTANCE;
+};
 
 #define clamp255(x) ((unsigned char)((((x)<0)-1) & ((x) | -((x)>255))))
 
@@ -325,8 +329,6 @@
 extern void WriteHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES],
                               oggpack_buffer *opb);
 
-extern void PackAndWriteDFArray( CP_INSTANCE *cpi );
-
 extern void WriteFrameHeader( CP_INSTANCE *cpi) ;
 
 extern void EncodeData(CP_INSTANCE *cpi);
@@ -357,6 +359,33 @@
 
 extern void ClearFrameInfo (CP_INSTANCE *cpi);
 
+typedef struct {
+  int cpi_partial_count;
+  int cpi_full_count;
+  int cpi_block_count;
+
+  ogg_uint16_t  sb_partial_count;
+  ogg_uint16_t sb_full_count;
+
+  signed char sb_partial_last;
+  signed char sb_full_last;
+  signed char b_last;
+  signed char b_count;
+  signed char b_pend;
+
+  char sb_partial_break;
+  char sb_full_break;
+  char sb_partial;
+  char sb_coded;
+
+} fr_state_t;
+
+void fr_clear(CP_INSTANCE *cpi, fr_state_t *fr);
+void fr_skipblock(CP_INSTANCE *cpi, fr_state_t *fr);
+void fr_codeblock(CP_INSTANCE *cpi, fr_state_t *fr);
+void fr_finishsb(CP_INSTANCE *cpi, fr_state_t *fr);
+void fr_write(CP_INSTANCE *cpi, fr_state_t *fr);
+
 #ifdef COLLECT_METRICS
 extern void ModeMetrics(CP_INSTANCE *cpi, int huff[4]);
 extern void DumpMetrics(CP_INSTANCE *cpi);

Modified: branches/theora-thusnelda/lib/enc/dsp.h
===================================================================
--- branches/theora-thusnelda/lib/enc/dsp.h	2008-06-24 20:49:51 UTC (rev 15066)
+++ branches/theora-thusnelda/lib/enc/dsp.h	2008-06-24 23:17:13 UTC (rev 15067)
@@ -73,7 +73,7 @@
   ogg_uint32_t (*inter8x8_err_xy2)(unsigned char *SrcData, unsigned char *RefDataPtr1,
 				   unsigned char *RefDataPtr2, ogg_uint32_t stride);
                
-  void (*LoopFilter)              (void *cpi, int FLimit);
+  void (*LoopFilter)              (CP_INSTANCE *cpi, int FLimit);
 
   void (*FilterVert)              (unsigned char * PixelPtr,
 				   ogg_int32_t LineLength, ogg_int16_t *BoundingValuePtr);

Modified: branches/theora-thusnelda/lib/enc/frarray.c
===================================================================
--- branches/theora-thusnelda/lib/enc/frarray.c	2008-06-24 20:49:51 UTC (rev 15066)
+++ branches/theora-thusnelda/lib/enc/frarray.c	2008-06-24 23:17:13 UTC (rev 15067)
@@ -19,189 +19,287 @@
 #include "codec_internal.h"
 #include <stdio.h>
 
-/* Long run bit string coding */
-static ogg_uint32_t FrArrayCodeSBRun( CP_INSTANCE *cpi, ogg_uint32_t value){
-  ogg_uint32_t CodedVal = 0;
-  ogg_uint32_t CodedBits = 0;
+void fr_clear(CP_INSTANCE *cpi, fr_state_t *fr){
+  fr->sb_partial_last = -1;
+  fr->sb_partial_count = 0;
+  fr->sb_partial_break = 0;
 
-  /* Coding scheme:
-        Codeword              RunLength
-      0                       1
-      10x                     2-3
-      110x                    4-5
-      1110xx                  6-9
-      11110xxx                10-17
-      111110xxxx              18-33
-      111111xxxxxxxxxxxx      34-4129 */
+  fr->sb_full_last = -1;
+  fr->sb_full_count = 0;
+  fr->sb_full_break = 0;
 
-  if ( value == 1 ){
-    CodedVal = 0;
-    CodedBits = 1;
-  } else if ( value <= 3 ) {
-    CodedVal = 0x0004 + (value - 2);
-    CodedBits = 3;
-  } else if ( value <= 5 ) {
-    CodedVal = 0x000C + (value - 4);
-    CodedBits = 4;
-  } else if ( value <= 9 ) {
-    CodedVal = 0x0038 + (value - 6);
-    CodedBits = 6;
-  } else if ( value <= 17 ) {
-    CodedVal = 0x00F0 + (value - 10);
-    CodedBits = 8;
-  } else if ( value <= 33 ) {
-    CodedVal = 0x03E0 + (value - 18);
-    CodedBits = 10;
-  } else {
-    CodedVal = 0x3F000 + (value - 34);
-    CodedBits = 18;
-  }
+  fr->b_last = -1;
+  fr->b_count = 0;
+  fr->b_pend = 0;
 
-  /* Add the bits to the encode holding buffer. */
-  oggpackB_write( cpi->oggbuffer, CodedVal, CodedBits );
+  fr->sb_partial=0;
+  fr->sb_coded=0;
 
-  return CodedBits;
+  fr->cpi_partial_count=0;
+  fr->cpi_full_count=0;
+  fr->cpi_block_count=0;
 }
 
-/* Short run bit string coding */
-static ogg_uint32_t FrArrayCodeBlockRun( CP_INSTANCE *cpi,
-                                         ogg_uint32_t value ) {
-  ogg_uint32_t CodedVal = 0;
-  ogg_uint32_t CodedBits = 0;
-
+static int Brun( ogg_uint32_t value, ogg_int16_t *token) {
+  
   /* Coding scheme:
-        Codeword                                RunLength
-        0x                                      1-2
-        10x                                     3-4
-        110x                                    5-6
-        1110xx                                  7-10
-        11110xx                                 11-14
-        11111xxxx                               15-30 */
+     Codeword                                RunLength
+     0x                                      1-2
+     10x                                     3-4
+     110x                                    5-6
+     1110xx                                  7-10
+     11110xx                                 11-14
+     11111xxxx                               15-30 */
 
   if ( value <= 2 ) {
-    CodedVal = value - 1;
-    CodedBits = 2;
+    *token = value - 1;
+    return 2;
   } else if ( value <= 4 ) {
-    CodedVal = 0x0004 + (value - 3);
-    CodedBits = 3;
-
+    *token = 0x0004 + (value - 3);
+    return 3;
   } else if ( value <= 6 ) {
-    CodedVal = 0x000C + (value - 5);
-    CodedBits = 4;
-
+    *token = 0x000C + (value - 5);
+    return 4;
   } else if ( value <= 10 ) {
-    CodedVal = 0x0038 + (value - 7);
-    CodedBits = 6;
-
+    *token = 0x0038 + (value - 7);
+    return 6;
   } else if ( value <= 14 ) {
-    CodedVal = 0x0078 + (value - 11);
-    CodedBits = 7;
+    *token = 0x0078 + (value - 11);
+    return 7;
   } else {
-    CodedVal = 0x01F0 + (value - 15);
-    CodedBits = 9;
+    *token = 0x01F0 + (value - 15);
+    return 9;
  }
+}
 
-  /* Add the bits to the encode holding buffer. */
-  oggpackB_write( cpi->oggbuffer, CodedVal, CodedBits );
+void fr_skipblock(CP_INSTANCE *cpi, fr_state_t *fr){
+  if(fr->sb_coded){
+    if(!fr->sb_partial){
 
-  return CodedBits;
+      /* superblock was previously fully coded */
+
+      if(fr->b_last==-1){
+	/* first run of the frame */
+	cpi->fr_block[fr->cpi_block_count]=1;
+	cpi->fr_block_bits[fr->cpi_block_count]=1;
+	fr->cpi_block_count++;
+	fr->b_last = 1;
+      }
+
+      if(fr->b_last==1){
+	/* in-progress run also a coded run */
+	fr->b_count += fr->b_pend;
+      }else{
+	/* in-progress run an uncoded run; flush */
+	cpi->fr_block_bits[fr->cpi_block_count] = 
+	  Brun(fr->b_count, cpi->fr_block+fr->cpi_block_count);
+	fr->cpi_block_count++;
+	fr->b_count=fr->b_pend;
+	fr->b_last = 1;
+      }
+    }
+
+    /* add a skip block */
+    if(fr->b_last == 0){
+      fr->b_count++;
+    }else{
+      cpi->fr_block_bits[fr->cpi_block_count] = 
+	Brun(fr->b_count, cpi->fr_block+fr->cpi_block_count);
+      fr->cpi_block_count++;
+      fr->b_count = 1;
+      fr->b_last = 0;
+    }
+  }
+   
+  fr->b_pend++;
+  fr->sb_partial=1;
 }
 
-void PackAndWriteDFArray( CP_INSTANCE *cpi ){
-  ogg_uint32_t  SB, B;
-  int run_last = -1;
-  int run_count = 0;
-  int run_break = 0;
-  int invalid_fi = cpi->frag_total;
-  unsigned char *cp = cpi->frag_coded;
+void fr_codeblock(CP_INSTANCE *cpi, fr_state_t *fr){
+  if(fr->sb_partial){
+    if(!fr->sb_coded){
 
-  /* code the partially coded SB flags */
-  for( SB = 0; SB < cpi->super_total; SB++ ) {
-    superblock_t *sb = &cpi->super[0][SB];
-    int partial = (sb->partial & sb->coded); 
+      /* superblock was previously completely uncoded */
 
-    if(run_last == -1){
-      oggpackB_write( cpi->oggbuffer, partial, 1);      
-      run_last = partial;
+      if(fr->b_last==-1){
+	/* first run of the frame */
+	cpi->fr_block[fr->cpi_block_count]=0;
+	cpi->fr_block_bits[fr->cpi_block_count]=1;
+	fr->cpi_block_count++;
+	fr->b_last = 0;
+      }
+
+      if(fr->b_last==0){
+	/* in-progress run also an uncoded run */
+	fr->b_count += fr->b_pend;
+      }else{
+	/* in-progress run a coded run; flush */
+	cpi->fr_block_bits[fr->cpi_block_count] = 
+	  Brun(fr->b_count, cpi->fr_block+fr->cpi_block_count);
+	fr->cpi_block_count++;
+	fr->b_count=fr->b_pend;
+	fr->b_last = 0;
+      }
     }
-    
-    if(run_last == partial && run_count < 4129){
-      run_count++;
+
+    /* add a coded block */
+    if(fr->b_last == 1){
+      fr->b_count++;
     }else{
-      if(run_break)
-	oggpackB_write( cpi->oggbuffer, partial, 1);
-      
-      run_break=0;
-      FrArrayCodeSBRun( cpi, run_count );      
-      if(run_count >= 4129) run_break = 1;
-      run_count=1;
+      cpi->fr_block_bits[fr->cpi_block_count] = 
+	Brun(fr->b_count, cpi->fr_block+fr->cpi_block_count);
+      fr->cpi_block_count++;
+      fr->b_count = 1;
+      fr->b_last = 1;
     }
-    run_last=partial;
   }
-  if(run_break)
-    oggpackB_write( cpi->oggbuffer, run_last, 1);
-  if(run_count)
-    FrArrayCodeSBRun(cpi, run_count);      
+   
+  fr->b_pend++;
+  fr->sb_coded=1;
+}
 
-  /* code the fully coded/uncoded SB flags */
-  run_last = -1;
-  run_count = 0;
-  run_break = 0;
-  for( SB = 0; SB < cpi->super_total; SB++ ) {
-    superblock_t *sb = &cpi->super[0][SB];
+static int SBRun(ogg_uint32_t value, int *token){
+
+  /* Coding scheme:
+        Codeword              RunLength
+      0                       1
+      10x                     2-3
+      110x                    4-5
+      1110xx                  6-9
+      11110xxx                10-17
+      111110xxxx              18-33
+      111111xxxxxxxxxxxx      34-4129 */
+
+  if ( value == 1 ){
+    *token = 0;
+    return 1;
+  } else if ( value <= 3 ) {
+    *token = 0x0004 + (value - 2);
+    return 3;
+  } else if ( value <= 5 ) {
+    *token = 0x000C + (value - 4);
+    return 4;
+  } else if ( value <= 9 ) {
+    *token = 0x0038 + (value - 6);
+    return 6;
+  } else if ( value <= 17 ) {
+    *token = 0x00F0 + (value - 10);
+    return 8;
+  } else if ( value <= 33 ) {
+    *token = 0x03E0 + (value - 18);
+    return 10;
+  } else {
+    *token = 0x3F000 + (value - 34);
+    return 18;
+  }
+}
+
+void fr_finishsb(CP_INSTANCE *cpi, fr_state_t *fr){
+  /* update partial state */
+  int partial = (fr->sb_partial & fr->sb_coded); 
+  if(fr->sb_partial_last == -1){
+    cpi->fr_partial[fr->cpi_partial_count] = partial;
+    cpi->fr_partial_bits[fr->cpi_partial_count] = 1;
+    fr->cpi_partial_count++;
+    fr->sb_partial_last = partial;
+  }
     
-    if(sb->partial && sb->coded) continue;
+  if(fr->sb_partial_last == partial && fr->sb_partial_count < 4129){
+    fr->sb_partial_count++;
+  }else{
+    if(fr->sb_partial_break){
+      cpi->fr_partial[fr->cpi_partial_count] = partial;
+      cpi->fr_partial_bits[fr->cpi_partial_count] = 1;
+      fr->cpi_partial_count++;
+    }
+      
+    fr->sb_partial_break=0;
+    cpi->fr_partial_bits[fr->cpi_partial_count] = 
+      SBRun( fr->sb_partial_count, cpi->fr_partial+fr->cpi_partial_count);
+    fr->cpi_partial_count++;
     
-    if(run_last == -1){
-      oggpackB_write( cpi->oggbuffer, sb->coded, 1);      
-      run_last = sb->coded;
+    if(fr->sb_partial_count >= 4129) fr->sb_partial_break = 1;
+    fr->sb_partial_count=1;
+  }
+  fr->sb_partial_last=partial;
+
+  /* fully coded/uncoded state */
+  if(!fr->sb_partial || !fr->sb_coded){
+    
+    if(fr->sb_full_last == -1){
+      cpi->fr_full[fr->cpi_full_count] = fr->sb_coded;
+      cpi->fr_full_bits[fr->cpi_full_count] = 1;
+      fr->cpi_full_count++;
+      fr->sb_full_last = fr->sb_coded;
     }
     
-    if(run_last == sb->coded && run_count < 4129){
-      run_count++;
+    if(fr->sb_full_last == fr->sb_coded && fr->sb_full_count < 4129){
+      fr->sb_full_count++;
     }else{
-      if(run_break)
-	oggpackB_write( cpi->oggbuffer, sb->coded, 1);
-      run_break=0;
-      FrArrayCodeSBRun( cpi, run_count );      
-      if(run_count >= 4129) run_break = 1;
-      run_count=1;
+      if(fr->sb_full_break){
+	cpi->fr_full[fr->cpi_full_count] = fr->sb_coded;
+	cpi->fr_full_bits[fr->cpi_full_count] = 1;
+	fr->cpi_full_count++;
+      }
+
+      fr->sb_full_break=0;
+      cpi->fr_full_bits[fr->cpi_full_count] = 
+	SBRun( fr->sb_full_count, cpi->fr_full+fr->cpi_full_count);
+      fr->cpi_full_count++;
+      if(fr->sb_full_count >= 4129) fr->sb_full_break = 1;
+      fr->sb_full_count=1;
     }
-    run_last=sb->coded;
+    fr->sb_full_last=fr->sb_coded;
+
   }
-  if(run_break)
-    oggpackB_write( cpi->oggbuffer, run_last, 1);
 
-  if(run_count)
-    FrArrayCodeSBRun(cpi, run_count);      
+  fr->b_pend=0;
+  fr->sb_partial=0;
+  fr->sb_coded=0;
+}
 
-  /* code the block flags */
-  run_last = -1;
-  run_count = 0;
-  for( SB = 0; SB < cpi->super_total; SB++ ) {
-    superblock_t *sb = &cpi->super[0][SB];
+static void fr_flush(CP_INSTANCE *cpi, fr_state_t *fr){
+  /* flush any pending partial run */
+  if(fr->sb_partial_break){
+    cpi->fr_partial[fr->cpi_partial_count] = fr->sb_partial_last;
+    cpi->fr_partial_bits[fr->cpi_partial_count] = 1;
+    fr->cpi_partial_count++;
+  }
+  if(fr->sb_partial_count){
+    cpi->fr_partial_bits[fr->cpi_partial_count] = 
+      SBRun( fr->sb_partial_count, cpi->fr_partial+fr->cpi_partial_count);
+    fr->cpi_partial_count++;
+  }
 
-    if(!sb->coded || !sb->partial) continue;
+  /* flush any pending full run */
+  if(fr->sb_full_break){
+    cpi->fr_full[fr->cpi_full_count] = fr->sb_full_last;
+    cpi->fr_full_bits[fr->cpi_full_count] = 1;
+    fr->cpi_full_count++;
+  }
+  if(fr->sb_full_count){
+    cpi->fr_full_bits[fr->cpi_full_count] = 
+      SBRun( fr->sb_full_count, cpi->fr_full+fr->cpi_full_count);
+    fr->cpi_full_count++;
+  }
 
-    for ( B=0; B<16; B++ ) {
-      int fi = sb->f[B];      
-      if(fi != invalid_fi){
-	if(run_last == -1){
-	  oggpackB_write( cpi->oggbuffer, cp[fi], 1);      
-	  run_last = cp[fi];
-	}
-	
-	if(run_last == cp[fi]){
-	  run_count++;
-	}else{
-	  FrArrayCodeBlockRun( cpi, run_count );
-	  run_count=1;
-	}
-	run_last=cp[fi];
-      }
-    }
+  /* flush any pending block run */
+  if(fr->b_count){
+    cpi->fr_block_bits[fr->cpi_block_count] = 
+      Brun(fr->b_count, cpi->fr_block+fr->cpi_block_count);
+    fr->cpi_block_count++;
   }
-  if(run_count)
-    FrArrayCodeBlockRun( cpi, run_count );
+}
 
+void fr_write(CP_INSTANCE *cpi, fr_state_t *fr){
+  int i;
+
+  fr_flush(cpi,fr);
+
+  for(i=0;i<fr->cpi_partial_count;i++)
+    oggpackB_write( cpi->oggbuffer, cpi->fr_partial[i], cpi->fr_partial_bits[i]);      
+  for(i=0;i<fr->cpi_full_count;i++)
+    oggpackB_write( cpi->oggbuffer, cpi->fr_full[i], cpi->fr_full_bits[i]);      
+  for(i=0;i<fr->cpi_block_count;i++)
+    oggpackB_write( cpi->oggbuffer, cpi->fr_block[i], cpi->fr_block_bits[i]);      
 }

Modified: branches/theora-thusnelda/lib/enc/frinit.c
===================================================================
--- branches/theora-thusnelda/lib/enc/frinit.c	2008-06-24 20:49:51 UTC (rev 15066)
+++ branches/theora-thusnelda/lib/enc/frinit.c	2008-06-24 23:17:13 UTC (rev 15067)
@@ -46,6 +46,13 @@
 
   if(cpi->macro) _ogg_free(cpi->macro);
   if(cpi->super[0]) _ogg_free(cpi->super[0]);
+
+  if(cpi->fr_partial)_ogg_free(cpi->fr_partial);
+  if(cpi->fr_partial_bits)_ogg_free(cpi->fr_partial_bits);
+  if(cpi->fr_full)_ogg_free(cpi->fr_full);
+  if(cpi->fr_full_bits)_ogg_free(cpi->fr_full_bits);
+  if(cpi->fr_block)_ogg_free(cpi->fr_block);
+  if(cpi->fr_block_bits)_ogg_free(cpi->fr_block_bits);
 }
 
 /* A note to people reading and wondering why malloc returns aren't
@@ -128,6 +135,13 @@
   cpi->dct_token_storage = _ogg_malloc(cpi->frag_total*BLOCK_SIZE*sizeof(*cpi->dct_token_storage));
   cpi->dct_token_eb_storage = _ogg_malloc(cpi->frag_total*BLOCK_SIZE*sizeof(*cpi->dct_token_eb_storage));
 
+  cpi->fr_partial = _ogg_calloc(cpi->super_total+1, sizeof(*cpi->fr_partial));
+  cpi->fr_partial_bits = _ogg_calloc(cpi->super_total+1, sizeof(*cpi->fr_partial_bits));
+  cpi->fr_full = _ogg_calloc(cpi->super_total+1, sizeof(*cpi->fr_full));
+  cpi->fr_full_bits = _ogg_calloc(cpi->super_total+1, sizeof(*cpi->fr_full_bits));
+  cpi->fr_block = _ogg_calloc(cpi->frag_total+1, sizeof(*cpi->fr_block));
+  cpi->fr_block_bits = _ogg_calloc(cpi->frag_total+1, sizeof(*cpi->fr_block_bits));
+
 #ifdef COLLECT_METRICS
   cpi->frag_mbi = _ogg_calloc(cpi->frag_total+1, sizeof(*cpi->frag_mbi));
   for(i=0;i<8;i++)

Modified: branches/theora-thusnelda/lib/enc/mode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/mode.c	2008-06-24 20:49:51 UTC (rev 15066)
+++ branches/theora-thusnelda/lib/enc/mode.c	2008-06-24 23:17:13 UTC (rev 15067)
@@ -509,26 +509,68 @@
   dsp_copy8x8 (cpi->dsp, cpi->lastrecon+bi, cpi->recon+bi, stride);
 }      
 
-static int TQB (CP_INSTANCE *cpi, int mode, int fi, mv_t mv, int plane, ogg_int16_t re_q[2][3][64], 
-		 long *rho_count, int keyframe, int *uncoded_ssd_acc, int *coded_ssd_acc, int *cost_acc){
+typedef struct{
+  int uncoded_ssd;
+  int uncoded_cost;
+  int ssd;
+  int cost;
+} rd_metric_t;
 
+typedef struct{
+  int plane;
+  int qi;
+  ogg_int16_t re_q[2][3][64];
+  ogg_int32_t *iq[2];
+  quant_tables *qq[2];
+  ogg_int32_t *mode_rate[2];
+  int xqp;
+  int yqp;
+  int ssdmul;
+} plane_state_t;
+
+static void ps_setup_frame(CP_INSTANCE *cpi, plane_state_t *ps){
+  int i,j,k;
   int qi = cpi->BaseQ; /* temporary */;
-  int inter = (mode != CODE_INTRA);
-  ogg_int32_t *iq = cpi->iquant_tables[inter][plane][qi];
+
+  ps->qi = qi;
+  for(i=0;i<2;i++)
+    for(j=0;j<3;j++)
+      for(k=0;k<64;k++)
+	ps->re_q[i][j][k]=cpi->quant_tables[i][j][k][qi];
+}
+
+static void ps_setup_plane(CP_INSTANCE *cpi, plane_state_t *ps, int plane){
+  ps->plane = plane;
+  ps->iq[0] = cpi->iquant_tables[0][plane][ps->qi];
+  ps->iq[1] = cpi->iquant_tables[1][plane][ps->qi];
+  ps->qq[0] = &(cpi->quant_tables[0][plane]);
+  ps->qq[1] = &(cpi->quant_tables[1][plane]);
+  ps->mode_rate[0] = mode_rate[ps->qi][plane][0];
+  ps->mode_rate[1] = mode_rate[ps->qi][plane][1];
+  ps->xqp = (plane && cpi->info.pixelformat != OC_PF_444);
+  ps->yqp = (plane && cpi->info.pixelformat == OC_PF_420);
+  ps->ssdmul = (ps->xqp+1)*(ps->yqp+1);
+}
+
+static int TQB (CP_INSTANCE *cpi, plane_state_t *ps, int mode, int fi, mv_t mv, 
+		int uncoded_overhead, int coded_overhead, rd_metric_t *mo, long *rho_count){
+  
+  int keyframe = (cpi->FrameType == KEY_FRAME);
+  int qi = ps->qi;
+  ogg_int32_t *iq = ps->iq[mode != CODE_INTRA];
   ogg_int16_t buffer[64];
   ogg_int16_t *data = cpi->frag_dct[fi].data;
   int bi = cpi->frag_buffer_index[fi];
-  int stride = cpi->stride[plane];
-  int xqp = (plane && cpi->info.pixelformat != OC_PF_444);
-  int yqp = (plane && cpi->info.pixelformat == OC_PF_420);
+  int stride = cpi->stride[ps->plane];
   unsigned char *frame_ptr = &cpi->frame[bi];
   unsigned char *lastrecon = ((mode == CODE_USING_GOLDEN || 
 			       mode == CODE_GOLDEN_MV) ? 
 			      cpi->golden : cpi->lastrecon)+bi;
   unsigned char *thisrecon = cpi->recon+bi;
   int nonzero=63;
-  ogg_int16_t *dequant = re_q[inter][plane];
+  ogg_int16_t *dequant = ps->re_q[mode != CODE_INTRA][ps->plane];
   int uncoded_ssd=0,coded_ssd=0,sad=0;
+  int lambda = cpi->skip_lambda;
 
   /* motion comp */
   switch(mode){
@@ -539,10 +581,10 @@
   case CODE_INTER_FOURMV:
     
     {    
-      int mx = mvmap[xqp][mv.x+31];
-      int my = mvmap[yqp][mv.y+31];
-      int mx2 = mvmap2[xqp][mv.x+31];
-      int my2 = mvmap2[yqp][mv.y+31];
+      int mx = mvmap[ps->xqp][mv.x+31];
+      int my = mvmap[ps->yqp][mv.y+31];
+      int mx2 = mvmap2[ps->xqp][mv.x+31];
+      int my2 = mvmap2[ps->yqp][mv.y+31];
       
       unsigned char *r1 = lastrecon + my * stride + mx;
       
@@ -585,7 +627,7 @@
     if(mode==CODE_INTRA){
       sad >>=6;
     }else{
-      if(plane)sad<<=2;
+      if(ps->plane)sad<<=2;
     }
   }
 
@@ -595,7 +637,7 @@
   /* collect rho metrics, quantize */
   {
     int i;
-    quant_tables *qq = &(cpi->quant_tables[inter][plane]);
+    quant_tables *qq = ps->qq[mode != CODE_INTRA];
     
     for(i=0;i<64;i++){
       int ii = dezigzag_index[i];
@@ -639,7 +681,7 @@
 
   if(!keyframe){
     int i;
-    int cost = BINMAP(mode_rate[qi][plane][mode==CODE_INTRA],sad);
+    int cost = BINMAP(mode_rate[qi][ps->plane][mode==CODE_INTRA],sad);
     if(cost<0)cost=0; /* some of the trained fits can return a negative cost for zero entropy */
 
     /* in retrospect, should we have skipped this block? */
@@ -651,20 +693,25 @@
     for(i=0;i<64;i++)
       coded_ssd += buffer[i]*buffer[i];
 
-    if(plane){
-      coded_ssd*=4;
-      uncoded_ssd*=4;
-    }
+    /* for undersampled planes */
+    coded_ssd*=ps->ssdmul;
+    uncoded_ssd*=ps->ssdmul;
     
-    if(uncoded_ssd <= coded_ssd+((cpi->skip_lambda*cost)>>(OC_BIT_SCALE))){ 
-      uncode_frag(cpi,fi,plane);
+    mo->uncoded_ssd+=uncoded_ssd;
+    mo->uncoded_cost+=(uncoded_overhead<<OC_BIT_SCALE);
+
+    if(uncoded_ssd+uncoded_overhead*lambda <= coded_ssd+coded_overhead*lambda+((cost*lambda)>>OC_BIT_SCALE)){ 
+      uncode_frag(cpi,fi,ps->plane);
+
+      mo->ssd+=uncoded_ssd;
+      mo->cost+=(uncoded_overhead<<OC_BIT_SCALE);
+
       return 0;
+    }else{
+
+      mo->ssd+=coded_ssd;
+      mo->cost+=cost+(coded_overhead<<OC_BIT_SCALE);
     }
-
-    *uncoded_ssd_acc+=uncoded_ssd;
-    *coded_ssd_acc+=coded_ssd;
-    *cost_acc+=cost;
-
   }
     
   return 1;
@@ -672,27 +719,18 @@
 
 static int macroblock_phase_Y[4][4] = {{0,1,3,2},{0,2,3,1},{0,2,3,1},{3,2,0,1}};
 
-static int TQMB_Y ( CP_INSTANCE *cpi, macroblock_t *mb, int mb_phase, int qi, 
-		    ogg_int16_t req[2][3][64], long *rc, int keyframe, int overhead){
+static int TQMB_Y ( CP_INSTANCE *cpi, macroblock_t *mb, int mb_phase, plane_state_t *ps, long *rc, 
+		    int mode_overhead, fr_state_t *fr){
   unsigned char *cp=cpi->frag_coded;
   int mode = mb->mode;
+  int coded = 0;
   int i;
-  int coded=0;
-  int coded_ssd=0;
-  int uncoded_ssd=0;
-  int coded_cost=0;
-  
-  int ysb_coded = 0;
-  int ysb_partial = 0;
+  fr_state_t fr_checkpoint;
 
-  superblock_t *ysb = &cpi->super[0][mb->ysb];
+  rd_metric_t mo;
+  memset(&mo,0,sizeof(mo));
 
-  /* It's exceptionally difficult in the current Theora coding
-     structure to take the global superblock coding runs into account
-     when computing relative bitcosts in block coding decisions, but
-     this is a relatively exceedingly minor cost consideration.  We do
-     account for the local coding costs of skip blocks within the
-     superblock, a more significant contribution to consider. */
+  memcpy(&fr_checkpoint,fr,sizeof(fr_checkpoint));
 
   for(i=0;i<4;i++){
     /* Blocks must be handled in Hilbert order which is defined by MB
@@ -700,28 +738,34 @@
        raster order just to make it more difficult. */
     int bi = macroblock_phase_Y[mb_phase][i];
     int fi = mb->Ryuv[0][bi];
-    if(TQB(cpi,mode,fi,mb->mv[bi],0,req,rc,keyframe, &uncoded_ssd, &coded_ssd, &coded_cost)){
-      ysb_coded=1;
+    if(TQB(cpi,ps,mode,fi,mb->mv[bi],0,0,&mo,rc)){
+      fr_codeblock(cpi,fr);
       coded++;
     }else{
-      ysb_partial=1;
+      fr_skipblock(cpi,fr);
       if(mode == CODE_INTER_FOURMV) 
 	mb->mv[bi]=(mv_t){0,0};
     }
   }
 
-  if(!keyframe){
+
+  if(cpi->FrameType != KEY_FRAME){
     if(coded){
       /* block by block, still coding the MB.  Now consider the
 	 macroblock coding cost as a whole (mode and MV) */ 
-      if(uncoded_ssd <= coded_ssd+((cpi->skip_lambda*(coded_cost+overhead))>>(OC_BIT_SCALE))){     
+      if(mo.uncoded_ssd+((cpi->skip_lambda*mo.uncoded_cost)>>OC_BIT_SCALE) <= 
+	 mo.ssd+((cpi->skip_lambda*(mo.cost+mode_overhead))>>(OC_BIT_SCALE))){     
 	/* taking macroblock overhead into account, it is not worth coding this MB */
+
+	memcpy(fr,&fr_checkpoint,sizeof(fr_checkpoint));
 	for(i=0;i<4;i++){
 	  int fi = mb->Ryuv[0][i];
+	  fr_skipblock(cpi,fr);
 	  if(cp[fi])
 	    uncode_frag(cpi,fi,0);
 	}
 	coded=0;
+
       }
     }
 
@@ -729,7 +773,6 @@
       mb->mode = CODE_INTER_NO_MV; /* No luma blocks coded, mode is forced */
       mb->coded = 0;
       mb->mv[0] = mb->mv[1] = mb->mv[2] = mb->mv[3] = (mv_t){0,0};
-      ysb->partial = 1;
       return 0; 
 
     }
@@ -756,132 +799,91 @@
     }
   }
   
-  
-  ysb->coded |= ysb_coded;
-  ysb->partial |= ysb_partial;
   return coded;  
 }
 
 static int macroblock_phase_422[16] = {0,0,2,2,0,2,2,0,0,2,2,0,2,2,0,0};
 static int macroblock_phase_444[16] = {0,1,3,2,0,2,3,1,0,2,3,1,3,2,0,1};
 
-static int TQSB_UV ( CP_INSTANCE *cpi, superblock_t *sb, int plane,
-		     int qi, ogg_int16_t req[2][3][64], long *rc, int keyframe){
+static int TQSB_UV ( CP_INSTANCE *cpi, superblock_t *sb, plane_state_t *ps, long *rc, fr_state_t *fr){
   int pf = cpi->info.pixelformat;
   int i;
-  int coded=0;
-  int coded_ssd=0;
-  int uncoded_ssd=0;
-  int coded_cost=0;
-
-  int sb_coded = 0;
-  int sb_partial = 0;
+  int coded = 0;
   unsigned char *cp=cpi->frag_coded;
+  rd_metric_t mo;
+  memset(&mo,0,sizeof(mo));
 
-  switch(pf){
-  case OC_PF_420:
-    /* sixteen blocks/macroblocks per chroma superblock */
-    for(i=0;i<16;i++){
-      int fi = sb->f[i];
-      if(cp[fi]){
-	macroblock_t *mb = &cpi->macro[sb->m[i]];
-	mv_t mv;
-	if(mb->mode == CODE_INTER_FOURMV){
+  for(i=0;i<16;i++){
+    int fi = sb->f[i];
+    int mb_phase;
+    if(cp[fi]){
+      macroblock_t *mb = &cpi->macro[sb->m[i]];
+      mv_t mv;
+      if(mb->mode == CODE_INTER_FOURMV){
+
+	switch(pf){
+	case OC_PF_420:
+	  /* sixteen blocks/macroblocks per chroma superblock */
 	  
 	  mv.x = mb->mv[0].x + mb->mv[1].x + mb->mv[2].x + mb->mv[3].x;
 	  mv.y = mb->mv[0].y + mb->mv[1].y + mb->mv[2].y + mb->mv[3].y;
 	  
 	  mv.x = ( mv.x >= 0 ? (mv.x + 2) / 4 : (mv.x - 2) / 4);
 	  mv.y = ( mv.y >= 0 ? (mv.y + 2) / 4 : (mv.y - 2) / 4);
-	}else{
-	  mv = mb->mv[0];
-	}
-	if(TQB(cpi,mb->mode,fi,mv,plane,req,rc,keyframe,&uncoded_ssd,&coded_ssd,&coded_cost)){
-	  sb_coded=1;
-	  coded++;
-	}else{
-	  sb_partial=1;
-	}
-      }
-    }
-    break;
-  case OC_PF_422:
-    /* sixteen blocks / eight macroblocks per chroma superblock */
-    for(i=0;i<16;i++){
-      int fi = sb->f[i];
-      if(cp[fi]){
-	macroblock_t *mb = &cpi->macro[sb->m[i]];
-	int mb_phase = macroblock_phase_422[i];
-	mv_t mv;
-	
-	if(mb->mode == CODE_INTER_FOURMV){
+	  break;
+
+	case OC_PF_422:
+	  /* sixteen blocks / eight macroblocks per chroma superblock */
+	  mb_phase = macroblock_phase_422[i];
 	  mv.x = mb->mv[mb_phase].x + mb->mv[mb_phase+1].x;
 	  mv.y = mb->mv[mb_phase].y + mb->mv[mb_phase+1].y;
 	  mv.x = ( mv.x >= 0 ? (mv.x + 1) / 2 : (mv.x - 1) / 2);
 	  mv.y = ( mv.y >= 0 ? (mv.y + 1) / 2 : (mv.y - 1) / 2);
-	}else{
-	  mv = mb->mv[0];
+	  break;
+	default: /*case OC_PF_444: */
+	  /* sixteen blocks / four macroblocks per chroma superblock */
+	  mb_phase = macroblock_phase_444[i];
+	  mv = mb->mv[mb_phase];
+	  break;
 	}
+      }else
+	mv = mb->mv[0];
 	
-	if(TQB(cpi,mb->mode,fi,mv,plane,req,rc,keyframe,&uncoded_ssd,&coded_ssd,&coded_cost)){
-	  sb_coded=1;
-	  coded++;
-	}else{
-	  sb_partial=1;
-	}
+      if(TQB(cpi,ps,mb->mode,fi,mv,0,0,&mo,rc)){
+	fr_codeblock(cpi,fr);
+	coded++;
+      }else{
+	fr_skipblock(cpi,fr);
       }
     }
-    break;
-    
-  case OC_PF_444:
-    /* sixteen blocks / four macroblocks per chroma superblock */
-    for(i=0;i<16;i++){
-      int fi = sb->f[i];
-      if(cp[fi]){
-	macroblock_t *mb = &cpi->macro[sb->m[i]];
-	int mb_phase = macroblock_phase_444[i];
-	
-	if(TQB(cpi,mb->mode,fi,mb->mv[mb_phase],plane,req,rc,keyframe,&uncoded_ssd,&coded_ssd,&coded_cost)){
-	  sb_coded=1;
-	  coded++;
-	}else{
-	  sb_partial=1;
-	}
-      }
-    }
-    
-    break;
   }
 
-  sb->coded = sb_coded;
-  sb->partial = sb_partial;
-  return coded;
-  
+  return coded;  
 }
 
 int PickModes(CP_INSTANCE *cpi, int recode){
   unsigned char qi = cpi->BaseQ; // temporary
   superblock_t *sb = cpi->super[0];
   superblock_t *sb_end;
-  int i,j,k;
+  int i,j;
   ogg_uint32_t interbits = 0;
   ogg_uint32_t intrabits = 0;
   mc_state mcenc;
   mv_t last_mv = {0,0};
   mv_t prior_mv = {0,0};
-  ogg_int16_t req[2][3][64];
   long rho_count[65];
+  plane_state_t ps;
+  fr_state_t fr;
+
 #ifdef COLLECT_METRICS
   int sad[8][3][4];
 #endif
   oc_mode_scheme_chooser_init(cpi);
+  ps_setup_frame(cpi,&ps);
+  ps_setup_plane(cpi,&ps,0);
+  fr_clear(cpi,&fr);
+
   memset(rho_count,0,sizeof(rho_count));
-
-  for(i=0;i<2;i++)
-    for(j=0;j<3;j++)
-      for(k=0;k<64;k++)
-	req[i][j][k]=cpi->quant_tables[i][j][k][qi];
-
   cpi->MVBits_0 = 0;
   cpi->MVBits_1 = 0;
  
@@ -893,8 +895,6 @@
   sb = cpi->super[0];
   sb_end = sb + cpi->super_n[0];
   for(; sb<sb_end; sb++){
-    sb->coded=0;
-    sb->partial=0;
 
     for(j = 0; j<4; j++){ /* mode addressing is through Y plane, always 4 MB per SB */
       int mbi = sb->m[j];
@@ -931,7 +931,7 @@
       if(cpi->FrameType == KEY_FRAME){
 	mb->mode = CODE_INTRA;
 	/* Transform, quantize, collect rho metrics */
-	TQMB_Y(cpi, mb, j, qi, req, rho_count, 1, 0);
+	TQMB_Y(cpi, mb, j, &ps, rho_count, 0, &fr);
 	
       }else{
 
@@ -1022,7 +1022,7 @@
 	mb->mode = mode;
 	
 	/* Transform, quantize, collect rho metrics */
-	if(TQMB_Y(cpi, mb, j, qi, req, rho_count, 0, overhead[mode])){
+	if(TQMB_Y(cpi, mb, j, &ps, rho_count, overhead[mode], &fr)){
 
 	  switch(mb->mode){
 	  case CODE_INTER_PLUS_MV:
@@ -1047,6 +1047,8 @@
 	    else 
 	      last_mv = mb->mv[0];
 	    break;
+	  default:
+	    break;
 	  }
 	  
 	  oc_mode_set(cpi,mb,mb->mode);      
@@ -1055,19 +1057,26 @@
 	}
       }
     }
+    fr_finishsb(cpi,&fr);
   }
 
   /* code chroma U */
   sb = cpi->super[1];
   sb_end = sb + cpi->super_n[1];
-  for(; sb<sb_end; sb++)
-    TQSB_UV(cpi, sb, 1, qi, req, rho_count, cpi->FrameType == KEY_FRAME);
+  ps_setup_plane(cpi,&ps,1);
+  for(; sb<sb_end; sb++){
+    TQSB_UV(cpi, sb, &ps, rho_count, &fr);
+    fr_finishsb(cpi,&fr);
+  }
 
   /* code chroma V */
   sb = cpi->super[2];
   sb_end = sb + cpi->super_n[2];
-  for(; sb<sb_end; sb++)
-    TQSB_UV(cpi, sb, 2, qi, req, rho_count, cpi->FrameType == KEY_FRAME);
+  ps_setup_plane(cpi,&ps,2);
+  for(; sb<sb_end; sb++){
+    TQSB_UV(cpi, sb, &ps, rho_count, &fr);
+    fr_finishsb(cpi,&fr);
+  }
 
   for(i=1;i<65;i++)
     rho_count[i]+=rho_count[i-1];
@@ -1093,7 +1102,7 @@
        to code them. */
     {
       ogg_uint32_t bits = oggpackB_bits(cpi->oggbuffer);
-      PackAndWriteDFArray(cpi);
+      fr_write(cpi,&fr);
       interbits += ((oggpackB_bits(cpi->oggbuffer) - bits) << OC_BIT_SCALE);
     }
     



More information about the commits mailing list