[xiph-commits] r15067 - branches/theora-thusnelda/lib/enc
xiphmont at svn.xiph.org
xiphmont at svn.xiph.org
Tue Jun 24 16:17:14 PDT 2008
Author: xiphmont
Date: 2008-06-24 16:17:13 -0700 (Tue, 24 Jun 2008)
New Revision: 15067
Modified:
branches/theora-thusnelda/lib/enc/codec_internal.h
branches/theora-thusnelda/lib/enc/dsp.h
branches/theora-thusnelda/lib/enc/frarray.c
branches/theora-thusnelda/lib/enc/frinit.c
branches/theora-thusnelda/lib/enc/mode.c
Log:
Complete rewrite of the DFarray code; it's now tracked and coded as part of the main encode loop so that the bit usage information of the block runs is available to block coding decisions.
Modified: branches/theora-thusnelda/lib/enc/codec_internal.h
===================================================================
--- branches/theora-thusnelda/lib/enc/codec_internal.h 2008-06-24 20:49:51 UTC (rev 15066)
+++ branches/theora-thusnelda/lib/enc/codec_internal.h 2008-06-24 23:17:13 UTC (rev 15067)
@@ -26,6 +26,7 @@
#include "theora/theora.h"
#include "encoder_huffman.h"
+typedef struct CP_INSTANCE CP_INSTANCE;
#include "dsp.h"
#define theora_read(x,y,z) ( oggpackB_read(x,y,z) )
@@ -154,9 +155,6 @@
typedef struct superblock {
int f[16]; // hilbert order
int m[16]; // hilbert order: only 4 for luma, but 16 for U/V (to match f) */
-
- int partial;
- int coded;
} superblock_t;
typedef ogg_int16_t quant_table[64];
@@ -187,7 +185,7 @@
} oc_mode_scheme_chooser;
/* Encoder (Compressor) instance -- installed in a theora_state */
-typedef struct CP_INSTANCE {
+struct CP_INSTANCE {
/*This structure must be first.
It contains entry points accessed by the decoder library's API wrapper, and
is the only assumption that library makes about our internal format.*/
@@ -244,6 +242,12 @@
/*********************************************************************/
/* Token Buffers */
+ int *fr_partial;
+ unsigned char *fr_partial_bits;
+ int *fr_full;
+ unsigned char *fr_full_bits;
+ ogg_int16_t *fr_block;
+ unsigned char *fr_block_bits;
unsigned char *dct_token_storage;
ogg_uint16_t *dct_token_eb_storage;
@@ -298,7 +302,7 @@
DspFunctions dsp; /* Selected functions for this platform */
-} CP_INSTANCE;
+};
#define clamp255(x) ((unsigned char)((((x)<0)-1) & ((x) | -((x)>255))))
@@ -325,8 +329,6 @@
extern void WriteHuffmanTrees(HUFF_ENTRY *HuffRoot[NUM_HUFF_TABLES],
oggpack_buffer *opb);
-extern void PackAndWriteDFArray( CP_INSTANCE *cpi );
-
extern void WriteFrameHeader( CP_INSTANCE *cpi) ;
extern void EncodeData(CP_INSTANCE *cpi);
@@ -357,6 +359,33 @@
extern void ClearFrameInfo (CP_INSTANCE *cpi);
+typedef struct {
+ int cpi_partial_count;
+ int cpi_full_count;
+ int cpi_block_count;
+
+ ogg_uint16_t sb_partial_count;
+ ogg_uint16_t sb_full_count;
+
+ signed char sb_partial_last;
+ signed char sb_full_last;
+ signed char b_last;
+ signed char b_count;
+ signed char b_pend;
+
+ char sb_partial_break;
+ char sb_full_break;
+ char sb_partial;
+ char sb_coded;
+
+} fr_state_t;
+
+void fr_clear(CP_INSTANCE *cpi, fr_state_t *fr);
+void fr_skipblock(CP_INSTANCE *cpi, fr_state_t *fr);
+void fr_codeblock(CP_INSTANCE *cpi, fr_state_t *fr);
+void fr_finishsb(CP_INSTANCE *cpi, fr_state_t *fr);
+void fr_write(CP_INSTANCE *cpi, fr_state_t *fr);
+
#ifdef COLLECT_METRICS
extern void ModeMetrics(CP_INSTANCE *cpi, int huff[4]);
extern void DumpMetrics(CP_INSTANCE *cpi);
Modified: branches/theora-thusnelda/lib/enc/dsp.h
===================================================================
--- branches/theora-thusnelda/lib/enc/dsp.h 2008-06-24 20:49:51 UTC (rev 15066)
+++ branches/theora-thusnelda/lib/enc/dsp.h 2008-06-24 23:17:13 UTC (rev 15067)
@@ -73,7 +73,7 @@
ogg_uint32_t (*inter8x8_err_xy2)(unsigned char *SrcData, unsigned char *RefDataPtr1,
unsigned char *RefDataPtr2, ogg_uint32_t stride);
- void (*LoopFilter) (void *cpi, int FLimit);
+ void (*LoopFilter) (CP_INSTANCE *cpi, int FLimit);
void (*FilterVert) (unsigned char * PixelPtr,
ogg_int32_t LineLength, ogg_int16_t *BoundingValuePtr);
Modified: branches/theora-thusnelda/lib/enc/frarray.c
===================================================================
--- branches/theora-thusnelda/lib/enc/frarray.c 2008-06-24 20:49:51 UTC (rev 15066)
+++ branches/theora-thusnelda/lib/enc/frarray.c 2008-06-24 23:17:13 UTC (rev 15067)
@@ -19,189 +19,287 @@
#include "codec_internal.h"
#include <stdio.h>
-/* Long run bit string coding */
-static ogg_uint32_t FrArrayCodeSBRun( CP_INSTANCE *cpi, ogg_uint32_t value){
- ogg_uint32_t CodedVal = 0;
- ogg_uint32_t CodedBits = 0;
+void fr_clear(CP_INSTANCE *cpi, fr_state_t *fr){
+ fr->sb_partial_last = -1;
+ fr->sb_partial_count = 0;
+ fr->sb_partial_break = 0;
- /* Coding scheme:
- Codeword RunLength
- 0 1
- 10x 2-3
- 110x 4-5
- 1110xx 6-9
- 11110xxx 10-17
- 111110xxxx 18-33
- 111111xxxxxxxxxxxx 34-4129 */
+ fr->sb_full_last = -1;
+ fr->sb_full_count = 0;
+ fr->sb_full_break = 0;
- if ( value == 1 ){
- CodedVal = 0;
- CodedBits = 1;
- } else if ( value <= 3 ) {
- CodedVal = 0x0004 + (value - 2);
- CodedBits = 3;
- } else if ( value <= 5 ) {
- CodedVal = 0x000C + (value - 4);
- CodedBits = 4;
- } else if ( value <= 9 ) {
- CodedVal = 0x0038 + (value - 6);
- CodedBits = 6;
- } else if ( value <= 17 ) {
- CodedVal = 0x00F0 + (value - 10);
- CodedBits = 8;
- } else if ( value <= 33 ) {
- CodedVal = 0x03E0 + (value - 18);
- CodedBits = 10;
- } else {
- CodedVal = 0x3F000 + (value - 34);
- CodedBits = 18;
- }
+ fr->b_last = -1;
+ fr->b_count = 0;
+ fr->b_pend = 0;
- /* Add the bits to the encode holding buffer. */
- oggpackB_write( cpi->oggbuffer, CodedVal, CodedBits );
+ fr->sb_partial=0;
+ fr->sb_coded=0;
- return CodedBits;
+ fr->cpi_partial_count=0;
+ fr->cpi_full_count=0;
+ fr->cpi_block_count=0;
}
-/* Short run bit string coding */
-static ogg_uint32_t FrArrayCodeBlockRun( CP_INSTANCE *cpi,
- ogg_uint32_t value ) {
- ogg_uint32_t CodedVal = 0;
- ogg_uint32_t CodedBits = 0;
-
+static int Brun( ogg_uint32_t value, ogg_int16_t *token) {
+
/* Coding scheme:
- Codeword RunLength
- 0x 1-2
- 10x 3-4
- 110x 5-6
- 1110xx 7-10
- 11110xx 11-14
- 11111xxxx 15-30 */
+ Codeword RunLength
+ 0x 1-2
+ 10x 3-4
+ 110x 5-6
+ 1110xx 7-10
+ 11110xx 11-14
+ 11111xxxx 15-30 */
if ( value <= 2 ) {
- CodedVal = value - 1;
- CodedBits = 2;
+ *token = value - 1;
+ return 2;
} else if ( value <= 4 ) {
- CodedVal = 0x0004 + (value - 3);
- CodedBits = 3;
-
+ *token = 0x0004 + (value - 3);
+ return 3;
} else if ( value <= 6 ) {
- CodedVal = 0x000C + (value - 5);
- CodedBits = 4;
-
+ *token = 0x000C + (value - 5);
+ return 4;
} else if ( value <= 10 ) {
- CodedVal = 0x0038 + (value - 7);
- CodedBits = 6;
-
+ *token = 0x0038 + (value - 7);
+ return 6;
} else if ( value <= 14 ) {
- CodedVal = 0x0078 + (value - 11);
- CodedBits = 7;
+ *token = 0x0078 + (value - 11);
+ return 7;
} else {
- CodedVal = 0x01F0 + (value - 15);
- CodedBits = 9;
+ *token = 0x01F0 + (value - 15);
+ return 9;
}
+}
- /* Add the bits to the encode holding buffer. */
- oggpackB_write( cpi->oggbuffer, CodedVal, CodedBits );
+void fr_skipblock(CP_INSTANCE *cpi, fr_state_t *fr){
+ if(fr->sb_coded){
+ if(!fr->sb_partial){
- return CodedBits;
+ /* superblock was previously fully coded */
+
+ if(fr->b_last==-1){
+ /* first run of the frame */
+ cpi->fr_block[fr->cpi_block_count]=1;
+ cpi->fr_block_bits[fr->cpi_block_count]=1;
+ fr->cpi_block_count++;
+ fr->b_last = 1;
+ }
+
+ if(fr->b_last==1){
+ /* in-progress run also a coded run */
+ fr->b_count += fr->b_pend;
+ }else{
+ /* in-progress run an uncoded run; flush */
+ cpi->fr_block_bits[fr->cpi_block_count] =
+ Brun(fr->b_count, cpi->fr_block+fr->cpi_block_count);
+ fr->cpi_block_count++;
+ fr->b_count=fr->b_pend;
+ fr->b_last = 1;
+ }
+ }
+
+ /* add a skip block */
+ if(fr->b_last == 0){
+ fr->b_count++;
+ }else{
+ cpi->fr_block_bits[fr->cpi_block_count] =
+ Brun(fr->b_count, cpi->fr_block+fr->cpi_block_count);
+ fr->cpi_block_count++;
+ fr->b_count = 1;
+ fr->b_last = 0;
+ }
+ }
+
+ fr->b_pend++;
+ fr->sb_partial=1;
}
-void PackAndWriteDFArray( CP_INSTANCE *cpi ){
- ogg_uint32_t SB, B;
- int run_last = -1;
- int run_count = 0;
- int run_break = 0;
- int invalid_fi = cpi->frag_total;
- unsigned char *cp = cpi->frag_coded;
+void fr_codeblock(CP_INSTANCE *cpi, fr_state_t *fr){
+ if(fr->sb_partial){
+ if(!fr->sb_coded){
- /* code the partially coded SB flags */
- for( SB = 0; SB < cpi->super_total; SB++ ) {
- superblock_t *sb = &cpi->super[0][SB];
- int partial = (sb->partial & sb->coded);
+ /* superblock was previously completely uncoded */
- if(run_last == -1){
- oggpackB_write( cpi->oggbuffer, partial, 1);
- run_last = partial;
+ if(fr->b_last==-1){
+ /* first run of the frame */
+ cpi->fr_block[fr->cpi_block_count]=0;
+ cpi->fr_block_bits[fr->cpi_block_count]=1;
+ fr->cpi_block_count++;
+ fr->b_last = 0;
+ }
+
+ if(fr->b_last==0){
+ /* in-progress run also an uncoded run */
+ fr->b_count += fr->b_pend;
+ }else{
+ /* in-progress run a coded run; flush */
+ cpi->fr_block_bits[fr->cpi_block_count] =
+ Brun(fr->b_count, cpi->fr_block+fr->cpi_block_count);
+ fr->cpi_block_count++;
+ fr->b_count=fr->b_pend;
+ fr->b_last = 0;
+ }
}
-
- if(run_last == partial && run_count < 4129){
- run_count++;
+
+ /* add a coded block */
+ if(fr->b_last == 1){
+ fr->b_count++;
}else{
- if(run_break)
- oggpackB_write( cpi->oggbuffer, partial, 1);
-
- run_break=0;
- FrArrayCodeSBRun( cpi, run_count );
- if(run_count >= 4129) run_break = 1;
- run_count=1;
+ cpi->fr_block_bits[fr->cpi_block_count] =
+ Brun(fr->b_count, cpi->fr_block+fr->cpi_block_count);
+ fr->cpi_block_count++;
+ fr->b_count = 1;
+ fr->b_last = 1;
}
- run_last=partial;
}
- if(run_break)
- oggpackB_write( cpi->oggbuffer, run_last, 1);
- if(run_count)
- FrArrayCodeSBRun(cpi, run_count);
+
+ fr->b_pend++;
+ fr->sb_coded=1;
+}
- /* code the fully coded/uncoded SB flags */
- run_last = -1;
- run_count = 0;
- run_break = 0;
- for( SB = 0; SB < cpi->super_total; SB++ ) {
- superblock_t *sb = &cpi->super[0][SB];
+static int SBRun(ogg_uint32_t value, int *token){
+
+ /* Coding scheme:
+ Codeword RunLength
+ 0 1
+ 10x 2-3
+ 110x 4-5
+ 1110xx 6-9
+ 11110xxx 10-17
+ 111110xxxx 18-33
+ 111111xxxxxxxxxxxx 34-4129 */
+
+ if ( value == 1 ){
+ *token = 0;
+ return 1;
+ } else if ( value <= 3 ) {
+ *token = 0x0004 + (value - 2);
+ return 3;
+ } else if ( value <= 5 ) {
+ *token = 0x000C + (value - 4);
+ return 4;
+ } else if ( value <= 9 ) {
+ *token = 0x0038 + (value - 6);
+ return 6;
+ } else if ( value <= 17 ) {
+ *token = 0x00F0 + (value - 10);
+ return 8;
+ } else if ( value <= 33 ) {
+ *token = 0x03E0 + (value - 18);
+ return 10;
+ } else {
+ *token = 0x3F000 + (value - 34);
+ return 18;
+ }
+}
+
+void fr_finishsb(CP_INSTANCE *cpi, fr_state_t *fr){
+ /* update partial state */
+ int partial = (fr->sb_partial & fr->sb_coded);
+ if(fr->sb_partial_last == -1){
+ cpi->fr_partial[fr->cpi_partial_count] = partial;
+ cpi->fr_partial_bits[fr->cpi_partial_count] = 1;
+ fr->cpi_partial_count++;
+ fr->sb_partial_last = partial;
+ }
- if(sb->partial && sb->coded) continue;
+ if(fr->sb_partial_last == partial && fr->sb_partial_count < 4129){
+ fr->sb_partial_count++;
+ }else{
+ if(fr->sb_partial_break){
+ cpi->fr_partial[fr->cpi_partial_count] = partial;
+ cpi->fr_partial_bits[fr->cpi_partial_count] = 1;
+ fr->cpi_partial_count++;
+ }
+
+ fr->sb_partial_break=0;
+ cpi->fr_partial_bits[fr->cpi_partial_count] =
+ SBRun( fr->sb_partial_count, cpi->fr_partial+fr->cpi_partial_count);
+ fr->cpi_partial_count++;
- if(run_last == -1){
- oggpackB_write( cpi->oggbuffer, sb->coded, 1);
- run_last = sb->coded;
+ if(fr->sb_partial_count >= 4129) fr->sb_partial_break = 1;
+ fr->sb_partial_count=1;
+ }
+ fr->sb_partial_last=partial;
+
+ /* fully coded/uncoded state */
+ if(!fr->sb_partial || !fr->sb_coded){
+
+ if(fr->sb_full_last == -1){
+ cpi->fr_full[fr->cpi_full_count] = fr->sb_coded;
+ cpi->fr_full_bits[fr->cpi_full_count] = 1;
+ fr->cpi_full_count++;
+ fr->sb_full_last = fr->sb_coded;
}
- if(run_last == sb->coded && run_count < 4129){
- run_count++;
+ if(fr->sb_full_last == fr->sb_coded && fr->sb_full_count < 4129){
+ fr->sb_full_count++;
}else{
- if(run_break)
- oggpackB_write( cpi->oggbuffer, sb->coded, 1);
- run_break=0;
- FrArrayCodeSBRun( cpi, run_count );
- if(run_count >= 4129) run_break = 1;
- run_count=1;
+ if(fr->sb_full_break){
+ cpi->fr_full[fr->cpi_full_count] = fr->sb_coded;
+ cpi->fr_full_bits[fr->cpi_full_count] = 1;
+ fr->cpi_full_count++;
+ }
+
+ fr->sb_full_break=0;
+ cpi->fr_full_bits[fr->cpi_full_count] =
+ SBRun( fr->sb_full_count, cpi->fr_full+fr->cpi_full_count);
+ fr->cpi_full_count++;
+ if(fr->sb_full_count >= 4129) fr->sb_full_break = 1;
+ fr->sb_full_count=1;
}
- run_last=sb->coded;
+ fr->sb_full_last=fr->sb_coded;
+
}
- if(run_break)
- oggpackB_write( cpi->oggbuffer, run_last, 1);
- if(run_count)
- FrArrayCodeSBRun(cpi, run_count);
+ fr->b_pend=0;
+ fr->sb_partial=0;
+ fr->sb_coded=0;
+}
- /* code the block flags */
- run_last = -1;
- run_count = 0;
- for( SB = 0; SB < cpi->super_total; SB++ ) {
- superblock_t *sb = &cpi->super[0][SB];
+static void fr_flush(CP_INSTANCE *cpi, fr_state_t *fr){
+ /* flush any pending partial run */
+ if(fr->sb_partial_break){
+ cpi->fr_partial[fr->cpi_partial_count] = fr->sb_partial_last;
+ cpi->fr_partial_bits[fr->cpi_partial_count] = 1;
+ fr->cpi_partial_count++;
+ }
+ if(fr->sb_partial_count){
+ cpi->fr_partial_bits[fr->cpi_partial_count] =
+ SBRun( fr->sb_partial_count, cpi->fr_partial+fr->cpi_partial_count);
+ fr->cpi_partial_count++;
+ }
- if(!sb->coded || !sb->partial) continue;
+ /* flush any pending full run */
+ if(fr->sb_full_break){
+ cpi->fr_full[fr->cpi_full_count] = fr->sb_full_last;
+ cpi->fr_full_bits[fr->cpi_full_count] = 1;
+ fr->cpi_full_count++;
+ }
+ if(fr->sb_full_count){
+ cpi->fr_full_bits[fr->cpi_full_count] =
+ SBRun( fr->sb_full_count, cpi->fr_full+fr->cpi_full_count);
+ fr->cpi_full_count++;
+ }
- for ( B=0; B<16; B++ ) {
- int fi = sb->f[B];
- if(fi != invalid_fi){
- if(run_last == -1){
- oggpackB_write( cpi->oggbuffer, cp[fi], 1);
- run_last = cp[fi];
- }
-
- if(run_last == cp[fi]){
- run_count++;
- }else{
- FrArrayCodeBlockRun( cpi, run_count );
- run_count=1;
- }
- run_last=cp[fi];
- }
- }
+ /* flush any pending block run */
+ if(fr->b_count){
+ cpi->fr_block_bits[fr->cpi_block_count] =
+ Brun(fr->b_count, cpi->fr_block+fr->cpi_block_count);
+ fr->cpi_block_count++;
}
- if(run_count)
- FrArrayCodeBlockRun( cpi, run_count );
+}
+void fr_write(CP_INSTANCE *cpi, fr_state_t *fr){
+ int i;
+
+ fr_flush(cpi,fr);
+
+ for(i=0;i<fr->cpi_partial_count;i++)
+ oggpackB_write( cpi->oggbuffer, cpi->fr_partial[i], cpi->fr_partial_bits[i]);
+ for(i=0;i<fr->cpi_full_count;i++)
+ oggpackB_write( cpi->oggbuffer, cpi->fr_full[i], cpi->fr_full_bits[i]);
+ for(i=0;i<fr->cpi_block_count;i++)
+ oggpackB_write( cpi->oggbuffer, cpi->fr_block[i], cpi->fr_block_bits[i]);
}
Modified: branches/theora-thusnelda/lib/enc/frinit.c
===================================================================
--- branches/theora-thusnelda/lib/enc/frinit.c 2008-06-24 20:49:51 UTC (rev 15066)
+++ branches/theora-thusnelda/lib/enc/frinit.c 2008-06-24 23:17:13 UTC (rev 15067)
@@ -46,6 +46,13 @@
if(cpi->macro) _ogg_free(cpi->macro);
if(cpi->super[0]) _ogg_free(cpi->super[0]);
+
+ if(cpi->fr_partial)_ogg_free(cpi->fr_partial);
+ if(cpi->fr_partial_bits)_ogg_free(cpi->fr_partial_bits);
+ if(cpi->fr_full)_ogg_free(cpi->fr_full);
+ if(cpi->fr_full_bits)_ogg_free(cpi->fr_full_bits);
+ if(cpi->fr_block)_ogg_free(cpi->fr_block);
+ if(cpi->fr_block_bits)_ogg_free(cpi->fr_block_bits);
}
/* A note to people reading and wondering why malloc returns aren't
@@ -128,6 +135,13 @@
cpi->dct_token_storage = _ogg_malloc(cpi->frag_total*BLOCK_SIZE*sizeof(*cpi->dct_token_storage));
cpi->dct_token_eb_storage = _ogg_malloc(cpi->frag_total*BLOCK_SIZE*sizeof(*cpi->dct_token_eb_storage));
+ cpi->fr_partial = _ogg_calloc(cpi->super_total+1, sizeof(*cpi->fr_partial));
+ cpi->fr_partial_bits = _ogg_calloc(cpi->super_total+1, sizeof(*cpi->fr_partial_bits));
+ cpi->fr_full = _ogg_calloc(cpi->super_total+1, sizeof(*cpi->fr_full));
+ cpi->fr_full_bits = _ogg_calloc(cpi->super_total+1, sizeof(*cpi->fr_full_bits));
+ cpi->fr_block = _ogg_calloc(cpi->frag_total+1, sizeof(*cpi->fr_block));
+ cpi->fr_block_bits = _ogg_calloc(cpi->frag_total+1, sizeof(*cpi->fr_block_bits));
+
#ifdef COLLECT_METRICS
cpi->frag_mbi = _ogg_calloc(cpi->frag_total+1, sizeof(*cpi->frag_mbi));
for(i=0;i<8;i++)
Modified: branches/theora-thusnelda/lib/enc/mode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/mode.c 2008-06-24 20:49:51 UTC (rev 15066)
+++ branches/theora-thusnelda/lib/enc/mode.c 2008-06-24 23:17:13 UTC (rev 15067)
@@ -509,26 +509,68 @@
dsp_copy8x8 (cpi->dsp, cpi->lastrecon+bi, cpi->recon+bi, stride);
}
-static int TQB (CP_INSTANCE *cpi, int mode, int fi, mv_t mv, int plane, ogg_int16_t re_q[2][3][64],
- long *rho_count, int keyframe, int *uncoded_ssd_acc, int *coded_ssd_acc, int *cost_acc){
+typedef struct{
+ int uncoded_ssd;
+ int uncoded_cost;
+ int ssd;
+ int cost;
+} rd_metric_t;
+typedef struct{
+ int plane;
+ int qi;
+ ogg_int16_t re_q[2][3][64];
+ ogg_int32_t *iq[2];
+ quant_tables *qq[2];
+ ogg_int32_t *mode_rate[2];
+ int xqp;
+ int yqp;
+ int ssdmul;
+} plane_state_t;
+
+static void ps_setup_frame(CP_INSTANCE *cpi, plane_state_t *ps){
+ int i,j,k;
int qi = cpi->BaseQ; /* temporary */;
- int inter = (mode != CODE_INTRA);
- ogg_int32_t *iq = cpi->iquant_tables[inter][plane][qi];
+
+ ps->qi = qi;
+ for(i=0;i<2;i++)
+ for(j=0;j<3;j++)
+ for(k=0;k<64;k++)
+ ps->re_q[i][j][k]=cpi->quant_tables[i][j][k][qi];
+}
+
+static void ps_setup_plane(CP_INSTANCE *cpi, plane_state_t *ps, int plane){
+ ps->plane = plane;
+ ps->iq[0] = cpi->iquant_tables[0][plane][ps->qi];
+ ps->iq[1] = cpi->iquant_tables[1][plane][ps->qi];
+ ps->qq[0] = &(cpi->quant_tables[0][plane]);
+ ps->qq[1] = &(cpi->quant_tables[1][plane]);
+ ps->mode_rate[0] = mode_rate[ps->qi][plane][0];
+ ps->mode_rate[1] = mode_rate[ps->qi][plane][1];
+ ps->xqp = (plane && cpi->info.pixelformat != OC_PF_444);
+ ps->yqp = (plane && cpi->info.pixelformat == OC_PF_420);
+ ps->ssdmul = (ps->xqp+1)*(ps->yqp+1);
+}
+
+static int TQB (CP_INSTANCE *cpi, plane_state_t *ps, int mode, int fi, mv_t mv,
+ int uncoded_overhead, int coded_overhead, rd_metric_t *mo, long *rho_count){
+
+ int keyframe = (cpi->FrameType == KEY_FRAME);
+ int qi = ps->qi;
+ ogg_int32_t *iq = ps->iq[mode != CODE_INTRA];
ogg_int16_t buffer[64];
ogg_int16_t *data = cpi->frag_dct[fi].data;
int bi = cpi->frag_buffer_index[fi];
- int stride = cpi->stride[plane];
- int xqp = (plane && cpi->info.pixelformat != OC_PF_444);
- int yqp = (plane && cpi->info.pixelformat == OC_PF_420);
+ int stride = cpi->stride[ps->plane];
unsigned char *frame_ptr = &cpi->frame[bi];
unsigned char *lastrecon = ((mode == CODE_USING_GOLDEN ||
mode == CODE_GOLDEN_MV) ?
cpi->golden : cpi->lastrecon)+bi;
unsigned char *thisrecon = cpi->recon+bi;
int nonzero=63;
- ogg_int16_t *dequant = re_q[inter][plane];
+ ogg_int16_t *dequant = ps->re_q[mode != CODE_INTRA][ps->plane];
int uncoded_ssd=0,coded_ssd=0,sad=0;
+ int lambda = cpi->skip_lambda;
/* motion comp */
switch(mode){
@@ -539,10 +581,10 @@
case CODE_INTER_FOURMV:
{
- int mx = mvmap[xqp][mv.x+31];
- int my = mvmap[yqp][mv.y+31];
- int mx2 = mvmap2[xqp][mv.x+31];
- int my2 = mvmap2[yqp][mv.y+31];
+ int mx = mvmap[ps->xqp][mv.x+31];
+ int my = mvmap[ps->yqp][mv.y+31];
+ int mx2 = mvmap2[ps->xqp][mv.x+31];
+ int my2 = mvmap2[ps->yqp][mv.y+31];
unsigned char *r1 = lastrecon + my * stride + mx;
@@ -585,7 +627,7 @@
if(mode==CODE_INTRA){
sad >>=6;
}else{
- if(plane)sad<<=2;
+ if(ps->plane)sad<<=2;
}
}
@@ -595,7 +637,7 @@
/* collect rho metrics, quantize */
{
int i;
- quant_tables *qq = &(cpi->quant_tables[inter][plane]);
+ quant_tables *qq = ps->qq[mode != CODE_INTRA];
for(i=0;i<64;i++){
int ii = dezigzag_index[i];
@@ -639,7 +681,7 @@
if(!keyframe){
int i;
- int cost = BINMAP(mode_rate[qi][plane][mode==CODE_INTRA],sad);
+ int cost = BINMAP(mode_rate[qi][ps->plane][mode==CODE_INTRA],sad);
if(cost<0)cost=0; /* some of the trained fits can return a negative cost for zero entropy */
/* in retrospect, should we have skipped this block? */
@@ -651,20 +693,25 @@
for(i=0;i<64;i++)
coded_ssd += buffer[i]*buffer[i];
- if(plane){
- coded_ssd*=4;
- uncoded_ssd*=4;
- }
+ /* for undersampled planes */
+ coded_ssd*=ps->ssdmul;
+ uncoded_ssd*=ps->ssdmul;
- if(uncoded_ssd <= coded_ssd+((cpi->skip_lambda*cost)>>(OC_BIT_SCALE))){
- uncode_frag(cpi,fi,plane);
+ mo->uncoded_ssd+=uncoded_ssd;
+ mo->uncoded_cost+=(uncoded_overhead<<OC_BIT_SCALE);
+
+ if(uncoded_ssd+uncoded_overhead*lambda <= coded_ssd+coded_overhead*lambda+((cost*lambda)>>OC_BIT_SCALE)){
+ uncode_frag(cpi,fi,ps->plane);
+
+ mo->ssd+=uncoded_ssd;
+ mo->cost+=(uncoded_overhead<<OC_BIT_SCALE);
+
return 0;
+ }else{
+
+ mo->ssd+=coded_ssd;
+ mo->cost+=cost+(coded_overhead<<OC_BIT_SCALE);
}
-
- *uncoded_ssd_acc+=uncoded_ssd;
- *coded_ssd_acc+=coded_ssd;
- *cost_acc+=cost;
-
}
return 1;
@@ -672,27 +719,18 @@
static int macroblock_phase_Y[4][4] = {{0,1,3,2},{0,2,3,1},{0,2,3,1},{3,2,0,1}};
-static int TQMB_Y ( CP_INSTANCE *cpi, macroblock_t *mb, int mb_phase, int qi,
- ogg_int16_t req[2][3][64], long *rc, int keyframe, int overhead){
+static int TQMB_Y ( CP_INSTANCE *cpi, macroblock_t *mb, int mb_phase, plane_state_t *ps, long *rc,
+ int mode_overhead, fr_state_t *fr){
unsigned char *cp=cpi->frag_coded;
int mode = mb->mode;
+ int coded = 0;
int i;
- int coded=0;
- int coded_ssd=0;
- int uncoded_ssd=0;
- int coded_cost=0;
-
- int ysb_coded = 0;
- int ysb_partial = 0;
+ fr_state_t fr_checkpoint;
- superblock_t *ysb = &cpi->super[0][mb->ysb];
+ rd_metric_t mo;
+ memset(&mo,0,sizeof(mo));
- /* It's exceptionally difficult in the current Theora coding
- structure to take the global superblock coding runs into account
- when computing relative bitcosts in block coding decisions, but
- this is a relatively exceedingly minor cost consideration. We do
- account for the local coding costs of skip blocks within the
- superblock, a more significant contribution to consider. */
+ memcpy(&fr_checkpoint,fr,sizeof(fr_checkpoint));
for(i=0;i<4;i++){
/* Blocks must be handled in Hilbert order which is defined by MB
@@ -700,28 +738,34 @@
raster order just to make it more difficult. */
int bi = macroblock_phase_Y[mb_phase][i];
int fi = mb->Ryuv[0][bi];
- if(TQB(cpi,mode,fi,mb->mv[bi],0,req,rc,keyframe, &uncoded_ssd, &coded_ssd, &coded_cost)){
- ysb_coded=1;
+ if(TQB(cpi,ps,mode,fi,mb->mv[bi],0,0,&mo,rc)){
+ fr_codeblock(cpi,fr);
coded++;
}else{
- ysb_partial=1;
+ fr_skipblock(cpi,fr);
if(mode == CODE_INTER_FOURMV)
mb->mv[bi]=(mv_t){0,0};
}
}
- if(!keyframe){
+
+ if(cpi->FrameType != KEY_FRAME){
if(coded){
/* block by block, still coding the MB. Now consider the
macroblock coding cost as a whole (mode and MV) */
- if(uncoded_ssd <= coded_ssd+((cpi->skip_lambda*(coded_cost+overhead))>>(OC_BIT_SCALE))){
+ if(mo.uncoded_ssd+((cpi->skip_lambda*mo.uncoded_cost)>>OC_BIT_SCALE) <=
+ mo.ssd+((cpi->skip_lambda*(mo.cost+mode_overhead))>>(OC_BIT_SCALE))){
/* taking macroblock overhead into account, it is not worth coding this MB */
+
+ memcpy(fr,&fr_checkpoint,sizeof(fr_checkpoint));
for(i=0;i<4;i++){
int fi = mb->Ryuv[0][i];
+ fr_skipblock(cpi,fr);
if(cp[fi])
uncode_frag(cpi,fi,0);
}
coded=0;
+
}
}
@@ -729,7 +773,6 @@
mb->mode = CODE_INTER_NO_MV; /* No luma blocks coded, mode is forced */
mb->coded = 0;
mb->mv[0] = mb->mv[1] = mb->mv[2] = mb->mv[3] = (mv_t){0,0};
- ysb->partial = 1;
return 0;
}
@@ -756,132 +799,91 @@
}
}
-
- ysb->coded |= ysb_coded;
- ysb->partial |= ysb_partial;
return coded;
}
static int macroblock_phase_422[16] = {0,0,2,2,0,2,2,0,0,2,2,0,2,2,0,0};
static int macroblock_phase_444[16] = {0,1,3,2,0,2,3,1,0,2,3,1,3,2,0,1};
-static int TQSB_UV ( CP_INSTANCE *cpi, superblock_t *sb, int plane,
- int qi, ogg_int16_t req[2][3][64], long *rc, int keyframe){
+static int TQSB_UV ( CP_INSTANCE *cpi, superblock_t *sb, plane_state_t *ps, long *rc, fr_state_t *fr){
int pf = cpi->info.pixelformat;
int i;
- int coded=0;
- int coded_ssd=0;
- int uncoded_ssd=0;
- int coded_cost=0;
-
- int sb_coded = 0;
- int sb_partial = 0;
+ int coded = 0;
unsigned char *cp=cpi->frag_coded;
+ rd_metric_t mo;
+ memset(&mo,0,sizeof(mo));
- switch(pf){
- case OC_PF_420:
- /* sixteen blocks/macroblocks per chroma superblock */
- for(i=0;i<16;i++){
- int fi = sb->f[i];
- if(cp[fi]){
- macroblock_t *mb = &cpi->macro[sb->m[i]];
- mv_t mv;
- if(mb->mode == CODE_INTER_FOURMV){
+ for(i=0;i<16;i++){
+ int fi = sb->f[i];
+ int mb_phase;
+ if(cp[fi]){
+ macroblock_t *mb = &cpi->macro[sb->m[i]];
+ mv_t mv;
+ if(mb->mode == CODE_INTER_FOURMV){
+
+ switch(pf){
+ case OC_PF_420:
+ /* sixteen blocks/macroblocks per chroma superblock */
mv.x = mb->mv[0].x + mb->mv[1].x + mb->mv[2].x + mb->mv[3].x;
mv.y = mb->mv[0].y + mb->mv[1].y + mb->mv[2].y + mb->mv[3].y;
mv.x = ( mv.x >= 0 ? (mv.x + 2) / 4 : (mv.x - 2) / 4);
mv.y = ( mv.y >= 0 ? (mv.y + 2) / 4 : (mv.y - 2) / 4);
- }else{
- mv = mb->mv[0];
- }
- if(TQB(cpi,mb->mode,fi,mv,plane,req,rc,keyframe,&uncoded_ssd,&coded_ssd,&coded_cost)){
- sb_coded=1;
- coded++;
- }else{
- sb_partial=1;
- }
- }
- }
- break;
- case OC_PF_422:
- /* sixteen blocks / eight macroblocks per chroma superblock */
- for(i=0;i<16;i++){
- int fi = sb->f[i];
- if(cp[fi]){
- macroblock_t *mb = &cpi->macro[sb->m[i]];
- int mb_phase = macroblock_phase_422[i];
- mv_t mv;
-
- if(mb->mode == CODE_INTER_FOURMV){
+ break;
+
+ case OC_PF_422:
+ /* sixteen blocks / eight macroblocks per chroma superblock */
+ mb_phase = macroblock_phase_422[i];
mv.x = mb->mv[mb_phase].x + mb->mv[mb_phase+1].x;
mv.y = mb->mv[mb_phase].y + mb->mv[mb_phase+1].y;
mv.x = ( mv.x >= 0 ? (mv.x + 1) / 2 : (mv.x - 1) / 2);
mv.y = ( mv.y >= 0 ? (mv.y + 1) / 2 : (mv.y - 1) / 2);
- }else{
- mv = mb->mv[0];
+ break;
+ default: /*case OC_PF_444: */
+ /* sixteen blocks / four macroblocks per chroma superblock */
+ mb_phase = macroblock_phase_444[i];
+ mv = mb->mv[mb_phase];
+ break;
}
+ }else
+ mv = mb->mv[0];
- if(TQB(cpi,mb->mode,fi,mv,plane,req,rc,keyframe,&uncoded_ssd,&coded_ssd,&coded_cost)){
- sb_coded=1;
- coded++;
- }else{
- sb_partial=1;
- }
+ if(TQB(cpi,ps,mb->mode,fi,mv,0,0,&mo,rc)){
+ fr_codeblock(cpi,fr);
+ coded++;
+ }else{
+ fr_skipblock(cpi,fr);
}
}
- break;
-
- case OC_PF_444:
- /* sixteen blocks / four macroblocks per chroma superblock */
- for(i=0;i<16;i++){
- int fi = sb->f[i];
- if(cp[fi]){
- macroblock_t *mb = &cpi->macro[sb->m[i]];
- int mb_phase = macroblock_phase_444[i];
-
- if(TQB(cpi,mb->mode,fi,mb->mv[mb_phase],plane,req,rc,keyframe,&uncoded_ssd,&coded_ssd,&coded_cost)){
- sb_coded=1;
- coded++;
- }else{
- sb_partial=1;
- }
- }
- }
-
- break;
}
- sb->coded = sb_coded;
- sb->partial = sb_partial;
- return coded;
-
+ return coded;
}
int PickModes(CP_INSTANCE *cpi, int recode){
unsigned char qi = cpi->BaseQ; // temporary
superblock_t *sb = cpi->super[0];
superblock_t *sb_end;
- int i,j,k;
+ int i,j;
ogg_uint32_t interbits = 0;
ogg_uint32_t intrabits = 0;
mc_state mcenc;
mv_t last_mv = {0,0};
mv_t prior_mv = {0,0};
- ogg_int16_t req[2][3][64];
long rho_count[65];
+ plane_state_t ps;
+ fr_state_t fr;
+
#ifdef COLLECT_METRICS
int sad[8][3][4];
#endif
oc_mode_scheme_chooser_init(cpi);
+ ps_setup_frame(cpi,&ps);
+ ps_setup_plane(cpi,&ps,0);
+ fr_clear(cpi,&fr);
+
memset(rho_count,0,sizeof(rho_count));
-
- for(i=0;i<2;i++)
- for(j=0;j<3;j++)
- for(k=0;k<64;k++)
- req[i][j][k]=cpi->quant_tables[i][j][k][qi];
-
cpi->MVBits_0 = 0;
cpi->MVBits_1 = 0;
@@ -893,8 +895,6 @@
sb = cpi->super[0];
sb_end = sb + cpi->super_n[0];
for(; sb<sb_end; sb++){
- sb->coded=0;
- sb->partial=0;
for(j = 0; j<4; j++){ /* mode addressing is through Y plane, always 4 MB per SB */
int mbi = sb->m[j];
@@ -931,7 +931,7 @@
if(cpi->FrameType == KEY_FRAME){
mb->mode = CODE_INTRA;
/* Transform, quantize, collect rho metrics */
- TQMB_Y(cpi, mb, j, qi, req, rho_count, 1, 0);
+ TQMB_Y(cpi, mb, j, &ps, rho_count, 0, &fr);
}else{
@@ -1022,7 +1022,7 @@
mb->mode = mode;
/* Transform, quantize, collect rho metrics */
- if(TQMB_Y(cpi, mb, j, qi, req, rho_count, 0, overhead[mode])){
+ if(TQMB_Y(cpi, mb, j, &ps, rho_count, overhead[mode], &fr)){
switch(mb->mode){
case CODE_INTER_PLUS_MV:
@@ -1047,6 +1047,8 @@
else
last_mv = mb->mv[0];
break;
+ default:
+ break;
}
oc_mode_set(cpi,mb,mb->mode);
@@ -1055,19 +1057,26 @@
}
}
}
+ fr_finishsb(cpi,&fr);
}
/* code chroma U */
sb = cpi->super[1];
sb_end = sb + cpi->super_n[1];
- for(; sb<sb_end; sb++)
- TQSB_UV(cpi, sb, 1, qi, req, rho_count, cpi->FrameType == KEY_FRAME);
+ ps_setup_plane(cpi,&ps,1);
+ for(; sb<sb_end; sb++){
+ TQSB_UV(cpi, sb, &ps, rho_count, &fr);
+ fr_finishsb(cpi,&fr);
+ }
/* code chroma V */
sb = cpi->super[2];
sb_end = sb + cpi->super_n[2];
- for(; sb<sb_end; sb++)
- TQSB_UV(cpi, sb, 2, qi, req, rho_count, cpi->FrameType == KEY_FRAME);
+ ps_setup_plane(cpi,&ps,2);
+ for(; sb<sb_end; sb++){
+ TQSB_UV(cpi, sb, &ps, rho_count, &fr);
+ fr_finishsb(cpi,&fr);
+ }
for(i=1;i<65;i++)
rho_count[i]+=rho_count[i-1];
@@ -1093,7 +1102,7 @@
to code them. */
{
ogg_uint32_t bits = oggpackB_bits(cpi->oggbuffer);
- PackAndWriteDFArray(cpi);
+ fr_write(cpi,&fr);
interbits += ((oggpackB_bits(cpi->oggbuffer) - bits) << OC_BIT_SCALE);
}
More information about the commits mailing list