[xiph-commits] r15555 - branches/theora-thusnelda/lib/enc
xiphmont at svn.xiph.org
Wed Dec 3 02:34:41 PST 2008
Author: xiphmont
Date: 2008-12-03 02:34:41 -0800 (Wed, 03 Dec 2008)
New Revision: 15555
Modified:
branches/theora-thusnelda/lib/enc/codec_internal.h
branches/theora-thusnelda/lib/enc/dct_encode.c
branches/theora-thusnelda/lib/enc/encode.c
branches/theora-thusnelda/lib/enc/encoder_toplevel.c
branches/theora-thusnelda/lib/enc/mode.c
Log:
Ongoing skip refinement work as prompted by OVA slideshow transcodes
Modified: branches/theora-thusnelda/lib/enc/codec_internal.h
===================================================================
--- branches/theora-thusnelda/lib/enc/codec_internal.h 2008-12-03 00:21:45 UTC (rev 15554)
+++ branches/theora-thusnelda/lib/enc/codec_internal.h 2008-12-03 10:34:41 UTC (rev 15555)
@@ -344,7 +344,7 @@
token_checkpoint_t *stack,
int n);
extern void dct_tokenize_init (CP_INSTANCE *cpi);
-extern void dct_tokenize_AC (CP_INSTANCE *cpi,
+extern int dct_tokenize_AC (CP_INSTANCE *cpi,
int fi,
ogg_int16_t *dct,
ogg_int16_t *dequant,
Modified: branches/theora-thusnelda/lib/enc/dct_encode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/dct_encode.c 2008-12-03 00:21:45 UTC (rev 15554)
+++ branches/theora-thusnelda/lib/enc/dct_encode.c 2008-12-03 10:34:41 UTC (rev 15555)
@@ -5,7 +5,7 @@
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
- * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007 *
+ * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
@@ -224,6 +224,12 @@
48,48,48,48,48,48,48,48,
48,48,48,48,48,48,48,48};
+/* only counts bits */
+static int tokencost(CP_INSTANCE *cpi, int huff, int coeff, int token){
+ huff += acoffset[coeff];
+ return cpi->HuffCodeLengthArray_VP3x[huff][token] + cpi->ExtraBitLengths_VP3x[token];
+}
+
void tokenlog_rollback(CP_INSTANCE *cpi, token_checkpoint_t *stack,int n){
int i;
for(i=n-1;i>=0;i--){
@@ -316,11 +322,15 @@
tokenlog_metrics(cpi,coeff,chroma,token);
}
-static void tokenize_eobrun(CP_INSTANCE *cpi, int pos, int run, token_checkpoint_t **stack){
+static int tokenize_eobrun(CP_INSTANCE *cpi, int pos, int run, token_checkpoint_t **stack){
int token=0,eb=0;
int chroma = !(run&0x8000);
+ int huff = cpi->huffchoice[cpi->FrameType!=KEY_FRAME][1][chroma];
+
make_eobrun_token(run&0x7fff, &token, &eb);
token_add(cpi, chroma, pos, token, eb, stack);
+
+ return tokencost(cpi,huff,pos,token);
}
@@ -360,7 +370,7 @@
token_checkpoint_t **stack){
int eb=0;
int token=make_dct_token(cpi,coeff,coeff2,val,&eb);
-
+
/* Emit pending EOB run if any */
if(cpi->eob_run[coeff]){
tokenize_eobrun(cpi,coeff,cpi->eob_run[coeff],stack);
@@ -378,19 +388,20 @@
return 1;
}
-static void tokenize_mark_run(CP_INSTANCE *cpi,
+static int tokenize_mark_run(CP_INSTANCE *cpi,
int chroma,
int fi,
int pre,
int coeff,
token_checkpoint_t **stack){
-
+ int cost = 0;
+
if(pre && cpi->dct_token_count[coeff] == 0){
if(stack)tokenlog_mark(cpi,coeff,stack); /* log an undo without logging a token */
cpi->eob_pre[coeff]++;
}else{
if((cpi->eob_run[coeff]&0x7fff) == 4095){
- tokenize_eobrun(cpi,coeff,cpi->eob_run[coeff],stack);
+ cost += tokenize_eobrun(cpi,coeff,cpi->eob_run[coeff],stack);
cpi->eob_run[coeff] = 0;
}
@@ -401,14 +412,9 @@
#ifdef COLLECT_METRICS
cpi->dct_eob_fi_stack[coeff][cpi->dct_eob_fi_count[coeff]++]=fi;
#endif
+ return cost;
}
-/* only counts bits */
-static int tokencost(CP_INSTANCE *cpi, int huff, int coeff, int token){
- huff += acoffset[coeff];
- return cpi->HuffCodeLengthArray_VP3x[huff][token] + cpi->ExtraBitLengths_VP3x[token];
-}
-
static int tokenize_dctcost(CP_INSTANCE *cpi,int chroma,
int coeff, int coeff2, int val){
int huff = cpi->huffchoice[cpi->FrameType!=KEY_FRAME][1][chroma];
@@ -462,10 +468,11 @@
simply assume there will be a nonzero DC value and code. That's
not a true assumption but it can be fixed-up as DC is tokenized
later */
-void dct_tokenize_AC(CP_INSTANCE *cpi, int fi,
+int dct_tokenize_AC(CP_INSTANCE *cpi, int fi,
ogg_int16_t *dct, ogg_int16_t *dequant, ogg_int16_t *origdct,
int chroma, token_checkpoint_t **stack){
int coeff = 1; /* skip DC for now */
+ int retcost = 0;
while(coeff < BLOCK_SIZE){
int i = coeff;
int ret;
@@ -474,45 +481,48 @@
if ( i == BLOCK_SIZE ){
- tokenize_mark_run(cpi,chroma,fi,coeff>1,coeff,stack);
+ retcost += tokenize_mark_run(cpi,chroma,fi,coeff>1,coeff,stack);
coeff = BLOCK_SIZE;
}else{
/* determine costs for encoding this value (and any preceeding
eobrun/zerorun) as well as the cost for encoding a demoted token */
- int cost = tokenize_dctcost(cpi,chroma,coeff,i,dct[i]);
+ int costA = tokenize_dctcost(cpi,chroma,coeff,i,dct[i]),costB;
+ int costD = costA;
int dval = (dct[i]>0 ? dct[i]-1 : dct[i]+1);
int j=i;
if(dval){
/* demoting will not produce a zero. */
- cost -= tokenize_dctcost(cpi,chroma,coeff,i,dval);
+ costD -= costB = tokenize_dctcost(cpi,chroma,coeff,i,dval);
}else{
/* demoting token will produce a zero. */
j=i+1;
+ costB = 0;
while((j < BLOCK_SIZE) && !dct[j] ) j++;
if(j==BLOCK_SIZE){
- cost += tokenize_eobcost(cpi,chroma,i+1);
- cost -= tokenize_eobcost(cpi,chroma,coeff);
+ costD += tokenize_eobcost(cpi,chroma,i+1);
+ costD -= tokenize_eobcost(cpi,chroma,coeff);
}else{
- cost += tokenize_dctcost(cpi,chroma,i+1,j,dct[j]);
- cost -= tokenize_dctcost(cpi,chroma,coeff,j,dct[j]);
+ costD += tokenize_dctcost(cpi,chroma,i+1,j,dct[j]);
+ costD -= tokenize_dctcost(cpi,chroma,coeff,j,dct[j]);
}
}
- if(cost>0){
+ if(costD>0){
/* demoting results in a cheaper token cost. Is the bit savings worth the added distortion? */
int ii = dezigzag_index[i];
int od = dct[i]*dequant[i] - origdct[ii];
int dd = dval*dequant[i] - origdct[ii];
int delta = dd*dd - od*od;
- if(delta < cost*cpi->token_lambda){
+ if(delta < costD*cpi->token_lambda){
/* we have a winner. Demote token */
dct[i]=dval;
+ costA=costB;
if(dval==0){
if(j==BLOCK_SIZE){
- tokenize_mark_run(cpi,chroma,fi,coeff>1,coeff,stack);
+ retcost += tokenize_mark_run(cpi,chroma,fi,coeff>1,coeff,stack);
coeff = BLOCK_SIZE;
break;
}else{
@@ -522,6 +532,7 @@
}
}
}
+ retcost+=costA;
ret = tokenize_dctval(cpi, chroma, fi, coeff, i, dct[i], stack);
if(!ret)
@@ -530,6 +541,7 @@
}
}
+ return retcost;
}
/* called after AC tokenization is complete, because DC coding has to
@@ -547,7 +559,7 @@
int val = cpi->frag_dc[fi];
int token1 = cpi->dct_token[1][*idx1];
int eb1 = cpi->dct_token_eb[1][*idx1];
-
+
if(!*run1) *run1 = decode_eob_token(token1, eb1);
if(val){
Modified: branches/theora-thusnelda/lib/enc/encode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/encode.c 2008-12-03 00:21:45 UTC (rev 15554)
+++ branches/theora-thusnelda/lib/enc/encode.c 2008-12-03 10:34:41 UTC (rev 15555)
@@ -20,24 +20,18 @@
#include "codec_internal.h"
#include "encoder_lookup.h"
-static int predict_frag(CP_INSTANCE *cpi,
- int wpc,
- int fi,
- int fi_down,
- int fixup){
+static int predict_frag(int wpc,
+ ogg_int16_t *dc,
+ ogg_int16_t *down,
+ int *last){
- ogg_int16_t *dc = cpi->frag_dc;
-
- if(fixup>=0)
- dc[fixup] -= dc[fi];
-
if(wpc){
ogg_int16_t DC = 0;
- if(wpc&0x1) DC += pc[wpc][0]*dc[fi-1];
- if(wpc&0x2) DC += pc[wpc][1]*dc[fi_down-1];
- if(wpc&0x4) DC += pc[wpc][2]*dc[fi_down];
- if(wpc&0x8) DC += pc[wpc][3]*dc[fi_down+1];
+ if(wpc&0x1) DC += pc[wpc][0]* *(dc-1);
+ if(wpc&0x2) DC += pc[wpc][1]* *(down-1);
+ if(wpc&0x4) DC += pc[wpc][2]* *(down);
+ if(wpc&0x8) DC += pc[wpc][3]* *(down+1);
/* if we need to do a shift */
if(pc[wpc][4]) {
@@ -49,41 +43,49 @@
/* check for outranging on the two predictors that can outrange */
if((wpc&(PU|PUL|PL)) == (PU|PUL|PL)){
- if( abs(DC - dc[fi_down]) > 128) {
- DC = dc[fi_down];
- } else if( abs(DC - dc[fi-1]) > 128) {
- DC = dc[fi-1];
- } else if( abs(DC - dc[fi_down-1]) > 128) {
- DC = dc[fi_down-1];
+ if( abs(DC - *down) > 128) {
+ DC = *down;
+ } else if( abs(DC - *(dc-1)) > 128) {
+ DC = *(dc-1);
+ } else if( abs(DC - *(down-1)) > 128) {
+ DC = *(down-1);
}
}
- dc[fi] -= DC;
- return -1;
+ *last = *dc;
+ return *dc - DC;
}else{
- return fi;
+ int ret = *dc - *last;
+ *last = *dc;
+ return ret;
}
}
static void PredictDC(CP_INSTANCE *cpi){
ogg_int32_t pi;
- int fixup[3]; /* last value used for given frame */
- int y,x,fi = cpi->frag_total-1;
+ int last[3]; /* last value used for given frame */
+ int y,x,fi = 0;
unsigned char *cp = cpi->frag_coded;
/* for y,u,v; handles arbitrary plane subsampling arrangement. Shouldn't need to be altered for 4:2:2 or 4:4:4 */
- for (pi=2; pi>=0; pi--) {
+ for (pi=0; pi<3; pi++) {
int v = cpi->frag_v[pi];
int h = cpi->frag_h[pi];
int subh = !(pi && cpi->info.pixelformat != OC_PF_444);
int subv = !(pi && cpi->info.pixelformat == OC_PF_420);
+ ogg_int16_t dc[h];
+ ogg_int16_t down[h];
- for(x=0;x<3;x++)fixup[x]=-1;
+ for(x=0;x<3;x++)last[x]=0;
- for (y=v-1; y>=0 ; y--) {
+ for (y=0; y<v ; y++) {
macroblock_t *mb_row = cpi->macro + (y>>subv)*cpi->macro_h;
macroblock_t *mb_down = cpi->macro + ((y-1)>>subv)*cpi->macro_h;
- for (x=h-1; x>=0; x--, fi--) {
+
+ memcpy(down,dc,sizeof(down));
+ memcpy(dc,cpi->frag_dc+fi,sizeof(dc));
+
+ for (x=0; x<h; x++, fi++) {
if(cp[fi]) {
int wpc=0;
int wf = Mode2Frame[mb_row[x>>subh].mode];
@@ -97,7 +99,7 @@
if(x+1<h && cp[fi-h+1] && Mode2Frame[mb_down[(x+1)>>subh].mode] == wf) wpc|=8; /* down right */
}
- fixup[wf]=predict_frag(cpi,wpc,fi,fi-h,fixup[wf]);
+ cpi->frag_dc[fi]=predict_frag(wpc,dc+x,down+x,last+wf);
}
}
}
@@ -306,7 +308,11 @@
}
}
+#include <stdio.h>
void EncodeData(CP_INSTANCE *cpi){
+ long modebits=0;
+ long mvbits=0;
+ long dctbits;
long bits;
PredictDC(cpi);
@@ -314,21 +320,88 @@
/* Mode and MV data not needed for key frames. */
if ( cpi->FrameType != KEY_FRAME ){
+ int prebits = oggpackB_bits(cpi->oggbuffer);
PackModes(cpi);
- bits = oggpackB_bits(cpi->oggbuffer);
+ modebits = oggpackB_bits(cpi->oggbuffer)-prebits;
+ prebits = oggpackB_bits(cpi->oggbuffer);
PackMotionVectors (cpi);
- bits = oggpackB_bits(cpi->oggbuffer);
+ mvbits = oggpackB_bits(cpi->oggbuffer)-prebits;
}
ChooseTokenTables(cpi);
+ {
+ int prebits = oggpackB_bits(cpi->oggbuffer);
+ EncodeTokenList(cpi);
+ dctbits = oggpackB_bits(cpi->oggbuffer)-prebits;
+ }
+
+ bits = oggpackB_bits(cpi->oggbuffer);
+ ReconRefFrames(cpi);
+
#ifdef COLLECT_METRICS
ModeMetrics(cpi);
+ {
+ int total = cpi->frag_total*64;
+ int fi=0,pi,x,y;
+ ogg_int64_t ssd=0;
+ double minimize;
+
+ for(pi=0;pi<3;pi++){
+ int bi = cpi->frag_buffer_index[fi];
+ unsigned char *frame = cpi->frame+bi;
+ unsigned char *recon = cpi->lastrecon+bi;
+ int stride = cpi->stride[pi];
+ int h = cpi->frag_h[pi]*8;
+ int v = cpi->frag_v[pi]*8;
+
+ for(y=0;y<v;y++){
+ int lssd=0;
+ for(x=0;x<h;x++)
+ lssd += (frame[x]-recon[x])*(frame[x]-recon[x]);
+ ssd+=lssd;
+ frame+=stride;
+ recon+=stride;
+ }
+ fi+=cpi->frag_n[pi];
+ }
+
+ minimize = ssd + (float)bits*cpi->token_lambda*16;
+
+ fprintf(stdout,"%d %d %d %d %f %f %f %ld %ld %ld %ld %f %f %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f %.0f \n",
+ (int)cpi->CurrentFrame, // 0
+ cpi->BaseQ, // 1
+ cpi->token_lambda, // 2
+ cpi->skip_lambda, // 3
+ (double)cpi->rho_count[cpi->BaseQ]/total, // 4
+ (double)cpi->rho_postop/total, // 5
+ (double)cpi->rho_postop/cpi->rho_count[cpi->BaseQ], // 6
+ modebits, // 7
+ mvbits, // 8
+ dctbits, // 9
+ oggpackB_bits(cpi->oggbuffer), // 10
+ (double)ssd, // 11
+ (double)0,
+ (double)cpi->dist_dist[0][0],//13
+ (double)cpi->dist_dist[0][1],
+ (double)cpi->dist_dist[0][2],
+ (double)cpi->dist_dist[0][3],
+ (double)cpi->dist_dist[0][4],
+ (double)cpi->dist_dist[0][5],
+ (double)cpi->dist_dist[0][6],
+ (double)cpi->dist_dist[0][7],
+ (double)(cpi->dist_bits[0][0]>>7),//21
+ (double)(cpi->dist_bits[0][1]>>7),
+ (double)(cpi->dist_bits[0][2]>>7),
+ (double)(cpi->dist_bits[0][3]>>7),
+ (double)(cpi->dist_bits[0][4]>>7),
+ (double)(cpi->dist_bits[0][5]>>7),
+ (double)(cpi->dist_bits[0][6]>>7),
+ (double)(cpi->dist_bits[0][7]>>7)
+
+
+ );
+ }
#endif
- EncodeTokenList(cpi);
- bits = oggpackB_bits(cpi->oggbuffer);
-
- ReconRefFrames(cpi);
-
dsp_restore_fpu (cpi->dsp);
}
Modified: branches/theora-thusnelda/lib/enc/encoder_toplevel.c
===================================================================
--- branches/theora-thusnelda/lib/enc/encoder_toplevel.c 2008-12-03 00:21:45 UTC (rev 15554)
+++ branches/theora-thusnelda/lib/enc/encoder_toplevel.c 2008-12-03 10:34:41 UTC (rev 15555)
@@ -33,10 +33,6 @@
cpi->FrameType = KEY_FRAME;
cpi->LastKeyFrame = 0;
- /* code all blocks */
- for(i=0;i<cpi->frag_total;i++)
- cpi->frag_coded[i]=1;
-
/* mark as video frame */
oggpackB_write(cpi->oggbuffer,0,1);
@@ -53,9 +49,6 @@
oggpackB_reset(cpi->oggbuffer);
cpi->FrameType = DELTA_FRAME;
- for ( i = 0; i < cpi->frag_total; i++ )
- cpi->frag_coded[i] = 1; /* TEMPORARY */
-
/* mark as video frame */
oggpackB_write(cpi->oggbuffer,0,1);
@@ -67,10 +60,6 @@
cpi->FrameType = KEY_FRAME;
cpi->LastKeyFrame = 0;
- /* code all blocks */
- for(i=0;i<cpi->frag_total;i++)
- cpi->frag_coded[i]=1;
-
/* mark as video frame */
oggpackB_write(cpi->oggbuffer,0,1);
@@ -130,8 +119,8 @@
cpi->BaseQ = c->quality;
/* temporary while the RD code is only partially complete */
- cpi->skip_lambda=24;
- cpi->token_lambda=24;
+ cpi->skip_lambda=50;
+ cpi->token_lambda=50;
cpi->mv_lambda=0;
/* Set encoder flags. */
@@ -280,7 +269,7 @@
if(last_p){
cpi->doneflag=1;
#ifdef COLLECT_METRICS
- DumpMetrics(cpi);
+ //DumpMetrics(cpi);
#endif
}
return 1;
Modified: branches/theora-thusnelda/lib/enc/mode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/mode.c 2008-12-03 00:21:45 UTC (rev 15554)
+++ branches/theora-thusnelda/lib/enc/mode.c 2008-12-03 10:34:41 UTC (rev 15555)
@@ -71,6 +71,7 @@
*/
+#include<stdio.h>
void oc_mode_scheme_chooser_init(CP_INSTANCE *cpi){
oc_mode_scheme_chooser *chooser = &cpi->chooser;
int i;
@@ -298,14 +299,13 @@
}
static int cost_intra(CP_INSTANCE *cpi, int qi, int mbi, ogg_uint32_t *intrabits, int *overhead){
- unsigned char *cp = cpi->frag_coded;
macroblock_t *mb = &cpi->macro[mbi];
int i,j;
int cost = 0;
for(i=0;i<3;i++){
for(j=0;j<4;j++){
int fi=mb->Ryuv[i][j];
- if(cp[fi]){
+ if(fi<cpi->frag_total){
int sad = BIntraSAD(cpi,fi,i);
cost += BINMAP(mode_rate[qi][i][1],sad);
}
@@ -318,14 +318,13 @@
}
static int cost_inter(CP_INSTANCE *cpi, int qi, int mbi, mv_t mv, int mode, int *overhead){
- unsigned char *cp = cpi->frag_coded;
macroblock_t *mb = &cpi->macro[mbi];
int i,j;
int cost = 0;
for(i=0;i<3;i++){
for(j=0;j<4;j++){
int fi=mb->Ryuv[i][j];
- if(cp[fi]){
+ if(fi<cpi->frag_total){
int sad = BInterSAD(cpi,fi,i,mode==CODE_USING_GOLDEN,mv);
cost += BINMAP(mode_rate[qi][i][0],sad);
}
@@ -336,18 +335,17 @@
}
static int cost_inter_nomv(CP_INSTANCE *cpi, int qi, int mbi, int *overhead){
- unsigned char *cp = cpi->frag_coded;
macroblock_t *mb = &cpi->macro[mbi];
int i,j;
int cost = 0;
for(i=0;i<3;i++){
for(j=0;j<4;j++){
int fi=mb->Ryuv[i][j];
- if(cp[fi]){
+ if(fi<cpi->frag_total){
int bi = cpi->frag_buffer_index[fi];
int stride = cpi->stride[i];
int sad = dsp_sad8x8 (cpi->dsp, cpi->frame+bi, cpi->lastrecon+bi, stride);
-
+
if(i)sad<<=2;
cost += BINMAP(mode_rate[qi][i][0],sad);
}
@@ -358,7 +356,6 @@
}
static int cost_inter1mv(CP_INSTANCE *cpi, int qi, int mbi, int golden, int *bits0, int *overhead){
- unsigned char *cp = cpi->frag_coded;
macroblock_t *mb = &cpi->macro[mbi];
int i,j;
int cost = 0;
@@ -366,7 +363,7 @@
for(i=0;i<3;i++){
for(j=0;j<4;j++){
int fi=mb->Ryuv[i][j];
- if(cp[fi]){
+ if(fi<cpi->frag_total){
int sad = BInterSAD(cpi,fi,i,golden,mb->analysis_mv[0][golden]);
cost += BINMAP(mode_rate[qi][i][0],sad);
}
@@ -386,7 +383,6 @@
static int cost_inter4mv(CP_INSTANCE *cpi, int qi, int mbi, int *bits0, int *bits1, int *overhead){
int pf = cpi->info.pixelformat;
- unsigned char *cp = cpi->frag_coded;
macroblock_t *mb = &cpi->macro[mbi];
int i,j;
int cost = 0;
@@ -395,15 +391,14 @@
for(j=0;j<4;j++){
int fi=mb->Ryuv[0][j];
- if(cp[fi]){
+ if(fi<cpi->frag_total){
int sad = BInterSAD(cpi,fi,0,0,mb->mv[j]);
cost += BINMAP(mode_rate[qi][0][0],sad);
-
+
*bits0 +=
MvBits[mb->mv[j].x + MAX_MV_EXTENT] +
MvBits[mb->mv[j].y + MAX_MV_EXTENT];
*bits1 += 12;
-
}
}
@@ -420,10 +415,9 @@
for(i=1;i<3;i++){
int fi=mb->Ryuv[i][0];
- if(cp[fi]){
+ if(fi<cpi->frag_total){
int sad = BInterSAD(cpi,fi,i,0,ch);
cost += BINMAP(mode_rate[qi][i][0],sad);
-
}
}
}
@@ -446,7 +440,7 @@
for(i=1;i<3;i++){
for(j=0;j<2;j++){
int fi=mb->Ryuv[i][j];
- if(cp[fi]){
+ if(fi<cpi->frag_total){
int sad = BInterSAD(cpi,fi,i,0,mv[j]);
cost += BINMAP(mode_rate[qi][i][0],sad);
}
@@ -459,7 +453,7 @@
for(i=1;i<3;i++){
for(j=0;j<4;j++){
int fi=mb->Ryuv[i][j];
- if(cp[fi]){
+ if(fi<cpi->frag_total){
int sad = BInterSAD(cpi,fi,i,0,mb->mv[j]);
cost += BINMAP(mode_rate[qi][i][0],sad);
}
@@ -486,9 +480,10 @@
}
typedef struct{
- int uncoded_ssd;
- int coded_ssd;
- int sad_cost;
+ int uncoded_ac_ssd;
+ int coded_ac_ssd;
+ int ac_cost;
+ int dc_flag;
} rd_metric_t;
typedef struct{
@@ -547,11 +542,14 @@
int nonzero=63;
ogg_int16_t *dequant = ps->re_q[mode != CODE_INTRA][ps->plane];
int uncoded_ssd=0,coded_ssd=0,coded_partial_ssd=0,sad=0;
+ int uncoded_dc=0,coded_dc=0,dc_flag=0;
int lambda = cpi->skip_lambda;
token_checkpoint_t *checkpoint=*stack;
- int sad_cost=0;
+ int sad_cost=0,cost;
int i;
+ cpi->frag_coded[fi]=1;
+
/* motion comp */
switch(mode){
case CODE_INTER_PLUS_MV:
@@ -590,18 +588,6 @@
break;
}
- if(!keyframe){
- if(mode==CODE_INTER_NO_MV){
- for(i=0;i<64;i++)
- uncoded_ssd += data[i]*data[i];
- }else{
- dsp_sub8x8(cpi->dsp, frame_ptr, cpi->lastrecon+bi, buffer, stride);
- for(i=0;i<64;i++)
- uncoded_ssd += buffer[i]*buffer[i];
- }
- }
- uncoded_ssd <<= 4; /* scale to match DCT domain */
-
if(mode==CODE_INTRA){
int acc=0;
for(i=0;i<64;i++)
@@ -622,15 +608,31 @@
cpi->frag_sad[fi]=sad;
#endif
+ if(!keyframe){
+ if(mode==CODE_INTER_NO_MV){
+ for(i=0;i<64;i++){
+ uncoded_ssd += data[i]*data[i];
+ uncoded_dc += data[i];
+ }
+ }else{
+ dsp_sub8x8(cpi->dsp, frame_ptr, cpi->lastrecon+bi, buffer, stride);
+ for(i=0;i<64;i++){
+ uncoded_ssd += buffer[i]*buffer[i];
+ uncoded_dc += buffer[i];
+ }
+ }
+ uncoded_ssd*=ps->ssdmul;
+ uncoded_ssd <<= 4; /* scale to match DCT domain */
+ sad_cost = BINMAP(mode_rate[qi][ps->plane][mode==CODE_INTRA],sad);
+ }
+
/* transform */
dsp_fdct_short(cpi->dsp, data, buffer);
/* collect rho metrics, quantize */
{
int i;
- int dcshift = (mode==CODE_INTRA?1:0); /* temporary hysteresis
- until DC opt is in */
- quant_tables *qq = ps->qq[mode != CODE_INTRA];
+ //quant_tables *qq = ps->qq[mode != CODE_INTRA];
{
int d;
@@ -642,9 +644,9 @@
//for(pos=64;pos>0;pos--)
//if(val < qqq[pos-1])break;
- rho_count[pos]++;
+ //rho_count[pos]++;
- if((abs(buffer[0])<<dcshift)>=dequant[0]){
+ if(abs(buffer[0])>=dequant[0]){
int val = (((iq[0]>>15)*buffer[0]) + (1<<15) + (((iq[0]&0x7fff)*buffer[0])>>15)) >>16;
val = (val>511?511:(val<-511?-511:val));
@@ -679,10 +681,14 @@
data[i] = val;
}
}
+
+ /* for undersampled planes */
+ coded_partial_ssd*=ps->ssdmul;
+
}
+ cpi->frag_dc[fi] = data[0];
- cpi->frag_dc[fi] = data[0];
-
+#if 0
/* small performance short-circuit:
Because roundoff error means that C2 preservation can't really be
@@ -692,32 +698,31 @@
expect it to be... off... especially at low energies.
If the partial_ssd indicates this block is not worth the bits by
- some large margin, don't proceed / bother to get a more precise
+ some large margin, don't proceed or bother to get a more precise
determination */
+
if(!keyframe){
- sad_cost = BINMAP(mode_rate[qi][ps->plane][mode==CODE_INTRA],sad);
- if(sad_cost<0)sad_cost=0; /* some of the trained fits can return a negative cost for zero entropy */
- /* for undersampled planes */
- coded_partial_ssd*=ps->ssdmul;
- uncoded_ssd*=ps->ssdmul;
+ /* Don't short circuit if there's a chance of coding a DC component */
+ if( (mode != CODE_INTRA && data[0]==0) ||
+ (mode == CODE_INTRA && abs( buffer[0] - (uncoded_dc>>1) + 4096 ) < (dequant[0]>>1))){
- mo->uncoded_ssd+=uncoded_ssd;
-
- /* the partial_ssd underreports distortion, so this comparison
- will only yield false negatives, which are harmless */
- if(uncoded_ssd <= coded_partial_ssd+coding_overhead*lambda+((sad_cost*lambda)>>OC_BIT_SCALE)){
- /* SKIP */
-
- uncode_frag(cpi,fi,ps->plane);
- mo->coded_ssd+=uncoded_ssd; /* We may still be coding the MB even if not this block */
- return 0;
-
+ /* the partial_ssd underreports distortion, so this comparison
+ will only yield false negatives, which are harmless */
+ if(uncoded_ssd <= coded_partial_ssd+coding_overhead*lambda+((sad_cost*lambda)>>OC_BIT_SCALE)){
+ /* SKIP */
+
+ uncode_frag(cpi,fi,ps->plane);
+ mo->coded_ssd+=uncoded_ssd; /* We may still be coding the MB even if not this block */
+ return 0;
+
+ }
}
}
+#endif
/* tokenize */
- dct_tokenize_AC(cpi, fi, data, dequant, buffer, fi>=cpi->frag_n[0], stack);
+ cost = dct_tokenize_AC(cpi, fi, data, dequant, buffer, fi>=cpi->frag_n[0], stack);
/* reconstruct */
while(!data[nonzero] && --nonzero);
@@ -746,33 +751,45 @@
which to do so.*/
/* for now, straight up SSD */
dsp_sub8x8(cpi->dsp, frame_ptr, thisrecon, buffer, stride);
- for(i=0;i<64;i++)
+ for(i=0;i<64;i++){
coded_ssd += buffer[i]*buffer[i];
+ coded_dc += buffer[i];
+ }
coded_ssd <<= 4; /* scale to match DCT domain */
+ coded_ssd*=ps->ssdmul; /* for undersampled planes */
+
+ /* We actually only want the AC contribution to the SSDs */
+ uncoded_ssd -= ((uncoded_dc*uncoded_dc)>>2);
+ coded_ssd -= ((coded_dc*coded_dc)>>2);
+ mo->uncoded_ac_ssd+=uncoded_ssd;
- /* for undersampled planes */
- coded_ssd*=ps->ssdmul;
-
- if(uncoded_ssd <= coded_ssd+coding_overhead*lambda+((sad_cost*lambda)>>OC_BIT_SCALE)){
+ /* DC is a special visual case; if there's more than a
+ half-quantizer improvement in the effective DC component, code
+ the block */
+ if( abs(uncoded_dc)-abs(coded_dc) > dequant[0]){
+ mo->dc_flag = dc_flag = 1;
+ }
+
+ if(!dc_flag && uncoded_ssd <= coded_ssd+(coding_overhead+cost)*lambda){
/* Hm, not worth it. roll back */
tokenlog_rollback(cpi, checkpoint, (*stack)-checkpoint);
*stack = checkpoint;
uncode_frag(cpi,fi,ps->plane);
-
- mo->coded_ssd+=uncoded_ssd;
-
+
+ mo->coded_ac_ssd+=uncoded_ssd;
+
return 0;
}else{
-
- mo->coded_ssd+=coded_ssd;
- mo->sad_cost+=sad_cost;
-
+
+ mo->coded_ac_ssd+=coded_ssd;
+ mo->ac_cost+=cost;
+
}
}
-
+
//for(i=0;i<64;i++)
//if(data[i]!=0)cpi->rho_postop++;
-
+
return 1;
}
@@ -816,11 +833,11 @@
if(cpi->FrameType != KEY_FRAME){
- if(coded){
+ if(coded && !mo.dc_flag){
/* block by block, still coding the MB. Now consider the
macroblock coding cost as a whole (mode and MV) */
- int codecost = mo.sad_cost+(fr_cost4(&fr_checkpoint,fr)<<OC_BIT_SCALE)+mode_overhead;
- if(mo.uncoded_ssd <= mo.coded_ssd+((cpi->skip_lambda*codecost)>>(OC_BIT_SCALE))){
+ int codecost = mo.ac_cost+fr_cost4(&fr_checkpoint,fr)+(mode_overhead>>OC_BIT_SCALE);
+ if(mo.uncoded_ac_ssd <= mo.coded_ac_ssd+cpi->skip_lambda*codecost){
/* taking macroblock overhead into account, it is not worth coding this MB */
tokenlog_rollback(cpi, stack, stackptr-stack);
@@ -884,7 +901,6 @@
int pf = cpi->info.pixelformat;
int i;
int coded = 0;
- unsigned char *cp=cpi->frag_coded;
rd_metric_t mo;
token_checkpoint_t stack[64*2]; /* worst case token usage for 1 fragment*/
memset(&mo,0,sizeof(mo));
@@ -892,12 +908,13 @@
for(i=0;i<16;i++){
int fi = sb->f[i];
int mb_phase;
- if(cp[fi]){
+
+ if(fi<cpi->frag_total){
token_checkpoint_t *stackptr = stack;
macroblock_t *mb = &cpi->macro[sb->m[i]];
mv_t mv;
if(mb->mode == CODE_INTER_FOURMV){
-
+
switch(pf){
case OC_PF_420:
/* sixteen blocks/macroblocks per chroma superblock */
@@ -908,7 +925,7 @@
mv.x = ( mv.x >= 0 ? (mv.x + 2) / 4 : (mv.x - 2) / 4);
mv.y = ( mv.y >= 0 ? (mv.y + 2) / 4 : (mv.y - 2) / 4);
break;
-
+
case OC_PF_422:
/* sixteen blocks / eight macroblocks per chroma superblock */
mb_phase = macroblock_phase_422[i];
@@ -925,7 +942,7 @@
}
}else
mv = mb->mv[0];
-
+
if(TQB(cpi,ps,mb->mode,fi,mv,fr_cost1(fr),&mo,rc,&stackptr)){
fr_codeblock(cpi,fr);
tokenlog_commit(cpi, stack, stackptr-stack);
@@ -1167,11 +1184,10 @@
fr_finishsb(cpi,&fr);
}
- for(i=1;i<65;i++)
- rho_count[i]+=rho_count[i-1];
+ //for(i=1;i<65;i++)
+ //rho_count[i]+=rho_count[i-1];
- memcpy(cpi->rho_count,rho_count,sizeof(rho_count));
-
+ //memcpy(cpi->rho_count,rho_count,sizeof(rho_count));
if(cpi->FrameType != KEY_FRAME){
if(interbits>intrabits) return 1; /* short circuit */
More information about the commits mailing list