[xiph-commits] r15573 - branches/theora-thusnelda/lib/enc
xiphmont at svn.xiph.org
xiphmont at svn.xiph.org
Thu Dec 11 16:06:04 PST 2008
Author: xiphmont
Date: 2008-12-11 16:06:04 -0800 (Thu, 11 Dec 2008)
New Revision: 15573
Modified:
branches/theora-thusnelda/lib/enc/codec_internal.h
branches/theora-thusnelda/lib/enc/dct_encode.c
branches/theora-thusnelda/lib/enc/encoder_toplevel.c
branches/theora-thusnelda/lib/enc/frarray.c
branches/theora-thusnelda/lib/enc/mode.c
Log:
Additional SKIP and token opt fixes/improvements
SKIP and token opt lambdas now reconciled.
Modified: branches/theora-thusnelda/lib/enc/codec_internal.h
===================================================================
--- branches/theora-thusnelda/lib/enc/codec_internal.h 2008-12-10 13:21:50 UTC (rev 15572)
+++ branches/theora-thusnelda/lib/enc/codec_internal.h 2008-12-12 00:06:04 UTC (rev 15573)
@@ -293,9 +293,7 @@
/********************************************************************/
/* Setup */
int keyframe_granule_shift;
- int skip_lambda;
- int mv_lambda;
- int token_lambda;
+ int lambda;
int BaseQ;
int GoldenFrameEnabled;
int InterPrediction;
Modified: branches/theora-thusnelda/lib/enc/dct_encode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/dct_encode.c 2008-12-10 13:21:50 UTC (rev 15572)
+++ branches/theora-thusnelda/lib/enc/dct_encode.c 2008-12-12 00:06:04 UTC (rev 15573)
@@ -480,52 +480,47 @@
while(i < BLOCK_SIZE){
int ret;
+ int od = origdct[dezigzag_index[i]];
+ int bestd=0,d = dct[i];
+ int bestmin;
+ int cost,cost2=0,bestcost=0;
+ int j=i+1,k;
- /* determine costs for encoding this value (and any preceeding
- eobrun/zerorun) as well as the cost for encoding a demoted token */
- int costA = tokenize_dctcost(cpi,chroma,coeff,i,dct[i]),costB;
- int costD = costA;
- int dval = (dct[i]>0 ? dct[i]-1 : dct[i]+1);
- int j=i+1;
while((j < BLOCK_SIZE) && !dct[j] ) j++;
- if(dval){
- /* demoting will not produce a zero. */
- costD -= costB = tokenize_dctcost(cpi,chroma,coeff,i,dval);
+ if(j==BLOCK_SIZE){
+ cost = tokenize_eobcost(cpi,chroma,coeff);
+ if(i+1<BLOCK_SIZE)
+ cost2 = tokenize_eobcost(cpi,chroma,i+1);
}else{
- /* demoting token will produce a zero. */
- costB = 0;
- if(j==BLOCK_SIZE){
- if(i+1<BLOCK_SIZE)
- costD += tokenize_eobcost(cpi,chroma,i+1);
- costD -= tokenize_eobcost(cpi,chroma,coeff);
- }else{
- costD += tokenize_dctcost(cpi,chroma,i+1,j,dct[j]);
- costD -= tokenize_dctcost(cpi,chroma,coeff,j,dct[j]);
- }
+ cost = tokenize_dctcost(cpi,chroma,coeff,j,dct[j]);
+ cost2 = tokenize_dctcost(cpi,chroma,i+1,j,dct[j]);
}
+ bestmin = od*od+cost*cpi->lambda;
+
- if(costD>0){
- /* demoting results in a cheaper token cost. Is the bit savings worth the added distortion? */
- int ii = dezigzag_index[i];
- int od = dct[i]*dequant[i] - origdct[ii];
- int dd = dval*dequant[i] - origdct[ii];
- int delta = dd*dd - od*od;
-
- if(delta < costD*cpi->token_lambda){
- /* we have a winner. Demote token */
- dct[i]=dval;
- costA=costB;
-
- if(dval==0){
- if(j==BLOCK_SIZE) break;
- i=j;
- continue;
- }
+ for(k=1;k<=abs(d);k++){
+ int dval = (d>0 ? k : -k);
+ int dd = dval*dequant[i] - od;
+ int min = dd*dd;
+ cost = tokenize_dctcost(cpi,chroma,coeff,i,dval);
+
+ min += (cost+cost2)*cpi->lambda;
+ if(min<bestmin){
+ bestmin=min;
+ bestcost=cost;
+ bestd=dval;
}
}
- retcost+=costA;
+ dct[i]=bestd;
+ if(bestd==0){
+ if(j==BLOCK_SIZE) break;
+ i=j;
+ continue;
+ }
+
+ retcost+=bestcost;
ret = tokenize_dctval(cpi, chroma, fi, coeff, i, dct[i], stack);
if(!ret)
Modified: branches/theora-thusnelda/lib/enc/encoder_toplevel.c
===================================================================
--- branches/theora-thusnelda/lib/enc/encoder_toplevel.c 2008-12-10 13:21:50 UTC (rev 15572)
+++ branches/theora-thusnelda/lib/enc/encoder_toplevel.c 2008-12-12 00:06:04 UTC (rev 15573)
@@ -119,9 +119,7 @@
cpi->BaseQ = c->quality;
/* temporary while the RD code is only partially complete */
- cpi->skip_lambda=0;
- cpi->token_lambda=0;
- cpi->mv_lambda=0;
+ cpi->lambda=200;
/* Set encoder flags. */
/* if not AutoKeyframing cpi->ForceKeyFrameEvery = is frequency */
Modified: branches/theora-thusnelda/lib/enc/frarray.c
===================================================================
--- branches/theora-thusnelda/lib/enc/frarray.c 2008-12-10 13:21:50 UTC (rev 15572)
+++ branches/theora-thusnelda/lib/enc/frarray.c 2008-12-12 00:06:04 UTC (rev 15573)
@@ -414,9 +414,9 @@
fr_skipblock(NULL,&temp);
fr_skipblock(NULL,&temp);
fr_skipblock(NULL,&temp);
- fr_finishsb(NULL,&temp);
+ //fr_finishsb(NULL,&temp);
cost=temp.cost;
temp=*post;
- fr_finishsb(NULL,&temp);
+ //fr_finishsb(NULL,&temp);
return temp.cost - cost;
}
Modified: branches/theora-thusnelda/lib/enc/mode.c
===================================================================
--- branches/theora-thusnelda/lib/enc/mode.c 2008-12-10 13:21:50 UTC (rev 15572)
+++ branches/theora-thusnelda/lib/enc/mode.c 2008-12-12 00:06:04 UTC (rev 15573)
@@ -363,7 +363,6 @@
macroblock_t *mb = &cpi->macro[mbi];
int i,j;
int cost = 0;
-
for(i=0;i<3;i++){
for(j=0;j<4;j++){
int fi=mb->Ryuv[i][j];
@@ -527,6 +526,7 @@
}
/* coding overhead is unscaled */
+#include<stdio.h>
static int TQB (CP_INSTANCE *cpi, plane_state_t *ps, int mode, int fi, mv_t mv,
int coding_overhead, rd_metric_t *mo, long *rho_count,
token_checkpoint_t **stack){
@@ -544,9 +544,9 @@
unsigned char *thisrecon = cpi->recon+bi;
int nonzero=0;
const ogg_int16_t *dequant = ps->re_q[mode != CODE_INTRA][ps->plane];
- int uncoded_ssd=0,coded_ssd=0,coded_partial_ssd=0;
+ int uncoded_ssd=0,coded_ssd=0;
int uncoded_dc=0,coded_dc=0,dc_flag=0;
- int lambda = cpi->skip_lambda;
+ int lambda = cpi->lambda;
token_checkpoint_t *checkpoint=*stack;
int cost;
int i;
@@ -632,7 +632,6 @@
uncoded_dc += buffer[i];
}
}
- uncoded_ssd*=ps->ssdmul;
uncoded_ssd <<= 4; /* scale to match DCT domain */
}
@@ -656,26 +655,16 @@
//rho_count[pos]++;
if((abs(v)<<1)>=dequant[i]){
- int d;
int val = (((iq[i]>>15)*v) + (1<<15) + (((iq[i]&0x7fff)*v)>>15)) >>16;
- val = (val>511?511:(val<-511?-511:val));
-
- d = val*dequant[i]-v;
- coded_partial_ssd += d*d;
- data[i] = val;
+ data[i] = (val>511?511:(val<-511?-511:val));
nonzero=i;
}else{
- coded_partial_ssd += v*v;
data[i] = 0;
}
}
-
- /* for undersampled planes */
- coded_partial_ssd*=ps->ssdmul;
-
}
cpi->frag_dc[fi] = data[0];
-
+
/* tokenize */
cost = dct_tokenize_AC(cpi, fi, data, dequant, buffer, fi>=cpi->frag_n[0], stack);
@@ -700,27 +689,27 @@
int i;
/* in retrospect, should we have skipped this block? */
- /* we are free to apply any distortion measure we like as we have
- the full original block and fully reconstructed block with
- which to do so.*/
- /* for now, straight up SSD */
dsp_sub8x8(cpi->dsp, frame_ptr, thisrecon, buffer, stride);
for(i=0;i<64;i++){
coded_ssd += buffer[i]*buffer[i];
coded_dc += buffer[i];
}
coded_ssd <<= 4; /* scale to match DCT domain */
- coded_ssd*=ps->ssdmul; /* for undersampled planes */
/* We actually only want the AC contribution to the SSDs */
uncoded_ssd -= ((uncoded_dc*uncoded_dc)>>2);
coded_ssd -= ((coded_dc*coded_dc)>>2);
+
+ /* for undersampled planes */
+ //coded_ssd*=ps->ssdmul;
+ //uncoded_ssd*=ps->ssdmul;
+
mo->uncoded_ac_ssd+=uncoded_ssd;
- /* DC is a special visual case; if there's more than a
- half-quantizer improvement in the effective DC component, code
+ /* DC is a special case; if there's more than a full-quantizer
+ improvement in the effective DC component, always force-code
the block */
- if( abs(uncoded_dc)-abs(coded_dc) > dequant[0]){
+ if( abs(uncoded_dc)-abs(coded_dc) > (dequant[0]<<1)){
mo->dc_flag = dc_flag = 1;
}
@@ -731,10 +720,13 @@
uncode_frag(cpi,fi,ps->plane);
mo->coded_ac_ssd+=uncoded_ssd;
+ //fprintf(stderr,"skip(%d:%d)",coding_overhead,cost);
return 0;
}else{
+ //fprintf(stderr,"*****(%d:%d)",coding_overhead,cost);
+
mo->coded_ac_ssd+=coded_ssd;
mo->ac_cost+=cost;
@@ -791,7 +783,7 @@
/* block by block, still coding the MB. Now consider the
macroblock coding cost as a whole (mode and MV) */
int codecost = mo.ac_cost+fr_cost4(&fr_checkpoint,fr)+(mode_overhead>>OC_BIT_SCALE);
- if(mo.uncoded_ac_ssd <= mo.coded_ac_ssd+cpi->skip_lambda*codecost){
+ if(mo.uncoded_ac_ssd <= mo.coded_ac_ssd+cpi->lambda*codecost){
/* taking macroblock overhead into account, it is not worth coding this MB */
tokenlog_rollback(cpi, stack, stackptr-stack);
@@ -1271,26 +1263,23 @@
int fi = tfi[ti];
actual_bits[fi] += (bits<<OC_BIT_SCALE);
}else{
- /* EOB run; its bits should be split up between all the fragments in the run */
int run = parse_eob_run(token, cpi->dct_token_eb[group][ti]);
- int fracbits = ((bits<<OC_BIT_SCALE) + (run>>1))/run;
+ int fi = stack[eobcounts[group]];
+ actual_bits[fi]+=(bits<<OC_BIT_SCALE);
if(ti+1<tn){
/* tokens follow EOB so it must be entirely ensconced within this plane/group */
- while(run--){
- int fi = stack[eobcounts[group]++];
- actual_bits[fi]+=fracbits;
- }
+ eobcounts[group]+=run;
}else{
/* EOB is the last token in this plane/group, so it may span into the next plane/group */
int n = cpi->dct_eob_fi_count[group];
while(run){
- while(eobcounts[group] < n && run){
- int fi = stack[eobcounts[group]++];
- actual_bits[fi]+=fracbits;
- run--;
- }
+ int rem = n - eobcounts[group];
+ if(rem>run)rem=run;
+
+ eobcounts[group]+=rem;
+ run -= rem;
if(run){
group++;
n = cpi->dct_eob_fi_count[group];
@@ -1345,36 +1334,35 @@
for(fi=0;fi<v;fi++)
if(cp[fi]){
int mbi = mp[fi];
- if(mbi>=0){
- macroblock_t *mb = &cpi->macro[mbi];
- int mode = mb->mode;
- int plane = (fi<y ? 0 : (fi<u ? 1 : 2));
- int bin = BIN(sp[fi]);
- mode_metric[qi][plane][mode==CODE_INTRA].frag[bin]++;
- mode_metric[qi][plane][mode==CODE_INTRA].sad[bin] += sp[fi];
- mode_metric[qi][plane][mode==CODE_INTRA].bits[bin] += actual_bits[fi];
+ macroblock_t *mb = &cpi->macro[mbi];
+ int mode = mb->mode;
+ int plane = (fi<y ? 0 : (fi<u ? 1 : 2));
+ int bin = BIN(sp[fi]);
+ mode_metric[qi][plane][mode==CODE_INTRA].frag[bin]++;
+ mode_metric[qi][plane][mode==CODE_INTRA].sad[bin] += sp[fi];
+ mode_metric[qi][plane][mode==CODE_INTRA].bits[bin] += actual_bits[fi];
+
+ if(0){
+ int bi = cpi->frag_buffer_index[fi];
+ unsigned char *frame = cpi->frame+bi;
+ unsigned char *recon = cpi->lastrecon+bi;
+ int stride = cpi->stride[plane];
+ int lssd=0;
+ int xi,yi;
- if(0){
- int bi = cpi->frag_buffer_index[fi];
- unsigned char *frame = cpi->frame+bi;
- unsigned char *recon = cpi->lastrecon+bi;
- int stride = cpi->stride[plane];
- int lssd=0;
- int xi,yi;
-
- for(yi=0;yi<8;yi++){
- for(xi=0;xi<8;xi++)
- lssd += (frame[xi]-recon[xi])*(frame[xi]-recon[xi]);
- frame+=stride;
- recon+=stride;
- }
- cpi->dist_dist[plane][mode] += lssd;
- cpi->dist_bits[plane][mode] += actual_bits[fi];
+ for(yi=0;yi<8;yi++){
+ for(xi=0;xi<8;xi++)
+ lssd += (frame[xi]-recon[xi])*(frame[xi]-recon[xi]);
+ frame+=stride;
+ recon+=stride;
}
+ cpi->dist_dist[plane][mode] += lssd;
+ cpi->dist_bits[plane][mode] += actual_bits[fi];
}
}
+
/* update global SAD/rate estimation matrix */
UpdateModeEstimation(cpi);
}
More information about the commits mailing list