[xiph-commits] r17457 - in trunk/theora/lib: . arm c64x x86 x86_vc
tterribe at svn.xiph.org
tterribe at svn.xiph.org
Thu Sep 23 19:05:49 PDT 2010
Author: tterribe
Date: 2010-09-23 19:05:49 -0700 (Thu, 23 Sep 2010)
New Revision: 17457
Modified:
trunk/theora/lib/analyze.c
trunk/theora/lib/arm/armstate.c
trunk/theora/lib/c64x/c64xdec.c
trunk/theora/lib/c64x/c64xfrag.c
trunk/theora/lib/decint.h
trunk/theora/lib/decode.c
trunk/theora/lib/encint.h
trunk/theora/lib/state.c
trunk/theora/lib/state.h
trunk/theora/lib/tokenize.c
trunk/theora/lib/x86/mmxstate.c
trunk/theora/lib/x86_vc/mmxstate.c
Log:
Cache the reference frame associated with the current MB mode in each fragment.
This avoids repeatedly performing the MB mode -> reference frame translation,
which is done many times per fragment during DC prediction.
In fact, by using a special OC_FRAME_NONE tag for uncoded fragments, one need
not even check to see if a fragment is coded during DC prediction, simplifying
it even more.
This saves 2% on 720p decode on a Cortex A8.
Modified: trunk/theora/lib/analyze.c
===================================================================
--- trunk/theora/lib/analyze.c 2010-09-23 23:13:52 UTC (rev 17456)
+++ trunk/theora/lib/analyze.c 2010-09-24 02:05:49 UTC (rev 17457)
@@ -683,6 +683,7 @@
oc_token_checkpoint *checkpoint;
oc_fragment *frags;
int mb_mode;
+ int refi;
int mv_offs[2];
int nmv_offs;
int ac_bits;
@@ -706,6 +707,7 @@
if(_enc->sp_level>=OC_SP_LEVEL_EARLY_SKIP){
/*Enable early skip detection.*/
frags[_fragi].coded=0;
+ frags[_fragi].refi=OC_FRAME_NONE;
oc_fr_skip_block(_fr);
return 0;
}
@@ -714,9 +716,9 @@
qii&=3;
frags[_fragi].qii=qii;
}
+ refi=frags[_fragi].refi;
mb_mode=frags[_fragi].mb_mode;
- ref=_enc->state.ref_frame_data[
- _enc->state.ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]+frag_offs;
+ ref=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[refi]]+frag_offs;
dst=_enc->state.ref_frame_data[_enc->state.ref_frame_idx[OC_FRAME_SELF]]
+frag_offs;
/*Motion compensation:*/
@@ -733,7 +735,7 @@
}break;
default:{
const oc_mv *frag_mvs;
- frag_mvs=(const oc_mv *)_enc->state.frag_mvs;
+ frag_mvs=_enc->state.frag_mvs;
nmv_offs=oc_state_get_mv_offsets(&_enc->state,mv_offs,
_pli,frag_mvs[_fragi]);
if(nmv_offs>1){
@@ -851,6 +853,7 @@
oc_enc_tokenlog_rollback(_enc,checkpoint,(*_stack)-checkpoint);
*_stack=checkpoint;
frags[_fragi].coded=0;
+ frags[_fragi].refi=OC_FRAME_NONE;
oc_fr_skip_block(_fr);
return 0;
}
@@ -887,6 +890,7 @@
oc_fr_state fr_checkpoint;
oc_qii_state qs_checkpoint;
int mb_mode;
+ int refi;
int ncoded;
ptrdiff_t fragi;
int bi;
@@ -900,11 +904,13 @@
uncoded_fragis=_pipe->uncoded_fragis[0];
nuncoded_fragis=_pipe->nuncoded_fragis[0];
mb_mode=mb_modes[_mbi];
+ refi=OC_FRAME_FOR_MODE(mb_mode);
ncoded=0;
stackptr=stack;
memset(&mo,0,sizeof(mo));
for(bi=0;bi<4;bi++){
fragi=sb_maps[_mbi>>2][_mbi&3][bi];
+ frags[fragi].refi=refi;
frags[fragi].mb_mode=mb_mode;
if(oc_enc_block_transform_quantize(_enc,_pipe,0,fragi,
_rd_scale[bi],_rd_iscale[bi],&mo,_pipe->fr+0,&stackptr)){
@@ -930,6 +936,7 @@
if(frags[fragi].coded){
*(uncoded_fragis-++nuncoded_fragis)=fragi;
frags[fragi].coded=0;
+ frags[fragi].refi=OC_FRAME_NONE;
}
oc_fr_skip_block(_pipe->fr+0);
}
@@ -1541,7 +1548,6 @@
oc_fragment *frags;
ptrdiff_t *coded_fragis;
ptrdiff_t ncoded_fragis;
- int mb_mode;
ptrdiff_t fragi;
int bi;
sb_maps=(const oc_sb_map *)_enc->state.sb_maps;
@@ -1549,11 +1555,11 @@
frags=_enc->state.frags;
coded_fragis=_pipe->coded_fragis[0];
ncoded_fragis=_pipe->ncoded_fragis[0];
- mb_mode=mb_modes[_mbi];
stackptr=stack;
for(bi=0;bi<4;bi++){
fragi=sb_maps[_mbi>>2][_mbi&3][bi];
- frags[fragi].mb_mode=mb_mode;
+ frags[fragi].refi=OC_FRAME_SELF;
+ frags[fragi].mb_mode=OC_MODE_INTRA;
oc_enc_block_transform_quantize(_enc,_pipe,0,fragi,
_rd_scale[bi],_rd_iscale[bi],NULL,NULL,&stackptr);
coded_fragis[ncoded_fragis++]=fragi;
@@ -1701,6 +1707,7 @@
pli=mapi>>2;
bi=mapi&3;
fragi=mb_maps[mbi][pli][bi];
+ frags[fragi].refi=OC_FRAME_SELF;
frags[fragi].mb_mode=OC_MODE_INTRA;
}
/*Save masking scale factors for chroma blocks.*/
@@ -2320,6 +2327,7 @@
int mb_gmv_bits_0;
int inter_mv_pref;
int mb_mode;
+ int refi;
int mv;
unsigned mbi;
int mapii;
@@ -2505,6 +2513,7 @@
int orig_mb_mode;
orig_mb_mode=mb_mode;
mb_mode=mb_modes[mbi];
+ refi=OC_FRAME_FOR_MODE(mb_mode);
switch(mb_mode){
case OC_MODE_INTER_MV:{
prior_mv=last_mv;
@@ -2557,8 +2566,9 @@
pli=mapi>>2;
bi=mapi&3;
fragi=mb_maps[mbi][pli][bi];
+ frags[fragi].qii=modes[OC_MODE_INTER_MV_FOUR].qii[mapii];
+ frags[fragi].refi=refi;
frags[fragi].mb_mode=mb_mode;
- frags[fragi].qii=modes[OC_MODE_INTER_MV_FOUR].qii[mapii];
frag_mvs[fragi]=cbmvs[bi];
}
}break;
@@ -2570,6 +2580,7 @@
else{
*(uncoded_mbis-++nuncoded_mbis)=mbi;
mb_mode=OC_MODE_INTER_NOMV;
+ refi=OC_FRAME_PREV;
mv=0;
}
/*Propagate final MB mode and MVs to the chroma blocks.
@@ -2581,11 +2592,12 @@
pli=mapi>>2;
bi=mapi&3;
fragi=mb_maps[mbi][pli][bi];
- frags[fragi].mb_mode=mb_mode;
/*If we switched from 4MV mode to INTER_MV mode, then the qii
values won't have been chosen with the right MV, but it's
probably not worth re-estimating them.*/
frags[fragi].qii=modes[mb_mode].qii[mapii];
+ frags[fragi].refi=refi;
+ frags[fragi].mb_mode=mb_mode;
frag_mvs[fragi]=mv;
}
}
Modified: trunk/theora/lib/arm/armstate.c
===================================================================
--- trunk/theora/lib/arm/armstate.c 2010-09-23 23:13:52 UTC (rev 17456)
+++ trunk/theora/lib/arm/armstate.c 2010-09-24 02:05:49 UTC (rev 17457)
@@ -100,7 +100,7 @@
unsigned char *dst;
ptrdiff_t frag_buf_off;
int ystride;
- int mb_mode;
+ int refi;
/*Apply the inverse transform.*/
/*Special case only having a DC component.*/
if(_last_zzi<2){
@@ -117,18 +117,14 @@
}
/*Fill in the target buffer.*/
frag_buf_off=_state->frag_buf_offs[_fragi];
- mb_mode=_state->frags[_fragi].mb_mode;
+ refi=_state->frags[_fragi].refi;
ystride=_state->ref_ystride[_pli];
dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
- if(mb_mode==OC_MODE_INTRA){
- oc_frag_recon_intra_arm(dst,ystride,_dct_coeffs+64);
- }
+ if(refi==OC_FRAME_SELF)oc_frag_recon_intra_arm(dst,ystride,_dct_coeffs+64);
else{
const unsigned char *ref;
int mvoffsets[2];
- ref=
- _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
- +frag_buf_off;
+ ref=_state->ref_frame_data[_state->ref_frame_idx[refi]]+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
_state->frag_mvs[_fragi])>1){
oc_frag_recon_inter2_arm(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
@@ -144,7 +140,7 @@
unsigned char *dst;
ptrdiff_t frag_buf_off;
int ystride;
- int mb_mode;
+ int refi;
/*Apply the inverse transform.*/
/*Special case only having a DC component.*/
if(_last_zzi<2){
@@ -161,18 +157,14 @@
}
/*Fill in the target buffer.*/
frag_buf_off=_state->frag_buf_offs[_fragi];
- mb_mode=_state->frags[_fragi].mb_mode;
+ refi=_state->frags[_fragi].refi;
ystride=_state->ref_ystride[_pli];
dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
- if(mb_mode==OC_MODE_INTRA){
- oc_frag_recon_intra_v6(dst,ystride,_dct_coeffs+64);
- }
+ if(refi==OC_FRAME_SELF)oc_frag_recon_intra_v6(dst,ystride,_dct_coeffs+64);
else{
const unsigned char *ref;
int mvoffsets[2];
- ref=
- _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
- +frag_buf_off;
+ ref=_state->ref_frame_data[_state->ref_frame_idx[refi]]+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
_state->frag_mvs[_fragi])>1){
oc_frag_recon_inter2_v6(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
@@ -188,7 +180,7 @@
unsigned char *dst;
ptrdiff_t frag_buf_off;
int ystride;
- int mb_mode;
+ int refi;
/*Apply the inverse transform.*/
/*Special case only having a DC component.*/
if(_last_zzi<2){
@@ -205,18 +197,14 @@
}
/*Fill in the target buffer.*/
frag_buf_off=_state->frag_buf_offs[_fragi];
- mb_mode=_state->frags[_fragi].mb_mode;
+ refi=_state->frags[_fragi].refi;
ystride=_state->ref_ystride[_pli];
dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
- if(mb_mode==OC_MODE_INTRA){
- oc_frag_recon_intra_neon(dst,ystride,_dct_coeffs+64);
- }
+ if(refi==OC_FRAME_SELF)oc_frag_recon_intra_neon(dst,ystride,_dct_coeffs+64);
else{
const unsigned char *ref;
int mvoffsets[2];
- ref=
- _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
- +frag_buf_off;
+ ref=_state->ref_frame_data[_state->ref_frame_idx[refi]]+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
_state->frag_mvs[_fragi])>1){
oc_frag_recon_inter2_neon(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
Modified: trunk/theora/lib/c64x/c64xdec.c
===================================================================
--- trunk/theora/lib/c64x/c64xdec.c 2010-09-23 23:13:52 UTC (rev 17456)
+++ trunk/theora/lib/c64x/c64xdec.c 2010-09-24 02:05:49 UTC (rev 17457)
@@ -57,7 +57,7 @@
predictor for the same reference frame.*/
for(fragx=0;fragx<nhfrags;fragx++,fragi++){
int coded;
- int ref;
+ int refi;
/*The TI compiler refuses to pipeline this if we put it in an if(coded)
block.
We can do the loads unconditionally, which helps move them earlier.
@@ -68,9 +68,9 @@
However, these loads are "free" in the cache sense, since reading the
coded flag brings in all four bytes anyway, and starting the loads
before we know the coded flag saves 6 cycles.*/
- ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+ refi=frags[fragi].refi;
coded=frags[fragi].coded;
- frags[fragi].dc=pred_last[ref]+=frags[fragi].dc&-coded;
+ frags[fragi].dc=pred_last[refi]+=frags[fragi].dc&-coded;
ncoded_fragis+=coded;
}
}
@@ -82,16 +82,13 @@
u_frags=frags-nhfrags;
l_ref=-1;
ul_ref=-1;
- u_ref=u_frags[fragi].coded?OC_FRAME_FOR_MODE(u_frags[fragi].mb_mode):-1;
+ u_ref=u_frags[fragi].refi;
for(fragx=0;fragx<nhfrags;fragx++,fragi++){
int ur_ref;
- int ref;
+ int refi;
if(fragx+1>=nhfrags)ur_ref=-1;
- else{
- ur_ref=u_frags[fragi+1].coded?
- OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1;
- }
- ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+ else ur_ref=u_frags[fragi+1].refi;
+ refi=frags[fragi].refi;
if(frags[fragi].coded){
static const int OC_PRED_SCALE[16][2]={
{0x00000000,0x00000000},
@@ -126,8 +123,8 @@
p2=u_frags[fragi+1].dc;
p3=frags[fragi-1].dc;
pflags=_cmpeq4(_packl4(_pack2(ur_ref,u_ref),_pack2(ul_ref,l_ref)),
- _packl4(_pack2(ref,ref),_pack2(ref,ref)));
- if(pflags==0)pred=pred_last[ref];
+ _packl4(_pack2(refi,refi),_pack2(refi,refi)));
+ if(pflags==0)pred=pred_last[refi];
else{
pred=(_dotp2(_pack2(p0,p1),OC_PRED_SCALE[pflags][0])
+_dotp2(_pack2(p2,p3),OC_PRED_SCALE[pflags][1]))/128;
@@ -137,9 +134,9 @@
else if(abs(pred-p0)>128)pred=p0;
}
}
- pred_last[ref]=frags[fragi].dc+=pred;
+ pred_last[refi]=frags[fragi].dc+=pred;
ncoded_fragis++;
- l_ref=ref;
+ l_ref=refi;
}
else l_ref=-1;
ul_ref=u_ref;
Modified: trunk/theora/lib/c64x/c64xfrag.c
===================================================================
--- trunk/theora/lib/c64x/c64xfrag.c 2010-09-23 23:13:52 UTC (rev 17456)
+++ trunk/theora/lib/c64x/c64xfrag.c 2010-09-24 02:05:49 UTC (rev 17457)
@@ -168,7 +168,7 @@
unsigned char *dst;
ptrdiff_t frag_buf_off;
int ystride;
- int mb_mode;
+ int refi;
/*Apply the inverse transform.*/
/*Special case only having a DC component.*/
if(_last_zzi<2){
@@ -188,18 +188,14 @@
}
/*Fill in the target buffer.*/
frag_buf_off=_state->frag_buf_offs[_fragi];
- mb_mode=_state->frags[_fragi].mb_mode;
+ refi=_state->frags[_fragi].refi;
ystride=_state->ref_ystride[_pli];
dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
- if(mb_mode==OC_MODE_INTRA){
- oc_frag_recon_intra_c64x(dst,ystride,_dct_coeffs+64);
- }
+ if(refi==OC_FRAME_SELF)oc_frag_recon_intra_c64x(dst,ystride,_dct_coeffs+64);
else{
const unsigned char *ref;
int mvoffsets[2];
- ref=
- _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
- +frag_buf_off;
+ ref=_state->ref_frame_data[_state->ref_frame_idx[refi]]+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
_state->frag_mvs[_fragi])>1){
oc_frag_recon_inter2_c64x(dst,ref+mvoffsets[0],ref+mvoffsets[1],
Modified: trunk/theora/lib/decint.h
===================================================================
--- trunk/theora/lib/decint.h 2010-09-23 23:13:52 UTC (rev 17456)
+++ trunk/theora/lib/decint.h 2010-09-24 02:05:49 UTC (rev 17457)
@@ -105,7 +105,7 @@
const ogg_uint16_t *dequant[3][3][2];
int fragy0[3];
int fragy_end[3];
- int pred_last[3][3];
+ int pred_last[3][4];
int mcu_nvfrags;
int loop_filter;
int pp_level;
Modified: trunk/theora/lib/decode.c
===================================================================
--- trunk/theora/lib/decode.c 2010-09-23 23:13:52 UTC (rev 17456)
+++ trunk/theora/lib/decode.c 2010-09-24 02:05:49 UTC (rev 17457)
@@ -508,6 +508,7 @@
fragi=sb_maps[sbi][quadi][bi];
if(fragi>=0){
frags[fragi].coded=1;
+ frags[fragi].refi=OC_FRAME_SELF;
frags[fragi].mb_mode=OC_MODE_INTRA;
coded_fragis[ncoded_fragis++]=fragi;
}
@@ -650,6 +651,7 @@
if(coded)coded_fragis[ncoded_fragis++]=fragi;
else *(uncoded_fragis-++nuncoded_fragis)=fragi;
frags[fragi].coded=coded;
+ frags[fragi].refi=OC_FRAME_NONE;
}
}
}
@@ -832,16 +834,17 @@
mb_modes=_dec->state.mb_modes;
nmbs=_dec->state.nmbs;
for(mbi=0;mbi<nmbs;mbi++){
- int mb_mode;
+ int mb_mode;
mb_mode=mb_modes[mbi];
if(mb_mode!=OC_MODE_INVALID){
- oc_mv mbmv;
- ptrdiff_t fragi;
- int coded[13];
- int codedi;
- int ncoded;
- int mapi;
- int mapii;
+ oc_mv mbmv;
+ ptrdiff_t fragi;
+ int coded[13];
+ int codedi;
+ int ncoded;
+ int mapi;
+ int mapii;
+ int refi;
/*Search for at least one coded fragment.*/
ncoded=mapii=0;
do{
@@ -851,6 +854,7 @@
}
while(++mapii<map_nidxs);
if(ncoded<=0)continue;
+ refi=OC_FRAME_FOR_MODE(mb_mode);
switch(mb_mode){
case OC_MODE_INTER_MV_FOUR:{
oc_mv lbmvs[4];
@@ -861,6 +865,7 @@
if(coded[codedi]==bi){
codedi++;
fragi=mb_maps[mbi][0][bi];
+ frags[fragi].refi=refi;
frags[fragi].mb_mode=mb_mode;
lbmvs[bi]=oc_mv_unpack(&_dec->opb,mv_comp_tree);
frag_mvs[fragi]=lbmvs[bi];
@@ -877,6 +882,7 @@
mapi=coded[codedi];
bi=mapi&3;
fragi=mb_maps[mbi][mapi>>2][bi];
+ frags[fragi].refi=refi;
frags[fragi].mb_mode=mb_mode;
frag_mvs[fragi]=cbmvs[bi];
}
@@ -903,6 +909,7 @@
for(codedi=0;codedi<ncoded;codedi++){
mapi=coded[codedi];
fragi=mb_maps[mbi][mapi>>2][mapi&3];
+ frags[fragi].refi=refi;
frags[fragi].mb_mode=mb_mode;
frag_mvs[fragi]=mbmv;
}
@@ -1426,9 +1433,9 @@
predictor for the same reference frame.*/
for(fragx=0;fragx<nhfrags;fragx++,fragi++){
if(frags[fragi].coded){
- int ref;
- ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
- pred_last[ref]=frags[fragi].dc+=pred_last[ref];
+ int refi;
+ refi=frags[fragi].refi;
+ pred_last[refi]=frags[fragi].dc+=pred_last[refi];
ncoded_fragis++;
}
}
@@ -1441,27 +1448,24 @@
u_frags=frags-nhfrags;
l_ref=-1;
ul_ref=-1;
- u_ref=u_frags[fragi].coded?OC_FRAME_FOR_MODE(u_frags[fragi].mb_mode):-1;
+ u_ref=u_frags[fragi].refi;
for(fragx=0;fragx<nhfrags;fragx++,fragi++){
int ur_ref;
if(fragx+1>=nhfrags)ur_ref=-1;
- else{
- ur_ref=u_frags[fragi+1].coded?
- OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1;
- }
+ else ur_ref=u_frags[fragi+1].refi;
if(frags[fragi].coded){
int pred;
- int ref;
- ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+ int refi;
+ refi=frags[fragi].refi;
/*We break out a separate case based on which of our neighbors use
the same reference frames.
This is somewhat faster than trying to make a generic case which
handles all of them, since it reduces lots of poorly predicted
jumps to one switch statement, and also lets a number of the
multiplications be optimized out by strength reduction.*/
- switch((l_ref==ref)|(ul_ref==ref)<<1|
- (u_ref==ref)<<2|(ur_ref==ref)<<3){
- default:pred=pred_last[ref];break;
+ switch((l_ref==refi)|(ul_ref==refi)<<1|
+ (u_ref==refi)<<2|(ur_ref==refi)<<3){
+ default:pred=pred_last[refi];break;
case 1:
case 3:pred=frags[fragi-1].dc;break;
case 2:pred=u_frags[fragi-1].dc;break;
@@ -1495,9 +1499,9 @@
else if(abs(pred-p1)>128)pred=p1;
}break;
}
- pred_last[ref]=frags[fragi].dc+=pred;
+ pred_last[refi]=frags[fragi].dc+=pred;
ncoded_fragis++;
- l_ref=ref;
+ l_ref=refi;
}
else l_ref=-1;
ul_ref=u_ref;
Modified: trunk/theora/lib/encint.h
===================================================================
--- trunk/theora/lib/encint.h 2010-09-23 23:13:52 UTC (rev 17456)
+++ trunk/theora/lib/encint.h 2010-09-24 02:05:49 UTC (rev 17457)
@@ -676,7 +676,7 @@
/*The offset of the first DCT token for each coefficient for each plane.*/
unsigned char dct_token_offs[3][64];
/*The last DC coefficient for each plane and reference frame.*/
- int dc_pred_last[3][3];
+ int dc_pred_last[3][4];
#if defined(OC_COLLECT_METRICS)
/*Fragment SATD statistics for MB mode estimation metrics.*/
unsigned *frag_satd;
Modified: trunk/theora/lib/state.c
===================================================================
--- trunk/theora/lib/state.c 2010-09-23 23:13:52 UTC (rev 17456)
+++ trunk/theora/lib/state.c 2010-09-24 02:05:49 UTC (rev 17457)
@@ -942,7 +942,7 @@
unsigned char *dst;
ptrdiff_t frag_buf_off;
int ystride;
- int mb_mode;
+ int refi;
/*Apply the inverse transform.*/
/*Special case only having a DC component.*/
if(_last_zzi<2){
@@ -961,18 +961,14 @@
}
/*Fill in the target buffer.*/
frag_buf_off=_state->frag_buf_offs[_fragi];
- mb_mode=_state->frags[_fragi].mb_mode;
+ refi=_state->frags[_fragi].refi;
ystride=_state->ref_ystride[_pli];
dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
- if(mb_mode==OC_MODE_INTRA){
- oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs+64);
- }
+ if(refi==OC_FRAME_SELF)oc_frag_recon_intra(_state,dst,ystride,_dct_coeffs+64);
else{
const unsigned char *ref;
int mvoffsets[2];
- ref=
- _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
- +frag_buf_off;
+ ref=_state->ref_frame_data[_state->ref_frame_idx[refi]]+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
_state->frag_mvs[_fragi])>1){
oc_frag_recon_inter2(_state,
Modified: trunk/theora/lib/state.h
===================================================================
--- trunk/theora/lib/state.h 2010-09-23 23:13:52 UTC (rev 17456)
+++ trunk/theora/lib/state.h 2010-09-24 02:05:49 UTC (rev 17457)
@@ -173,6 +173,8 @@
# define OC_FRAME_PREV (1)
/*The current frame.*/
# define OC_FRAME_SELF (2)
+/*Used to mark uncoded fragments (for DC prediction).*/
+# define OC_FRAME_NONE (3)
/*The input or output buffer.*/
# define OC_FRAME_IO (3)
@@ -302,7 +304,9 @@
There are no fragments outside the coded frame by construction.*/
unsigned invalid:1;
/*The index of the quality index used for this fragment's AC coefficients.*/
- unsigned qii:6;
+ unsigned qii:4;
+ /*The index of the reference frame this fragment is predicted from.*/
+ unsigned refi:2;
/*The mode of the macroblock this fragment belongs to.*/
unsigned mb_mode:3;
/*The index of the associated border information for fragments which lie
Modified: trunk/theora/lib/tokenize.c
===================================================================
--- trunk/theora/lib/tokenize.c 2010-09-23 23:13:52 UTC (rev 17456)
+++ trunk/theora/lib/tokenize.c 2010-09-24 02:05:49 UTC (rev 17457)
@@ -1003,10 +1003,10 @@
predictor for the same reference frame.*/
for(fragx=0;fragx<nhfrags;fragx++,fragi++){
if(frags[fragi].coded){
- int ref;
- ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
- frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred_last[ref]);
- pred_last[ref]=frags[fragi].dc;
+ int refi;
+ refi=frags[fragi].refi;
+ frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred_last[refi]);
+ pred_last[refi]=frags[fragi].dc;
}
}
}
@@ -1018,27 +1018,24 @@
u_frags=frags-nhfrags;
l_ref=-1;
ul_ref=-1;
- u_ref=u_frags[fragi].coded?OC_FRAME_FOR_MODE(u_frags[fragi].mb_mode):-1;
+ u_ref=u_frags[fragi].refi;
for(fragx=0;fragx<nhfrags;fragx++,fragi++){
int ur_ref;
if(fragx+1>=nhfrags)ur_ref=-1;
- else{
- ur_ref=u_frags[fragi+1].coded?
- OC_FRAME_FOR_MODE(u_frags[fragi+1].mb_mode):-1;
- }
+ else ur_ref=u_frags[fragi+1].refi;
if(frags[fragi].coded){
int pred;
- int ref;
- ref=OC_FRAME_FOR_MODE(frags[fragi].mb_mode);
+ int refi;
+ refi=frags[fragi].refi;
/*We break out a separate case based on which of our neighbors use
the same reference frames.
This is somewhat faster than trying to make a generic case which
handles all of them, since it reduces lots of poorly predicted
jumps to one switch statement, and also lets a number of the
multiplications be optimized out by strength reduction.*/
- switch((l_ref==ref)|(ul_ref==ref)<<1|
- (u_ref==ref)<<2|(ur_ref==ref)<<3){
- default:pred=pred_last[ref];break;
+ switch((l_ref==refi)|(ul_ref==refi)<<1|
+ (u_ref==refi)<<2|(ur_ref==refi)<<3){
+ default:pred=pred_last[refi];break;
case 1:
case 3:pred=frags[fragi-1].dc;break;
case 2:pred=u_frags[fragi-1].dc;break;
@@ -1072,8 +1069,8 @@
}break;
}
frag_dc[fragi]=(ogg_int16_t)(frags[fragi].dc-pred);
- pred_last[ref]=frags[fragi].dc;
- l_ref=ref;
+ pred_last[refi]=frags[fragi].dc;
+ l_ref=refi;
}
else l_ref=-1;
ul_ref=u_ref;
Modified: trunk/theora/lib/x86/mmxstate.c
===================================================================
--- trunk/theora/lib/x86/mmxstate.c 2010-09-23 23:13:52 UTC (rev 17456)
+++ trunk/theora/lib/x86/mmxstate.c 2010-09-24 02:05:49 UTC (rev 17457)
@@ -28,7 +28,7 @@
unsigned char *dst;
ptrdiff_t frag_buf_off;
int ystride;
- int mb_mode;
+ int refi;
/*Apply the inverse transform.*/
/*Special case only having a DC component.*/
if(_last_zzi<2){
@@ -67,16 +67,14 @@
}
/*Fill in the target buffer.*/
frag_buf_off=_state->frag_buf_offs[_fragi];
- mb_mode=_state->frags[_fragi].mb_mode;
+ refi=_state->frags[_fragi].refi;
ystride=_state->ref_ystride[_pli];
dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
- if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64);
+ if(refi==OC_FRAME_SELF)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64);
else{
const unsigned char *ref;
int mvoffsets[2];
- ref=
- _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
- +frag_buf_off;
+ ref=_state->ref_frame_data[_state->ref_frame_idx[refi]]+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
_state->frag_mvs[_fragi])>1){
oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
Modified: trunk/theora/lib/x86_vc/mmxstate.c
===================================================================
--- trunk/theora/lib/x86_vc/mmxstate.c 2010-09-23 23:13:52 UTC (rev 17456)
+++ trunk/theora/lib/x86_vc/mmxstate.c 2010-09-24 02:05:49 UTC (rev 17457)
@@ -28,7 +28,7 @@
unsigned char *dst;
ptrdiff_t frag_buf_off;
int ystride;
- int mb_mode;
+ int refi;
/*Apply the inverse transform.*/
/*Special case only having a DC component.*/
if(_last_zzi<2){
@@ -78,16 +78,14 @@
}
/*Fill in the target buffer.*/
frag_buf_off=_state->frag_buf_offs[_fragi];
- mb_mode=_state->frags[_fragi].mb_mode;
+ refi=_state->frags[_fragi].refi;
ystride=_state->ref_ystride[_pli];
dst=_state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_SELF]]+frag_buf_off;
- if(mb_mode==OC_MODE_INTRA)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64);
+ if(refi==OC_FRAME_SELF)oc_frag_recon_intra_mmx(dst,ystride,_dct_coeffs+64);
else{
const unsigned char *ref;
int mvoffsets[2];
- ref=
- _state->ref_frame_data[_state->ref_frame_idx[OC_FRAME_FOR_MODE(mb_mode)]]
- +frag_buf_off;
+ ref=_state->ref_frame_data[_state->ref_frame_idx[refi]]+frag_buf_off;
if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
_state->frag_mvs[_fragi])>1){
oc_frag_recon_inter2_mmx(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
More information about the commits
mailing list